summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/6lowpan_i.h4
-rw-r--r--net/6lowpan/Makefile2
-rw-r--r--net/6lowpan/core.c50
-rw-r--r--net/6lowpan/debugfs.c39
-rw-r--r--net/6lowpan/iphc.c167
-rw-r--r--net/6lowpan/ndisc.c234
-rw-r--r--net/batman-adv/routing.c2
-rw-r--r--net/batman-adv/send.c4
-rw-r--r--net/bluetooth/6lowpan.c13
-rw-r--r--net/caif/chnl_net.c1
-rw-r--r--net/can/Makefile3
-rw-r--r--net/can/af_can.c22
-rw-r--r--net/can/bcm.c309
-rw-r--r--net/can/proc.c3
-rw-r--r--net/core/dev.c59
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/fib_rules.c33
-rw-r--r--net/core/filter.c78
-rw-r--r--net/core/gen_estimator.c24
-rw-r--r--net/core/gen_stats.c35
-rw-r--r--net/core/pktgen.c1
-rw-r--r--net/core/rtnetlink.c22
-rw-r--r--net/core/skbuff.c46
-rw-r--r--net/core/utils.c8
-rw-r--r--net/dsa/Makefile2
-rw-r--r--net/dsa/dsa.c253
-rw-r--r--net/dsa/dsa2.c690
-rw-r--r--net/dsa/dsa_priv.h9
-rw-r--r--net/dsa/slave.c80
-rw-r--r--net/dsa/tag_brcm.c4
-rw-r--r--net/dsa/tag_dsa.c10
-rw-r--r--net/dsa/tag_edsa.c10
-rw-r--r--net/dsa/tag_trailer.c4
-rw-r--r--net/ieee802154/6lowpan/core.c27
-rw-r--r--net/ieee802154/6lowpan/tx.c113
-rw-r--r--net/ipv4/Kconfig16
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/fib_rules.c6
-rw-r--r--net/ipv4/fou.c81
-rw-r--r--net/ipv4/gre_demux.c1
-rw-r--r--net/ipv4/inet_diag.c25
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_gre.c51
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_tunnel.c2
-rw-r--r--net/ipv4/tcp_dctcp.c4
-rw-r--r--net/ipv4/tcp_input.c31
-rw-r--r--net/ipv4/tcp_nv.c476
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv4/udp_tunnel.c61
-rw-r--r--net/ipv4/xfrm4_policy.c8
-rw-r--r--net/ipv6/addrconf.c268
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/icmp.c78
-rw-r--r--net/ipv6/ila/ila.h3
-rw-r--r--net/ipv6/ila/ila_common.c16
-rw-r--r--net/ipv6/ila/ila_lwt.c4
-rw-r--r--net/ipv6/ila/ila_xlat.c8
-rw-r--r--net/ipv6/ip6_icmp.c2
-rw-r--r--net/ipv6/ip6_input.c1
-rw-r--r--net/ipv6/ip6_output.c14
-rw-r--r--net/ipv6/ndisc.c123
-rw-r--r--net/ipv6/ping.c4
-rw-r--r--net/ipv6/raw.c8
-rw-r--r--net/ipv6/route.c32
-rw-r--r--net/ipv6/sit.c47
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/iucv/af_iucv.c223
-rw-r--r--net/l2tp/l2tp_eth.c4
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/l3mdev/l3mdev.c64
-rw-r--r--net/mac80211/agg-tx.c8
-rw-r--r--net/mac80211/debugfs.c173
-rw-r--r--net/mac80211/debugfs_sta.c78
-rw-r--r--net/mac80211/ieee80211_i.h31
-rw-r--r--net/mac80211/iface.c26
-rw-r--r--net/mac80211/main.c10
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac80211/sta_info.c14
-rw-r--r--net/mac80211/tx.c292
-rw-r--r--net/mac80211/util.c34
-rw-r--r--net/mpls/af_mpls.c9
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/netlink/af_netlink.h14
-rw-r--r--net/openvswitch/actions.c40
-rw-r--r--net/openvswitch/conntrack.c69
-rw-r--r--net/openvswitch/datapath.c42
-rw-r--r--net/openvswitch/datapath.h5
-rw-r--r--net/openvswitch/flow_netlink.c9
-rw-r--r--net/openvswitch/vport-internal_dev.c2
-rw-r--r--net/openvswitch/vport.c1
-rw-r--r--net/packet/af_packet.c36
-rw-r--r--net/rds/cong.c3
-rw-r--r--net/rds/connection.c329
-rw-r--r--net/rds/ib.c1
-rw-r--r--net/rds/ib_cm.c3
-rw-r--r--net/rds/ib_rdma.c3
-rw-r--r--net/rds/ib_recv.c1
-rw-r--r--net/rds/ib_send.c1
-rw-r--r--net/rds/loop.c1
-rw-r--r--net/rds/rdma_transport.c1
-rw-r--r--net/rds/rds.h152
-rw-r--r--net/rds/rds_single_path.h30
-rw-r--r--net/rds/recv.c27
-rw-r--r--net/rds/send.c293
-rw-r--r--net/rds/tcp.c7
-rw-r--r--net/rds/tcp_connect.c4
-rw-r--r--net/rds/tcp_listen.c11
-rw-r--r--net/rds/tcp_recv.c1
-rw-r--r--net/rds/tcp_send.c1
-rw-r--r--net/rds/threads.c95
-rw-r--r--net/rxrpc/Makefile36
-rw-r--r--net/rxrpc/af_rxrpc.c297
-rw-r--r--net/rxrpc/ar-connection.c927
-rw-r--r--net/rxrpc/ar-error.c230
-rw-r--r--net/rxrpc/ar-internal.h423
-rw-r--r--net/rxrpc/ar-local.c415
-rw-r--r--net/rxrpc/ar-peer.c303
-rw-r--r--net/rxrpc/ar-transport.c284
-rw-r--r--net/rxrpc/call_accept.c (renamed from net/rxrpc/ar-accept.c)46
-rw-r--r--net/rxrpc/call_event.c (renamed from net/rxrpc/ar-ack.c)37
-rw-r--r--net/rxrpc/call_object.c (renamed from net/rxrpc/ar-call.c)367
-rw-r--r--net/rxrpc/conn_client.c94
-rw-r--r--net/rxrpc/conn_event.c (renamed from net/rxrpc/ar-connevent.c)33
-rw-r--r--net/rxrpc/conn_object.c686
-rw-r--r--net/rxrpc/input.c (renamed from net/rxrpc/ar-input.c)69
-rw-r--r--net/rxrpc/key.c (renamed from net/rxrpc/ar-key.c)6
-rw-r--r--net/rxrpc/local_event.c116
-rw-r--r--net/rxrpc/local_object.c387
-rw-r--r--net/rxrpc/misc.c6
-rw-r--r--net/rxrpc/output.c (renamed from net/rxrpc/ar-output.c)233
-rw-r--r--net/rxrpc/peer_event.c281
-rw-r--r--net/rxrpc/peer_object.c315
-rw-r--r--net/rxrpc/proc.c (renamed from net/rxrpc/ar-proc.c)39
-rw-r--r--net/rxrpc/recvmsg.c (renamed from net/rxrpc/ar-recvmsg.c)10
-rw-r--r--net/rxrpc/rxkad.c76
-rw-r--r--net/rxrpc/security.c (renamed from net/rxrpc/ar-security.c)8
-rw-r--r--net/rxrpc/skbuff.c (renamed from net/rxrpc/ar-skbuff.c)2
-rw-r--r--net/rxrpc/sysctl.c12
-rw-r--r--net/rxrpc/utils.c41
-rw-r--r--net/sched/act_api.c32
-rw-r--r--net/sched/act_bpf.c8
-rw-r--r--net/sched/act_connmark.c6
-rw-r--r--net/sched/act_csum.c7
-rw-r--r--net/sched/act_gact.c7
-rw-r--r--net/sched/act_ife.c13
-rw-r--r--net/sched/act_ipt.c19
-rw-r--r--net/sched/act_mirred.c9
-rw-r--r--net/sched/act_nat.c7
-rw-r--r--net/sched/act_pedit.c8
-rw-r--r--net/sched/act_police.c12
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_vlan.c13
-rw-r--r--net/sched/cls_api.c48
-rw-r--r--net/sched/cls_flower.c65
-rw-r--r--net/sched/sch_api.c30
-rw-r--r--net/sched/sch_atm.c33
-rw-r--r--net/sched/sch_blackhole.c5
-rw-r--r--net/sched/sch_cbq.c305
-rw-r--r--net/sched/sch_choke.c41
-rw-r--r--net/sched/sch_codel.c10
-rw-r--r--net/sched/sch_drr.c38
-rw-r--r--net/sched/sch_dsmark.c27
-rw-r--r--net/sched/sch_fifo.c18
-rw-r--r--net/sched/sch_fq.c29
-rw-r--r--net/sched/sch_fq_codel.c64
-rw-r--r--net/sched/sch_generic.c90
-rw-r--r--net/sched/sch_gred.c42
-rw-r--r--net/sched/sch_hfsc.c44
-rw-r--r--net/sched/sch_hhf.c24
-rw-r--r--net/sched/sch_htb.c68
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c11
-rw-r--r--net/sched/sch_multiq.c32
-rw-r--r--net/sched/sch_netem.c73
-rw-r--r--net/sched/sch_pie.c7
-rw-r--r--net/sched/sch_plug.c19
-rw-r--r--net/sched/sch_prio.c27
-rw-r--r--net/sched/sch_qfq.c63
-rw-r--r--net/sched/sch_red.c28
-rw-r--r--net/sched/sch_sfb.c7
-rw-r--r--net/sched/sch_sfq.c11
-rw-r--r--net/sched/sch_tbf.c34
-rw-r--r--net/sched/sch_teql.c4
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/input.c57
-rw-r--r--net/sctp/inqueue.c78
-rw-r--r--net/sctp/offload.c98
-rw-r--r--net/sctp/output.c366
-rw-r--r--net/sctp/protocol.c6
-rw-r--r--net/sctp/sm_sideeffect.c4
-rw-r--r--net/sctp/socket.c10
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/addr.h1
-rw-r--r--net/tipc/bearer.c8
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.c1
-rw-r--r--net/tipc/core.h15
-rw-r--r--net/tipc/discover.c5
-rw-r--r--net/tipc/link.c51
-rw-r--r--net/tipc/monitor.c651
-rw-r--r--net/tipc/monitor.h73
-rw-r--r--net/tipc/node.c48
-rw-r--r--net/tipc/server.c3
-rw-r--r--net/tipc/udp_media.c24
-rw-r--r--net/wireless/core.c30
-rw-r--r--net/wireless/core.h4
-rw-r--r--net/wireless/nl80211.c232
-rw-r--r--net/wireless/nl80211.h2
-rw-r--r--net/wireless/sme.c8
214 files changed, 9185 insertions, 5763 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index 97ecc27aeca6..a67caee11929 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -12,6 +12,10 @@ static inline bool lowpan_is_ll(const struct net_device *dev,
return lowpan_dev(dev)->lltype == lltype;
}
+extern const struct ndisc_ops lowpan_ndisc_ops;
+
+int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev);
+
#ifdef CONFIG_6LOWPAN_DEBUGFS
int lowpan_dev_debugfs_init(struct net_device *dev);
void lowpan_dev_debugfs_exit(struct net_device *dev);
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index e44f3bf2dd42..12d131ab2324 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_6LOWPAN) += 6lowpan.o
-6lowpan-y := core.o iphc.o nhc.o
+6lowpan-y := core.o iphc.o nhc.o ndisc.o
6lowpan-$(CONFIG_6LOWPAN_DEBUGFS) += debugfs.o
#rfc6282 nhcs
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 7a240b3eaed1..5945f7e19c67 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <net/6lowpan.h>
+#include <net/addrconf.h>
#include "6lowpan_i.h"
@@ -33,6 +34,8 @@ int lowpan_register_netdevice(struct net_device *dev,
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
lowpan_dev(dev)->ctx.table[i].id = i;
+ dev->ndisc_ops = &lowpan_ndisc_ops;
+
ret = register_netdevice(dev);
if (ret < 0)
return ret;
@@ -72,16 +75,61 @@ void lowpan_unregister_netdev(struct net_device *dev)
}
EXPORT_SYMBOL(lowpan_unregister_netdev);
+int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev)
+{
+ struct wpan_dev *wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ /* Set short_addr autoconfiguration if short_addr is present only */
+ if (!lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr))
+ return -1;
+
+ /* For either address format, all zero addresses MUST NOT be used */
+ if (wpan_dev->pan_id == cpu_to_le16(0x0000) &&
+ wpan_dev->short_addr == cpu_to_le16(0x0000))
+ return -1;
+
+ /* Alternatively, if no PAN ID is known, 16 zero bits may be used */
+ if (wpan_dev->pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
+ memset(eui, 0, 2);
+ else
+ ieee802154_le16_to_be16(eui, &wpan_dev->pan_id);
+
+ /* The "Universal/Local" (U/L) bit shall be set to zero */
+ eui[0] &= ~2;
+ eui[2] = 0;
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[5] = 0;
+ ieee802154_le16_to_be16(&eui[6], &wpan_dev->short_addr);
+ return 0;
+}
+
static int lowpan_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct inet6_dev *idev;
+ struct in6_addr addr;
int i;
if (dev->type != ARPHRD_6LOWPAN)
return NOTIFY_DONE;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ return NOTIFY_DONE;
+
switch (event) {
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ /* (802.15.4 6LoWPAN short address slaac handling */
+ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) &&
+ addrconf_ifid_802154_6lowpan(addr.s6_addr + 8, dev) == 0) {
+ __ipv6_addr_set_half(&addr.s6_addr32[0],
+ htonl(0xFE800000), 0);
+ addrconf_add_linklocal(idev, &addr, 0);
+ }
+ break;
case NETDEV_DOWN:
for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE,
@@ -112,8 +160,6 @@ static int __init lowpan_module_init(void)
return ret;
}
- request_module_nowait("ipv6");
-
request_module_nowait("nhc_dest");
request_module_nowait("nhc_fragment");
request_module_nowait("nhc_hop");
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index acbaa3db493b..24915e0bb9ea 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -245,6 +245,41 @@ static const struct file_operations lowpan_context_fops = {
.release = single_release,
};
+static int lowpan_short_addr_get(void *data, u64 *val)
+{
+ struct wpan_dev *wdev = data;
+
+ rtnl_lock();
+ *val = le16_to_cpu(wdev->short_addr);
+ rtnl_unlock();
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(lowpan_short_addr_fops, lowpan_short_addr_get,
+ NULL, "0x%04llx\n");
+
+static int lowpan_dev_debugfs_802154_init(const struct net_device *dev,
+ struct lowpan_dev *ldev)
+{
+ struct dentry *dentry, *root;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return 0;
+
+ root = debugfs_create_dir("ieee802154", ldev->iface_debugfs);
+ if (!root)
+ return -EINVAL;
+
+ dentry = debugfs_create_file("short_addr", 0444, root,
+ lowpan_802154_dev(dev)->wdev->ieee802154_ptr,
+ &lowpan_short_addr_fops);
+ if (!dentry)
+ return -EINVAL;
+
+ return 0;
+}
+
int lowpan_dev_debugfs_init(struct net_device *dev)
{
struct lowpan_dev *ldev = lowpan_dev(dev);
@@ -272,6 +307,10 @@ int lowpan_dev_debugfs_init(struct net_device *dev)
goto remove_root;
}
+ ret = lowpan_dev_debugfs_802154_init(dev, ldev);
+ if (ret < 0)
+ goto remove_root;
+
return 0;
remove_root:
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 8501dd532fe1..79f1fa22509a 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -761,22 +761,75 @@ static const u8 lowpan_iphc_dam_to_sam_value[] = {
[LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11,
};
-static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct in6_addr *ipaddr,
+static inline bool
+lowpan_iphc_compress_ctx_802154_lladdr(const struct in6_addr *ipaddr,
+ const struct lowpan_iphc_ctx *ctx,
+ const void *lladdr)
+{
+ const struct ieee802154_addr *addr = lladdr;
+ unsigned char extended_addr[EUI64_ADDR_LEN];
+ bool lladdr_compress = false;
+ struct in6_addr tmp = {};
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_LONG:
+ ieee802154_le64_to_be64(&extended_addr, &addr->extended_addr);
+ /* check for SAM/DAM = 11 */
+ memcpy(&tmp.s6_addr[8], &extended_addr, EUI64_ADDR_LEN);
+ /* second bit-flip (Universe/Local) is done according RFC2464 */
+ tmp.s6_addr[8] ^= 0x02;
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ tmp.s6_addr[11] = 0xFF;
+ tmp.s6_addr[12] = 0xFE;
+ ieee802154_le16_to_be16(&tmp.s6_addr16[7],
+ &addr->short_addr);
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ default:
+ /* should never handled and filtered by 802154 6lowpan */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return lladdr_compress;
+}
+
+static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev,
+ const struct in6_addr *ipaddr,
const struct lowpan_iphc_ctx *ctx,
const unsigned char *lladdr, bool sam)
{
struct in6_addr tmp = {};
u8 dam;
- /* check for SAM/DAM = 11 */
- memcpy(&tmp.s6_addr[8], lladdr, 8);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- tmp.s6_addr[8] ^= 0x02;
- /* context information are always used */
- ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
- if (ipv6_addr_equal(&tmp, ipaddr)) {
- dam = LOWPAN_IPHC_DAM_11;
- goto out;
+ switch (lowpan_dev(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_iphc_compress_ctx_802154_lladdr(ipaddr, ctx,
+ lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11;
+ goto out;
+ }
+ break;
+ default:
+ /* check for SAM/DAM = 11 */
+ memcpy(&tmp.s6_addr[8], lladdr, EUI64_ADDR_LEN);
+ /* second bit-flip (Universe/Local) is done according RFC2464 */
+ tmp.s6_addr[8] ^= 0x02;
+ /* context information are always used */
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+ if (ipv6_addr_equal(&tmp, ipaddr)) {
+ dam = LOWPAN_IPHC_DAM_11;
+ goto out;
+ }
+ break;
}
memset(&tmp, 0, sizeof(tmp));
@@ -813,28 +866,85 @@ out:
return dam;
}
-static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr,
+static inline bool
+lowpan_iphc_compress_802154_lladdr(const struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ const struct ieee802154_addr *addr = lladdr;
+ unsigned char extended_addr[EUI64_ADDR_LEN];
+ bool lladdr_compress = false;
+ struct in6_addr tmp = {};
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_LONG:
+ ieee802154_le64_to_be64(&extended_addr, &addr->extended_addr);
+ if (is_addr_mac_addr_based(ipaddr, extended_addr))
+ lladdr_compress = true;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ /* fe:80::ff:fe00:XXXX
+ * \__/
+ * short_addr
+ *
+ * Universe/Local bit is zero.
+ */
+ tmp.s6_addr[0] = 0xFE;
+ tmp.s6_addr[1] = 0x80;
+ tmp.s6_addr[11] = 0xFF;
+ tmp.s6_addr[12] = 0xFE;
+ ieee802154_le16_to_be16(&tmp.s6_addr16[7],
+ &addr->short_addr);
+ if (ipv6_addr_equal(&tmp, ipaddr))
+ lladdr_compress = true;
+ break;
+ default:
+ /* should never handled and filtered by 802154 6lowpan */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return lladdr_compress;
+}
+
+static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct net_device *dev,
+ const struct in6_addr *ipaddr,
const unsigned char *lladdr, bool sam)
{
- u8 dam = LOWPAN_IPHC_DAM_00;
+ u8 dam = LOWPAN_IPHC_DAM_01;
- if (is_addr_mac_addr_based(ipaddr, lladdr)) {
- dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
- pr_debug("address compression 0 bits\n");
- } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
+ switch (lowpan_dev(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_iphc_compress_802154_lladdr(ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ pr_debug("address compression 0 bits\n");
+ goto out;
+ }
+ break;
+ default:
+ if (is_addr_mac_addr_based(ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ pr_debug("address compression 0 bits\n");
+ goto out;
+ }
+ break;
+ }
+
+ if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
/* compress IID to 16 bits xxxx::XXXX */
lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2);
dam = LOWPAN_IPHC_DAM_10; /* 16-bits */
raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)",
*hc_ptr - 2, 2);
- } else {
- /* do not compress IID => xxxx::IID */
- lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
- dam = LOWPAN_IPHC_DAM_01; /* 64-bits */
- raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
- *hc_ptr - 8, 8);
+ goto out;
}
+ /* do not compress IID => xxxx::IID */
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
+ raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
+ *hc_ptr - 8, 8);
+
+out:
+
if (sam)
return lowpan_iphc_dam_to_sam_value[dam];
else
@@ -1013,9 +1123,6 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
iphc0 = LOWPAN_DISPATCH_IPHC;
iphc1 = 0;
- raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN);
- raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN);
-
raw_dump_table(__func__, "sending raw skb network uncompressed packet",
skb->data, skb->len);
@@ -1088,14 +1195,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
iphc1 |= LOWPAN_IPHC_SAC;
} else {
if (sci) {
- iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->saddr,
+ iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, dev,
+ &hdr->saddr,
&sci_entry, saddr,
true);
iphc1 |= LOWPAN_IPHC_SAC;
} else {
if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL &&
lowpan_is_linklocal_zero_padded(hdr->saddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, dev,
&hdr->saddr,
saddr, true);
pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
@@ -1123,14 +1231,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
}
} else {
if (dci) {
- iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->daddr,
+ iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, dev,
+ &hdr->daddr,
&dci_entry, daddr,
false);
iphc1 |= LOWPAN_IPHC_DAC;
} else {
if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL &&
lowpan_is_linklocal_zero_padded(hdr->daddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, dev,
&hdr->daddr,
daddr, false);
pr_debug("dest address unicast link-local %pI6c iphc1 0x%02x\n",
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
new file mode 100644
index 000000000000..ae1d4199aa4c
--- /dev/null
+++ b/net/6lowpan/ndisc.c
@@ -0,0 +1,234 @@
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ * (C) 2016 Pengutronix, Alexander Aring <aar@pengutronix.de>
+ */
+
+#include <net/6lowpan.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+
+#include "6lowpan_i.h"
+
+static int lowpan_ndisc_is_useropt(u8 nd_opt_type)
+{
+ return nd_opt_type == ND_OPT_6CO;
+}
+
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+#define NDISC_802154_SHORT_ADDR_LENGTH 1
+static int lowpan_ndisc_parse_802154_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ switch (nd_opt->nd_opt_len) {
+ case NDISC_802154_SHORT_ADDR_LENGTH:
+ if (ndopts->nd_802154_opt_array[nd_opt->nd_opt_type])
+ ND_PRINTK(2, warn,
+ "%s: duplicated short addr ND6 option found: type=%d\n",
+ __func__, nd_opt->nd_opt_type);
+ else
+ ndopts->nd_802154_opt_array[nd_opt->nd_opt_type] = nd_opt;
+ return 1;
+ default:
+ /* all others will be handled by ndisc IPv6 option parsing */
+ return 0;
+ }
+}
+
+static int lowpan_ndisc_parse_options(const struct net_device *dev,
+ struct nd_opt_hdr *nd_opt,
+ struct ndisc_options *ndopts)
+{
+ switch (nd_opt->nd_opt_type) {
+ case ND_OPT_SOURCE_LL_ADDR:
+ case ND_OPT_TARGET_LL_ADDR:
+ return lowpan_ndisc_parse_802154_options(dev, nd_opt, ndopts);
+ default:
+ return 0;
+ }
+}
+
+static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags,
+ u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
+ u8 *lladdr_short = NULL;
+
+ switch (icmp6_type) {
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ if (ndopts->nd_802154_opts_src_lladdr) {
+ lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_src_lladdr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ if (!lladdr_short) {
+ ND_PRINTK(2, warn,
+ "NA: invalid short link-layer address length\n");
+ return;
+ }
+ }
+ break;
+ case NDISC_REDIRECT:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ if (ndopts->nd_802154_opts_tgt_lladdr) {
+ lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_tgt_lladdr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ if (!lladdr_short) {
+ ND_PRINTK(2, warn,
+ "NA: invalid short link-layer address length\n");
+ return;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ write_lock_bh(&n->lock);
+ if (lladdr_short)
+ ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short);
+ else
+ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ write_unlock_bh(&n->lock);
+}
+
+static void lowpan_ndisc_update(const struct net_device *dev,
+ struct neighbour *n, u32 flags, u8 icmp6_type,
+ const struct ndisc_options *ndopts)
+{
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return;
+
+ /* react on overrides only. TODO check if this is really right. */
+ if (flags & NEIGH_UPDATE_F_OVERRIDE)
+ lowpan_ndisc_802154_update(n, flags, icmp6_type, ndopts);
+}
+
+static int lowpan_ndisc_opt_addr_space(const struct net_device *dev,
+ u8 icmp6_type, struct neighbour *neigh,
+ u8 *ha_buf, u8 **ha)
+{
+ struct lowpan_802154_neigh *n;
+ struct wpan_dev *wpan_dev;
+ int addr_space = 0;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return 0;
+
+ switch (icmp6_type) {
+ case NDISC_REDIRECT:
+ n = lowpan_802154_neigh(neighbour_priv(neigh));
+
+ read_lock_bh(&neigh->lock);
+ if (lowpan_802154_is_valid_src_short_addr(n->short_addr)) {
+ memcpy(ha_buf, &n->short_addr,
+ IEEE802154_SHORT_ADDR_LEN);
+ read_unlock_bh(&neigh->lock);
+ addr_space += __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
+ *ha = ha_buf;
+ }
+ read_unlock_bh(&neigh->lock);
+ break;
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_ROUTER_SOLICITATION:
+ wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr))
+ addr_space = __ndisc_opt_addr_space(IEEE802154_SHORT_ADDR_LEN, 0);
+ break;
+ default:
+ break;
+ }
+
+ return addr_space;
+}
+
+static void lowpan_ndisc_fill_addr_option(const struct net_device *dev,
+ struct sk_buff *skb, u8 icmp6_type,
+ const u8 *ha)
+{
+ struct wpan_dev *wpan_dev;
+ __be16 short_addr;
+ u8 opt_type;
+
+ if (!lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
+ return;
+
+ switch (icmp6_type) {
+ case NDISC_REDIRECT:
+ if (ha) {
+ ieee802154_le16_to_be16(&short_addr, ha);
+ __ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
+ &short_addr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ }
+ return;
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ opt_type = ND_OPT_TARGET_LL_ADDR;
+ break;
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ opt_type = ND_OPT_SOURCE_LL_ADDR;
+ break;
+ default:
+ return;
+ }
+
+ wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr;
+
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) {
+ ieee802154_le16_to_be16(&short_addr,
+ &wpan_dev->short_addr);
+ __ndisc_fill_addr_option(skb, opt_type, &short_addr,
+ IEEE802154_SHORT_ADDR_LEN, 0);
+ }
+}
+
+static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net,
+ struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ struct in6_addr *addr,
+ int addr_type, u32 addr_flags,
+ bool sllao, bool tokenized,
+ __u32 valid_lft,
+ u32 prefered_lft,
+ bool dev_addr_generated)
+{
+ int err;
+
+ /* generates short based address for RA PIO's */
+ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) && dev_addr_generated &&
+ !addrconf_ifid_802154_6lowpan(addr->s6_addr + 8, dev)) {
+ err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ addr, addr_type, addr_flags,
+ sllao, tokenized, valid_lft,
+ prefered_lft);
+ if (err)
+ ND_PRINTK(2, warn,
+ "RA: could not add a short address based address for prefix: %pI6c\n",
+ &pinfo->prefix);
+ }
+}
+#endif
+
+const struct ndisc_ops lowpan_ndisc_ops = {
+ .is_useropt = lowpan_ndisc_is_useropt,
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+ .parse_options = lowpan_ndisc_parse_options,
+ .update = lowpan_ndisc_update,
+ .opt_addr_space = lowpan_ndisc_opt_addr_space,
+ .fill_addr_option = lowpan_ndisc_fill_addr_option,
+ .prefix_rcv_add_addr = lowpan_ndisc_prefix_rcv_add_addr,
+#endif
+};
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 6c2901a86230..396c0134c5ab 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -654,7 +654,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
len + ETH_HLEN);
ret = NET_RX_SUCCESS;
- } else if (res == NET_XMIT_POLICED) {
+ } else if (res == -EINPROGRESS) {
/* skb was buffered and consumed */
ret = NET_RX_SUCCESS;
}
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index f2f125684ed9..b1a4e8a811c8 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -156,7 +156,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
* attempted.
*
* Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
- * NET_XMIT_POLICED if the skb is buffered for later transmit.
+ * -EINPROGRESS if the skb is buffered for later transmit.
*/
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
@@ -188,7 +188,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
* network coding fails, then send the packet as usual.
*/
if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
- ret = NET_XMIT_POLICED;
+ ret = -EINPROGRESS;
} else {
batadv_send_unicast_skb(skb, neigh_node);
ret = NET_XMIT_SUCCESS;
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 780089d75915..d020299baba4 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -627,20 +627,9 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
return err < 0 ? NET_XMIT_DROP : err;
}
-static struct lock_class_key bt_tx_busylock;
-static struct lock_class_key bt_netdev_xmit_lock_key;
-
-static void bt_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &bt_netdev_xmit_lock_key);
-}
-
static int bt_dev_init(struct net_device *dev)
{
- netdev_for_each_tx_queue(dev, bt_set_lockdep_class_one, NULL);
- dev->qdisc_tx_busylock = &bt_tx_busylock;
+ netdev_lockdep_set_classes(dev);
return 0;
}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 67a4a36febd1..3408ed51b611 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
-#include <linux/moduleparam.h>
#include <linux/ip.h>
#include <linux/sched.h>
#include <linux/sockios.h>
diff --git a/net/can/Makefile b/net/can/Makefile
index cef49eb1f5c7..10936754e3f2 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -3,7 +3,8 @@
#
obj-$(CONFIG_CAN) += can.o
-can-y := af_can.o proc.o
+can-y := af_can.o
+can-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_CAN_RAW) += can-raw.o
can-raw-y := raw.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 166d436196c1..1108079d934f 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -911,14 +911,14 @@ static __init int can_init(void)
if (!rcv_cache)
return -ENOMEM;
- if (stats_timer) {
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ if (stats_timer) {
/* the statistics are updated every second (timer triggered) */
- setup_timer(&can_stattimer, can_stat_update, 0);
- mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
- } else
- can_stattimer.function = NULL;
-
- can_init_proc();
+ setup_timer(&can_stattimer, can_stat_update, 0);
+ mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
+ }
+ can_init_proc();
+ }
/* protocol register */
sock_register(&can_family_ops);
@@ -933,10 +933,12 @@ static __exit void can_exit(void)
{
struct net_device *dev;
- if (stats_timer)
- del_timer_sync(&can_stattimer);
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ if (stats_timer)
+ del_timer_sync(&can_stattimer);
- can_remove_proc();
+ can_remove_proc();
+ }
/* protocol unregister */
dev_remove_pack(&canfd_packet);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 6863310d6973..8e999ffdf28b 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,7 +1,7 @@
/*
* bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
*
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2016 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -67,27 +67,31 @@
*/
#define MAX_NFRAMES 256
-/* use of last_frames[index].can_dlc */
+/* use of last_frames[index].flags */
#define RX_RECV 0x40 /* received data for this element */
#define RX_THR 0x80 /* element not been sent due to throttle feature */
-#define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */
+#define BCM_CAN_FLAGS_MASK 0x3F /* to clean private flags after usage */
/* get best masking value for can_rx_register() for a given single can_id */
#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \
(CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
(CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
-#define CAN_BCM_VERSION CAN_VERSION
+#define CAN_BCM_VERSION "20160617"
MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
MODULE_ALIAS("can-proto-2");
-/* easy access to can_frame payload */
-static inline u64 GET_U64(const struct can_frame *cp)
+/*
+ * easy access to the first 64 bit of can(fd)_frame payload. cp->data is
+ * 64 bit aligned so the offset has to be multiples of 8 which is ensured
+ * by the only callers in bcm_rx_cmp_to_index() bcm_rx_handler().
+ */
+static inline u64 get_u64(const struct canfd_frame *cp, int offset)
{
- return *(u64 *)cp->data;
+ return *(u64 *)(cp->data + offset);
}
struct bcm_op {
@@ -101,13 +105,14 @@ struct bcm_op {
struct tasklet_struct tsklet, thrtsklet;
ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
int rx_ifindex;
+ int cfsiz;
u32 count;
u32 nframes;
u32 currframe;
- struct can_frame *frames;
- struct can_frame *last_frames;
- struct can_frame sframe;
- struct can_frame last_sframe;
+ struct canfd_frame *frames;
+ struct canfd_frame *last_frames;
+ struct canfd_frame sframe;
+ struct canfd_frame last_sframe;
struct sock *sk;
struct net_device *rx_reg_dev;
};
@@ -136,7 +141,7 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv)
return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
}
-#define CFSIZ sizeof(struct can_frame)
+#define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU)
#define OPSIZ sizeof(struct bcm_op)
#define MHSIZ sizeof(struct bcm_msg_head)
@@ -183,43 +188,50 @@ static int bcm_proc_show(struct seq_file *m, void *v)
if (!op->frames_abs)
continue;
- seq_printf(m, "rx_op: %03X %-5s ",
- op->can_id, bcm_proc_getifname(ifname, op->ifindex));
- seq_printf(m, "[%u]%c ", op->nframes,
- (op->flags & RX_CHECK_DLC)?'d':' ');
+ seq_printf(m, "rx_op: %03X %-5s ", op->can_id,
+ bcm_proc_getifname(ifname, op->ifindex));
+
+ if (op->flags & CAN_FD_FRAME)
+ seq_printf(m, "(%u)", op->nframes);
+ else
+ seq_printf(m, "[%u]", op->nframes);
+
+ seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' ');
+
if (op->kt_ival1.tv64)
seq_printf(m, "timeo=%lld ",
- (long long)
- ktime_to_us(op->kt_ival1));
+ (long long)ktime_to_us(op->kt_ival1));
if (op->kt_ival2.tv64)
seq_printf(m, "thr=%lld ",
- (long long)
- ktime_to_us(op->kt_ival2));
+ (long long)ktime_to_us(op->kt_ival2));
seq_printf(m, "# recv %ld (%ld) => reduction: ",
- op->frames_filtered, op->frames_abs);
+ op->frames_filtered, op->frames_abs);
reduction = 100 - (op->frames_filtered * 100) / op->frames_abs;
seq_printf(m, "%s%ld%%\n",
- (reduction == 100)?"near ":"", reduction);
+ (reduction == 100) ? "near " : "", reduction);
}
list_for_each_entry(op, &bo->tx_ops, list) {
- seq_printf(m, "tx_op: %03X %s [%u] ",
- op->can_id,
- bcm_proc_getifname(ifname, op->ifindex),
- op->nframes);
+ seq_printf(m, "tx_op: %03X %s ", op->can_id,
+ bcm_proc_getifname(ifname, op->ifindex));
+
+ if (op->flags & CAN_FD_FRAME)
+ seq_printf(m, "(%u) ", op->nframes);
+ else
+ seq_printf(m, "[%u] ", op->nframes);
if (op->kt_ival1.tv64)
seq_printf(m, "t1=%lld ",
- (long long) ktime_to_us(op->kt_ival1));
+ (long long)ktime_to_us(op->kt_ival1));
if (op->kt_ival2.tv64)
seq_printf(m, "t2=%lld ",
- (long long) ktime_to_us(op->kt_ival2));
+ (long long)ktime_to_us(op->kt_ival2));
seq_printf(m, "# sent %ld\n", op->frames_abs);
}
@@ -248,7 +260,7 @@ static void bcm_can_tx(struct bcm_op *op)
{
struct sk_buff *skb;
struct net_device *dev;
- struct can_frame *cf = &op->frames[op->currframe];
+ struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
/* no target device? => exit */
if (!op->ifindex)
@@ -260,7 +272,7 @@ static void bcm_can_tx(struct bcm_op *op)
return;
}
- skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any());
+ skb = alloc_skb(op->cfsiz + sizeof(struct can_skb_priv), gfp_any());
if (!skb)
goto out;
@@ -268,7 +280,7 @@ static void bcm_can_tx(struct bcm_op *op)
can_skb_prv(skb)->ifindex = dev->ifindex;
can_skb_prv(skb)->skbcnt = 0;
- memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
+ memcpy(skb_put(skb, op->cfsiz), cf, op->cfsiz);
/* send with loopback */
skb->dev = dev;
@@ -282,7 +294,7 @@ static void bcm_can_tx(struct bcm_op *op)
/* reached last frame? */
if (op->currframe >= op->nframes)
op->currframe = 0;
- out:
+out:
dev_put(dev);
}
@@ -291,13 +303,13 @@ static void bcm_can_tx(struct bcm_op *op)
* (consisting of bcm_msg_head + x CAN frames)
*/
static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
- struct can_frame *frames, int has_timestamp)
+ struct canfd_frame *frames, int has_timestamp)
{
struct sk_buff *skb;
- struct can_frame *firstframe;
+ struct canfd_frame *firstframe;
struct sockaddr_can *addr;
struct sock *sk = op->sk;
- unsigned int datalen = head->nframes * CFSIZ;
+ unsigned int datalen = head->nframes * op->cfsiz;
int err;
skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
@@ -307,19 +319,19 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
memcpy(skb_put(skb, sizeof(*head)), head, sizeof(*head));
if (head->nframes) {
- /* can_frames starting here */
- firstframe = (struct can_frame *)skb_tail_pointer(skb);
+ /* CAN frames starting here */
+ firstframe = (struct canfd_frame *)skb_tail_pointer(skb);
memcpy(skb_put(skb, datalen), frames, datalen);
/*
- * the BCM uses the can_dlc-element of the can_frame
+ * the BCM uses the flags-element of the canfd_frame
* structure for internal purposes. This is only
* relevant for updates that are generated by the
* BCM, where nframes is 1
*/
if (head->nframes == 1)
- firstframe->can_dlc &= BCM_CAN_DLC_MASK;
+ firstframe->flags &= BCM_CAN_FLAGS_MASK;
}
if (has_timestamp) {
@@ -406,7 +418,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
/*
* bcm_rx_changed - create a RX_CHANGED notification due to changed content
*/
-static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
+static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
{
struct bcm_msg_head head;
@@ -418,7 +430,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
op->frames_filtered = op->frames_abs = 0;
/* this element is not throttled anymore */
- data->can_dlc &= (BCM_CAN_DLC_MASK|RX_RECV);
+ data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
head.opcode = RX_CHANGED;
head.flags = op->flags;
@@ -437,13 +449,13 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
* 2. send a notification to the user (if possible)
*/
static void bcm_rx_update_and_send(struct bcm_op *op,
- struct can_frame *lastdata,
- const struct can_frame *rxdata)
+ struct canfd_frame *lastdata,
+ const struct canfd_frame *rxdata)
{
- memcpy(lastdata, rxdata, CFSIZ);
+ memcpy(lastdata, rxdata, op->cfsiz);
/* mark as used and throttled by default */
- lastdata->can_dlc |= (RX_RECV|RX_THR);
+ lastdata->flags |= (RX_RECV|RX_THR);
/* throttling mode inactive ? */
if (!op->kt_ival2.tv64) {
@@ -481,33 +493,36 @@ rx_changed_settime:
* received data stored in op->last_frames[]
*/
static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
- const struct can_frame *rxdata)
+ const struct canfd_frame *rxdata)
{
+ struct canfd_frame *cf = op->frames + op->cfsiz * index;
+ struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
+ int i;
+
/*
- * no one uses the MSBs of can_dlc for comparison,
+ * no one uses the MSBs of flags for comparison,
* so we use it here to detect the first time of reception
*/
- if (!(op->last_frames[index].can_dlc & RX_RECV)) {
+ if (!(lcf->flags & RX_RECV)) {
/* received data for the first time => send update to user */
- bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata);
return;
}
- /* do a real check in can_frame data section */
-
- if ((GET_U64(&op->frames[index]) & GET_U64(rxdata)) !=
- (GET_U64(&op->frames[index]) & GET_U64(&op->last_frames[index]))) {
- bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
- return;
+ /* do a real check in CAN frame data section */
+ for (i = 0; i < rxdata->len; i += 8) {
+ if ((get_u64(cf, i) & get_u64(rxdata, i)) !=
+ (get_u64(cf, i) & get_u64(lcf, i))) {
+ bcm_rx_update_and_send(op, lcf, rxdata);
+ return;
+ }
}
if (op->flags & RX_CHECK_DLC) {
- /* do a real check in can_frame dlc */
- if (rxdata->can_dlc != (op->last_frames[index].can_dlc &
- BCM_CAN_DLC_MASK)) {
- bcm_rx_update_and_send(op, &op->last_frames[index],
- rxdata);
+ /* do a real check in CAN frame length */
+ if (rxdata->len != lcf->len) {
+ bcm_rx_update_and_send(op, lcf, rxdata);
return;
}
}
@@ -556,8 +571,8 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
/* if user wants to be informed, when cyclic CAN-Messages come back */
if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
- /* clear received can_frames to indicate 'nothing received' */
- memset(op->last_frames, 0, op->nframes * CFSIZ);
+ /* clear received CAN frames to indicate 'nothing received' */
+ memset(op->last_frames, 0, op->nframes * op->cfsiz);
}
return HRTIMER_NORESTART;
@@ -569,9 +584,11 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
unsigned int index)
{
- if ((op->last_frames) && (op->last_frames[index].can_dlc & RX_THR)) {
+ struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
+
+ if ((op->last_frames) && (lcf->flags & RX_THR)) {
if (update)
- bcm_rx_changed(op, &op->last_frames[index]);
+ bcm_rx_changed(op, lcf);
return 1;
}
return 0;
@@ -636,15 +653,19 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
static void bcm_rx_handler(struct sk_buff *skb, void *data)
{
struct bcm_op *op = (struct bcm_op *)data;
- const struct can_frame *rxframe = (struct can_frame *)skb->data;
+ const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data;
unsigned int i;
- /* disable timeout */
- hrtimer_cancel(&op->timer);
-
if (op->can_id != rxframe->can_id)
return;
+ /* make sure to handle the correct frame type (CAN / CAN FD) */
+ if (skb->len != op->cfsiz)
+ return;
+
+ /* disable timeout */
+ hrtimer_cancel(&op->timer);
+
/* save rx timestamp */
op->rx_stamp = skb->tstamp;
/* save originator for recvfrom() */
@@ -675,13 +696,14 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
* multiplex compare
*
* find the first multiplex mask that fits.
- * Remark: The MUX-mask is stored in index 0
+ * Remark: The MUX-mask is stored in index 0 - but only the
+ * first 64 bits of the frame data[] are relevant (CAN FD)
*/
for (i = 1; i < op->nframes; i++) {
- if ((GET_U64(&op->frames[0]) & GET_U64(rxframe)) ==
- (GET_U64(&op->frames[0]) &
- GET_U64(&op->frames[i]))) {
+ if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) ==
+ (get_u64(op->frames, 0) &
+ get_u64(op->frames + op->cfsiz * i, 0))) {
bcm_rx_cmp_to_index(op, i, rxframe);
break;
}
@@ -695,13 +717,14 @@ rx_starttimer:
/*
* helpers for bcm_op handling: find & delete bcm [rx|tx] op elements
*/
-static struct bcm_op *bcm_find_op(struct list_head *ops, canid_t can_id,
- int ifindex)
+static struct bcm_op *bcm_find_op(struct list_head *ops,
+ struct bcm_msg_head *mh, int ifindex)
{
struct bcm_op *op;
list_for_each_entry(op, ops, list) {
- if ((op->can_id == can_id) && (op->ifindex == ifindex))
+ if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
+ (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME))
return op;
}
@@ -744,12 +767,14 @@ static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
/*
* bcm_delete_rx_op - find and remove a rx op (returns number of removed ops)
*/
-static int bcm_delete_rx_op(struct list_head *ops, canid_t can_id, int ifindex)
+static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
+ int ifindex)
{
struct bcm_op *op, *n;
list_for_each_entry_safe(op, n, ops, list) {
- if ((op->can_id == can_id) && (op->ifindex == ifindex)) {
+ if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
+ (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
/*
* Don't care if we're bound or not (due to netdev
@@ -789,12 +814,14 @@ static int bcm_delete_rx_op(struct list_head *ops, canid_t can_id, int ifindex)
/*
* bcm_delete_tx_op - find and remove a tx op (returns number of removed ops)
*/
-static int bcm_delete_tx_op(struct list_head *ops, canid_t can_id, int ifindex)
+static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh,
+ int ifindex)
{
struct bcm_op *op, *n;
list_for_each_entry_safe(op, n, ops, list) {
- if ((op->can_id == can_id) && (op->ifindex == ifindex)) {
+ if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
+ (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
list_del(&op->list);
bcm_remove_op(op);
return 1; /* done */
@@ -810,7 +837,7 @@ static int bcm_delete_tx_op(struct list_head *ops, canid_t can_id, int ifindex)
static int bcm_read_op(struct list_head *ops, struct bcm_msg_head *msg_head,
int ifindex)
{
- struct bcm_op *op = bcm_find_op(ops, msg_head->can_id, ifindex);
+ struct bcm_op *op = bcm_find_op(ops, msg_head, ifindex);
if (!op)
return -EINVAL;
@@ -835,6 +862,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
{
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
+ struct canfd_frame *cf;
unsigned int i;
int err;
@@ -842,39 +870,46 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!ifindex)
return -ENODEV;
- /* check nframes boundaries - we need at least one can_frame */
+ /* check nframes boundaries - we need at least one CAN frame */
if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES)
return -EINVAL;
/* check the given can_id */
- op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex);
-
+ op = bcm_find_op(&bo->tx_ops, msg_head, ifindex);
if (op) {
/* update existing BCM operation */
/*
- * Do we need more space for the can_frames than currently
+ * Do we need more space for the CAN frames than currently
* allocated? -> This is a _really_ unusual use-case and
* therefore (complexity / locking) it is not supported.
*/
if (msg_head->nframes > op->nframes)
return -E2BIG;
- /* update can_frames content */
+ /* update CAN frames content */
for (i = 0; i < msg_head->nframes; i++) {
- err = memcpy_from_msg((u8 *)&op->frames[i], msg, CFSIZ);
- if (op->frames[i].can_dlc > 8)
- err = -EINVAL;
+ cf = op->frames + op->cfsiz * i;
+ err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz);
+
+ if (op->flags & CAN_FD_FRAME) {
+ if (cf->len > 64)
+ err = -EINVAL;
+ } else {
+ if (cf->len > 8)
+ err = -EINVAL;
+ }
if (err < 0)
return err;
if (msg_head->flags & TX_CP_CAN_ID) {
/* copy can_id into frame */
- op->frames[i].can_id = msg_head->can_id;
+ cf->can_id = msg_head->can_id;
}
}
+ op->flags = msg_head->flags;
} else {
/* insert new BCM operation for the given can_id */
@@ -883,11 +918,13 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;
- op->can_id = msg_head->can_id;
+ op->can_id = msg_head->can_id;
+ op->cfsiz = CFSIZ(msg_head->flags);
+ op->flags = msg_head->flags;
- /* create array for can_frames and copy the data */
+ /* create array for CAN frames and copy the data */
if (msg_head->nframes > 1) {
- op->frames = kmalloc(msg_head->nframes * CFSIZ,
+ op->frames = kmalloc(msg_head->nframes * op->cfsiz,
GFP_KERNEL);
if (!op->frames) {
kfree(op);
@@ -897,10 +934,17 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->frames = &op->sframe;
for (i = 0; i < msg_head->nframes; i++) {
- err = memcpy_from_msg((u8 *)&op->frames[i], msg, CFSIZ);
- if (op->frames[i].can_dlc > 8)
- err = -EINVAL;
+ cf = op->frames + op->cfsiz * i;
+ err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz);
+
+ if (op->flags & CAN_FD_FRAME) {
+ if (cf->len > 64)
+ err = -EINVAL;
+ } else {
+ if (cf->len > 8)
+ err = -EINVAL;
+ }
if (err < 0) {
if (op->frames != &op->sframe)
@@ -911,7 +955,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (msg_head->flags & TX_CP_CAN_ID) {
/* copy can_id into frame */
- op->frames[i].can_id = msg_head->can_id;
+ cf->can_id = msg_head->can_id;
}
}
@@ -946,8 +990,6 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
/* check flags */
- op->flags = msg_head->flags;
-
if (op->flags & TX_RESET_MULTI_IDX) {
/* start multiple frame transmission with index 0 */
op->currframe = 0;
@@ -968,7 +1010,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (op->flags & STARTTIMER) {
hrtimer_cancel(&op->timer);
- /* spec: send can_frame when starting timer */
+ /* spec: send CAN frame when starting timer */
op->flags |= TX_ANNOUNCE;
}
@@ -981,7 +1023,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (op->flags & STARTTIMER)
bcm_tx_start_timer(op);
- return msg_head->nframes * CFSIZ + MHSIZ;
+ return msg_head->nframes * op->cfsiz + MHSIZ;
}
/*
@@ -1012,12 +1054,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
return -EINVAL;
/* check the given can_id */
- op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex);
+ op = bcm_find_op(&bo->rx_ops, msg_head, ifindex);
if (op) {
/* update existing BCM operation */
/*
- * Do we need more space for the can_frames than currently
+ * Do we need more space for the CAN frames than currently
* allocated? -> This is a _really_ unusual use-case and
* therefore (complexity / locking) it is not supported.
*/
@@ -1025,17 +1067,18 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
return -E2BIG;
if (msg_head->nframes) {
- /* update can_frames content */
+ /* update CAN frames content */
err = memcpy_from_msg((u8 *)op->frames, msg,
- msg_head->nframes * CFSIZ);
+ msg_head->nframes * op->cfsiz);
if (err < 0)
return err;
/* clear last_frames to indicate 'nothing received' */
- memset(op->last_frames, 0, msg_head->nframes * CFSIZ);
+ memset(op->last_frames, 0, msg_head->nframes * op->cfsiz);
}
op->nframes = msg_head->nframes;
+ op->flags = msg_head->flags;
/* Only an update -> do not call can_rx_register() */
do_rx_register = 0;
@@ -1046,20 +1089,22 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;
- op->can_id = msg_head->can_id;
- op->nframes = msg_head->nframes;
+ op->can_id = msg_head->can_id;
+ op->nframes = msg_head->nframes;
+ op->cfsiz = CFSIZ(msg_head->flags);
+ op->flags = msg_head->flags;
if (msg_head->nframes > 1) {
- /* create array for can_frames and copy the data */
- op->frames = kmalloc(msg_head->nframes * CFSIZ,
+ /* create array for CAN frames and copy the data */
+ op->frames = kmalloc(msg_head->nframes * op->cfsiz,
GFP_KERNEL);
if (!op->frames) {
kfree(op);
return -ENOMEM;
}
- /* create and init array for received can_frames */
- op->last_frames = kzalloc(msg_head->nframes * CFSIZ,
+ /* create and init array for received CAN frames */
+ op->last_frames = kzalloc(msg_head->nframes * op->cfsiz,
GFP_KERNEL);
if (!op->last_frames) {
kfree(op->frames);
@@ -1074,7 +1119,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (msg_head->nframes) {
err = memcpy_from_msg((u8 *)op->frames, msg,
- msg_head->nframes * CFSIZ);
+ msg_head->nframes * op->cfsiz);
if (err < 0) {
if (op->frames != &op->sframe)
kfree(op->frames);
@@ -1116,7 +1161,6 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
} /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */
/* check flags */
- op->flags = msg_head->flags;
if (op->flags & RX_RTR_FRAME) {
@@ -1188,13 +1232,14 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
}
- return msg_head->nframes * CFSIZ + MHSIZ;
+ return msg_head->nframes * op->cfsiz + MHSIZ;
}
/*
* bcm_tx_send - send a single CAN frame to the CAN interface (for bcm_sendmsg)
*/
-static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
+static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk,
+ int cfsiz)
{
struct sk_buff *skb;
struct net_device *dev;
@@ -1204,13 +1249,13 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
if (!ifindex)
return -ENODEV;
- skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL);
+ skb = alloc_skb(cfsiz + sizeof(struct can_skb_priv), GFP_KERNEL);
if (!skb)
return -ENOMEM;
can_skb_reserve(skb);
- err = memcpy_from_msg(skb_put(skb, CFSIZ), msg, CFSIZ);
+ err = memcpy_from_msg(skb_put(skb, cfsiz), msg, cfsiz);
if (err < 0) {
kfree_skb(skb);
return err;
@@ -1232,7 +1277,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
if (err)
return err;
- return CFSIZ + MHSIZ;
+ return cfsiz + MHSIZ;
}
/*
@@ -1244,13 +1289,23 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct bcm_sock *bo = bcm_sk(sk);
int ifindex = bo->ifindex; /* default ifindex for this bcm_op */
struct bcm_msg_head msg_head;
+ int cfsiz;
int ret; /* read bytes or error codes as return value */
if (!bo->bound)
return -ENOTCONN;
/* check for valid message length from userspace */
- if (size < MHSIZ || (size - MHSIZ) % CFSIZ)
+ if (size < MHSIZ)
+ return -EINVAL;
+
+ /* read message head information */
+ ret = memcpy_from_msg((u8 *)&msg_head, msg, MHSIZ);
+ if (ret < 0)
+ return ret;
+
+ cfsiz = CFSIZ(msg_head.flags);
+ if ((size - MHSIZ) % cfsiz)
return -EINVAL;
/* check for alternative ifindex for this bcm_op */
@@ -1284,12 +1339,6 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
}
- /* read message head information */
-
- ret = memcpy_from_msg((u8 *)&msg_head, msg, MHSIZ);
- if (ret < 0)
- return ret;
-
lock_sock(sk);
switch (msg_head.opcode) {
@@ -1303,14 +1352,14 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
break;
case TX_DELETE:
- if (bcm_delete_tx_op(&bo->tx_ops, msg_head.can_id, ifindex))
+ if (bcm_delete_tx_op(&bo->tx_ops, &msg_head, ifindex))
ret = MHSIZ;
else
ret = -EINVAL;
break;
case RX_DELETE:
- if (bcm_delete_rx_op(&bo->rx_ops, msg_head.can_id, ifindex))
+ if (bcm_delete_rx_op(&bo->rx_ops, &msg_head, ifindex))
ret = MHSIZ;
else
ret = -EINVAL;
@@ -1329,11 +1378,11 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
break;
case TX_SEND:
- /* we need exactly one can_frame behind the msg head */
- if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ))
+ /* we need exactly one CAN frame behind the msg head */
+ if ((msg_head.nframes != 1) || (size != cfsiz + MHSIZ))
ret = -EINVAL;
else
- ret = bcm_tx_send(msg, ifindex, sk);
+ ret = bcm_tx_send(msg, ifindex, sk, cfsiz);
break;
default:
diff --git a/net/can/proc.c b/net/can/proc.c
index 1a19b985a868..85ef7bb0f176 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -517,8 +517,7 @@ void can_init_proc(void)
can_dir = proc_mkdir("can", init_net.proc_net);
if (!can_dir) {
- printk(KERN_INFO "can: failed to create /proc/net/can . "
- "CONFIG_PROC_FS missing?\n");
+ pr_info("can: failed to create /proc/net/can.\n");
return;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 904ff431d570..aba10d2a8bc3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -139,6 +139,7 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/sctp.h>
+#include <linux/crash_dump.h>
#include "net-sysfs.h"
@@ -2249,11 +2250,12 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
*/
int netif_get_num_default_rss_queues(void)
{
- return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+ return is_kdump_kernel() ?
+ 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);
-static inline void __netif_reschedule(struct Qdisc *q)
+static void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
unsigned long flags;
@@ -2420,7 +2422,7 @@ EXPORT_SYMBOL(__skb_tx_hash);
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
- static const netdev_features_t null_features = 0;
+ static const netdev_features_t null_features;
struct net_device *dev = skb->dev;
const char *name = "";
@@ -3068,6 +3070,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
+ struct sk_buff *to_free = NULL;
bool contended;
int rc;
@@ -3075,7 +3078,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
@@ -3084,7 +3087,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
- kfree_skb(skb);
+ __qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
qdisc_run_begin(q)) {
@@ -3107,7 +3110,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = NET_XMIT_SUCCESS;
} else {
- rc = q->enqueue(skb, q) & NET_XMIT_MASK;
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -3117,6 +3120,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
}
spin_unlock(root_lock);
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3142,8 +3147,6 @@ static void skb_update_prio(struct sk_buff *skb)
DEFINE_PER_CPU(int, xmit_recursion);
EXPORT_SYMBOL(xmit_recursion);
-#define RECURSION_LIMIT 10
-
/**
* dev_loopback_xmit - loop back @skb
* @net: network namespace this loopback is happening in
@@ -3386,8 +3389,8 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
int cpu = smp_processor_id(); /* ok because BHs are off */
if (txq->xmit_lock_owner != cpu) {
-
- if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+ if (unlikely(__this_cpu_read(xmit_recursion) >
+ XMIT_RECURSION_LIMIT))
goto recursion_alert;
skb = validate_xmit_skb(skb, dev);
@@ -3898,22 +3901,14 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- qdisc_run(q);
- spin_unlock(root_lock);
- } else {
- if (!test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state)) {
- __netif_reschedule(q);
- } else {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- }
- }
+ spin_lock(root_lock);
+ /* We need to make sure head->next_sched is read
+ * before clearing __QDISC_STATE_SCHED
+ */
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+ qdisc_run(q);
+ spin_unlock(root_lock);
}
}
}
@@ -5919,7 +5914,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_add(iter->dev, dev,
&iter->dev->adj_list.lower);
@@ -5928,7 +5923,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_add(iter->dev, dev,
&iter->dev->adj_list.upper);
@@ -5944,7 +5939,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, dev->name,
&iter->dev->adj_list.lower);
@@ -5953,7 +5948,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, dev->name,
&iter->dev->adj_list.upper);
@@ -5969,7 +5964,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.lower);
@@ -5978,7 +5973,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.upper);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f4034817d255..977489820eb9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 840acebbb80c..98298b11f534 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -173,7 +173,8 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
EXPORT_SYMBOL_GPL(fib_rules_unregister);
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
- struct flowi *fl, int flags)
+ struct flowi *fl, int flags,
+ struct fib_lookup_arg *arg)
{
int ret = 0;
@@ -189,6 +190,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
goto out;
+ if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
+ goto out;
+
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -204,7 +208,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
- if (!fib_rule_match(rule, ops, fl, flags))
+ if (!fib_rule_match(rule, ops, fl, flags, arg))
continue;
if (rule->action == FR_ACT_GOTO) {
@@ -265,7 +269,7 @@ errout:
return err;
}
-static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -336,6 +340,14 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (tb[FRA_TUN_ID])
rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+ if (tb[FRA_L3MDEV]) {
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
+ if (rule->l3mdev != 1)
+#endif
+ goto errout_free;
+ }
+
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
@@ -371,6 +383,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
+ if (rule->l3mdev && rule->table)
+ goto errout_free;
+
err = ops->configure(rule, skb, frh, tb);
if (err < 0)
goto errout_free;
@@ -424,8 +439,9 @@ errout:
rules_ops_put(ops);
return err;
}
+EXPORT_SYMBOL_GPL(fib_nl_newrule);
-static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -483,6 +499,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
(rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
continue;
+ if (tb[FRA_L3MDEV] &&
+ (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -536,6 +556,7 @@ errout:
rules_ops_put(ops);
return err;
}
+EXPORT_SYMBOL_GPL(fib_nl_delrule);
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
struct fib_rule *rule)
@@ -607,7 +628,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->target &&
nla_put_u32(skb, FRA_GOTO, rule->target)) ||
(rule->tun_id &&
- nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)))
+ nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
+ (rule->l3mdev &&
+ nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index c4b330c85c02..cb9fc16cac46 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -748,6 +748,17 @@ static bool chk_code_allowed(u16 code_to_probe)
return codes[code_to_probe];
}
+static bool bpf_check_basics_ok(const struct sock_filter *filter,
+ unsigned int flen)
+{
+ if (filter == NULL)
+ return false;
+ if (flen == 0 || flen > BPF_MAXINSNS)
+ return false;
+
+ return true;
+}
+
/**
* bpf_check_classic - verify socket filter code
* @filter: filter to verify
@@ -768,9 +779,6 @@ static int bpf_check_classic(const struct sock_filter *filter,
bool anc_found;
int pc;
- if (flen == 0 || flen > BPF_MAXINSNS)
- return -EINVAL;
-
/* Check the filter code now */
for (pc = 0; pc < flen; pc++) {
const struct sock_filter *ftest = &filter[pc];
@@ -1065,7 +1073,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
struct bpf_prog *fp;
/* Make sure new filter is there and in the right amounts. */
- if (fprog->filter == NULL)
+ if (!bpf_check_basics_ok(fprog->filter, fprog->len))
return -EINVAL;
fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
@@ -1112,7 +1120,7 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
int err;
/* Make sure new filter is there and in the right amounts. */
- if (fprog->filter == NULL)
+ if (!bpf_check_basics_ok(fprog->filter, fprog->len))
return -EINVAL;
fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
@@ -1207,7 +1215,6 @@ static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
unsigned int fsize = bpf_classic_proglen(fprog);
- unsigned int bpf_fsize = bpf_prog_size(fprog->len);
struct bpf_prog *prog;
int err;
@@ -1215,10 +1222,10 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
return ERR_PTR(-EPERM);
/* Make sure new filter is there and in the right amounts. */
- if (fprog->filter == NULL)
+ if (!bpf_check_basics_ok(fprog->filter, fprog->len))
return ERR_PTR(-EINVAL);
- prog = bpf_prog_alloc(bpf_fsize, 0);
+ prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
if (!prog)
return ERR_PTR(-ENOMEM);
@@ -1603,9 +1610,36 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
.arg5_type = ARG_ANYTHING,
};
+static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ if (skb_at_tc_ingress(skb))
+ skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+
+ return dev_forward_skb(dev, skb);
+}
+
+static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ int ret;
+
+ if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
+ net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+ kfree_skb(skb);
+ return -ENETDOWN;
+ }
+
+ skb->dev = dev;
+
+ __this_cpu_inc(xmit_recursion);
+ ret = dev_queue_xmit(skb);
+ __this_cpu_dec(xmit_recursion);
+
+ return ret;
+}
+
static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
struct net_device *dev;
if (unlikely(flags & ~(BPF_F_INGRESS)))
@@ -1615,19 +1649,12 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
if (unlikely(!dev))
return -EINVAL;
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (unlikely(!skb2))
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
return -ENOMEM;
- if (flags & BPF_F_INGRESS) {
- if (skb_at_tc_ingress(skb2))
- skb_postpush_rcsum(skb2, skb_mac_header(skb2),
- skb2->mac_len);
- return dev_forward_skb(dev, skb2);
- }
-
- skb2->dev = dev;
- return dev_queue_xmit(skb2);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
}
static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1671,15 +1698,8 @@ int skb_do_redirect(struct sk_buff *skb)
return -EINVAL;
}
- if (ri->flags & BPF_F_INGRESS) {
- if (skb_at_tc_ingress(skb))
- skb_postpush_rcsum(skb, skb_mac_header(skb),
- skb->mac_len);
- return dev_forward_skb(dev, skb);
- }
-
- skb->dev = dev;
- return dev_queue_xmit(skb);
+ return ri->flags & BPF_F_INGRESS ?
+ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
}
static const struct bpf_func_proto bpf_redirect_proto = {
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 4573d81093fe..cad8e791f28e 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -84,6 +84,7 @@ struct gen_estimator
struct gnet_stats_basic_packed *bstats;
struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
+ seqcount_t *running;
int ewma_log;
u32 last_packets;
unsigned long avpps;
@@ -121,26 +122,28 @@ static void est_timer(unsigned long arg)
unsigned long rate;
u64 brate;
- spin_lock(e->stats_lock);
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
read_lock(&est_lock);
if (e->bstats == NULL)
goto skip;
- __gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);
+ __gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats);
brate = (b.bytes - e->last_bytes)<<(7 - idx);
e->last_bytes = b.bytes;
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
- e->rate_est->bps = (e->avbps+0xF)>>5;
+ WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5);
rate = b.packets - e->last_packets;
rate <<= (7 - idx);
e->last_packets = b.packets;
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
- e->rate_est->pps = (e->avpps + 0xF) >> 5;
+ WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5);
skip:
read_unlock(&est_lock);
- spin_unlock(e->stats_lock);
+ if (e->stats_lock)
+ spin_unlock(e->stats_lock);
}
if (!list_empty(&elist[idx].list))
@@ -194,6 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
+ * @running: qdisc running seqcount
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
@@ -209,6 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock,
+ seqcount_t *running,
struct nlattr *opt)
{
struct gen_estimator *est;
@@ -226,12 +231,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
if (est == NULL)
return -ENOBUFS;
- __gnet_stats_copy_basic(&b, cpu_bstats, bstats);
+ __gnet_stats_copy_basic(running, &b, cpu_bstats, bstats);
idx = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
+ est->running = running;
est->ewma_log = parm->ewma_log;
est->last_bytes = b.bytes;
est->avbps = rate_est->bps<<5;
@@ -291,6 +297,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
+ * @running: qdisc running seqcount (might be NULL)
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
@@ -301,10 +308,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
- spinlock_t *stats_lock, struct nlattr *opt)
+ spinlock_t *stats_lock,
+ seqcount_t *running, struct nlattr *opt)
{
gen_kill_estimator(bstats, rate_est);
- return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
+ return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index be873e4e3125..508e051304fb 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -32,10 +32,11 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
return 0;
nla_put_failure:
+ if (d->lock)
+ spin_unlock_bh(d->lock);
kfree(d->xstats);
d->xstats = NULL;
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return -1;
}
@@ -66,15 +67,16 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
{
memset(d, 0, sizeof(*d));
- spin_lock_bh(lock);
- d->lock = lock;
if (type)
d->tail = (struct nlattr *)skb_tail_pointer(skb);
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
d->padattr = padattr;
-
+ if (lock) {
+ d->lock = lock;
+ spin_lock_bh(lock);
+ }
if (d->tail)
return gnet_stats_copy(d, type, NULL, 0, padattr);
@@ -128,21 +130,29 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
}
void
-__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+__gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
+ unsigned int seq;
+
if (cpu) {
__gnet_stats_copy_basic_cpu(bstats, cpu);
- } else {
+ return;
+ }
+ do {
+ if (running)
+ seq = read_seqcount_begin(running);
bstats->bytes = b->bytes;
bstats->packets = b->packets;
- }
+ } while (running && read_seqcount_retry(running, seq));
}
EXPORT_SYMBOL(__gnet_stats_copy_basic);
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
+ * @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
@@ -154,13 +164,14 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic(struct gnet_dump *d,
+gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
struct gnet_stats_basic_packed bstats = {0};
- __gnet_stats_copy_basic(&bstats, cpu, b);
+ __gnet_stats_copy_basic(running, &bstats, cpu, b);
if (d->compat_tc_stats) {
d->tc_stats.bytes = bstats.bytes;
@@ -330,8 +341,9 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
return 0;
err_out:
+ if (d->lock)
+ spin_unlock_bh(d->lock);
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return -1;
}
EXPORT_SYMBOL(gnet_stats_copy_app);
@@ -365,10 +377,11 @@ gnet_stats_finish_copy(struct gnet_dump *d)
return -1;
}
+ if (d->lock)
+ spin_unlock_bh(d->lock);
kfree(d->xstats);
d->xstats = NULL;
d->xstats_len = 0;
- spin_unlock_bh(d->lock);
return 0;
}
EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8b02df0d354d..f74ab9c3b38f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3463,7 +3463,6 @@ xmit_more:
break;
case NET_XMIT_DROP:
case NET_XMIT_CN:
- case NET_XMIT_POLICED:
/* skb has been consumed */
pkt_dev->errors++;
break;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d69c4644f8f2..eb49ca24274a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -71,9 +71,31 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+static struct sk_buff *defer_kfree_skb_list;
+void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
+{
+ if (head && tail) {
+ tail->next = defer_kfree_skb_list;
+ defer_kfree_skb_list = head;
+ }
+}
+EXPORT_SYMBOL(rtnl_kfree_skbs);
+
void __rtnl_unlock(void)
{
+ struct sk_buff *head = defer_kfree_skb_list;
+
+ defer_kfree_skb_list = NULL;
+
mutex_unlock(&rtnl_mutex);
+
+ while (head) {
+ struct sk_buff *next = head->next;
+
+ kfree_skb(head);
+ cond_resched();
+ head = next;
+ }
}
void rtnl_unlock(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f2b77e549c03..e7ec6d3ad5f0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -49,6 +49,7 @@
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
+#include <linux/sctp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
@@ -3116,9 +3117,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int hsize;
int size;
- len = head_skb->len - offset;
- if (len > mss)
- len = mss;
+ if (unlikely(mss == GSO_BY_FRAGS)) {
+ len = list_skb->len;
+ } else {
+ len = head_skb->len - offset;
+ if (len > mss)
+ len = mss;
+ }
hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
@@ -3438,6 +3443,7 @@ done:
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
+EXPORT_SYMBOL_GPL(skb_gro_receive);
void __init skb_init(void)
{
@@ -4378,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
+ } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+ thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
@@ -4387,6 +4395,38 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+/**
+ * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ *
+ * @skb: GSO skb
+ * @mtu: MTU to validate against
+ *
+ * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
+ * once split.
+ */
+bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ const struct sk_buff *iter;
+ unsigned int hlen;
+
+ hlen = skb_gso_network_seglen(skb);
+
+ if (shinfo->gso_size != GSO_BY_FRAGS)
+ return hlen <= mtu;
+
+ /* Undo this so we can re-use header sizes */
+ hlen -= GSO_BY_FRAGS;
+
+ skb_walk_frags(skb, iter) {
+ if (hlen + skb_headlen(iter) > mtu)
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
+
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
if (skb_cow(skb, skb_headroom(skb)) < 0) {
diff --git a/net/core/utils.c b/net/core/utils.c
index 3d17ca8b4744..cf5622b9ccc4 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -133,7 +133,7 @@ int in4_pton(const char *src, int srclen,
s = src;
d = dbuf;
i = 0;
- while(1) {
+ while (1) {
int c;
c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) {
@@ -283,11 +283,11 @@ cont:
i = 15; d--;
if (dc) {
- while(d >= dc)
+ while (d >= dc)
dst[i--] = *d--;
- while(i >= dc - dbuf)
+ while (i >= dc - dbuf)
dst[i--] = 0;
- while(i >= 0)
+ while (i >= 0)
dst[i--] = *d--;
} else
memcpy(dst, dbuf, sizeof(dbuf));
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index da06ed1df620..8af4ded70f1c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o
+dsa_core-y += dsa.o slave.o dsa2.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index eff5dfc2e33f..766d2a525ada 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -29,6 +29,33 @@
char dsa_driver_version[] = "0.1";
+static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Just return the original SKB */
+ return skb;
+}
+
+static const struct dsa_device_ops none_ops = {
+ .xmit = dsa_slave_notag_xmit,
+ .rcv = NULL,
+};
+
+const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
+#ifdef CONFIG_NET_DSA_TAG_DSA
+ [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+ [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+ [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+ [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#endif
+ [DSA_TAG_PROTO_NONE] = &none_ops,
+};
/* switch driver registration ***********************************************/
static DEFINE_MUTEX(dsa_switch_drivers_mutex);
@@ -180,41 +207,100 @@ __ATTRIBUTE_GROUPS(dsa_hwmon);
#endif /* CONFIG_NET_DSA_HWMON */
/* basic switch operations **************************************************/
-static int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct net_device *master)
+int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
+ struct device_node *port_dn, int port)
{
- struct dsa_chip_data *cd = ds->cd;
- struct device_node *port_dn;
struct phy_device *phydev;
- int ret, port, mode;
+ int ret, mode;
+
+ if (of_phy_is_fixed_link(port_dn)) {
+ ret = of_phy_register_fixed_link(port_dn);
+ if (ret) {
+ dev_err(dev, "failed to register fixed PHY\n");
+ return ret;
+ }
+ phydev = of_phy_find_device(port_dn);
+
+ mode = of_get_phy_mode(port_dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
+
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
+ if (ds->drv->adjust_link)
+ ds->drv->adjust_link(ds, port, phydev);
+ }
+
+ return 0;
+}
+
+static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
+{
+ struct device_node *port_dn;
+ int ret, port;
for (port = 0; port < DSA_MAX_PORTS; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- port_dn = cd->port_dn[port];
- if (of_phy_is_fixed_link(port_dn)) {
- ret = of_phy_register_fixed_link(port_dn);
- if (ret) {
- netdev_err(master,
- "failed to register fixed PHY\n");
- return ret;
- }
- phydev = of_phy_find_device(port_dn);
+ port_dn = ds->ports[port].dn;
+ ret = dsa_cpu_dsa_setup(ds, dev, port_dn, port);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
- mode = of_get_phy_mode(port_dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
+const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
+{
+ const struct dsa_device_ops *ops;
+
+ if (tag_protocol >= DSA_TAG_LAST)
+ return ERR_PTR(-EINVAL);
+ ops = dsa_device_ops[tag_protocol];
+
+ if (!ops)
+ return ERR_PTR(-ENOPROTOOPT);
+
+ return ops;
+}
+
+int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds)
+{
+ struct net_device *master;
+ struct ethtool_ops *cpu_ops;
+
+ master = ds->dst->master_netdev;
+ if (ds->master_netdev)
+ master = ds->master_netdev;
+
+ cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
+ if (!cpu_ops)
+ return -ENOMEM;
+
+ memcpy(&ds->dst->master_ethtool_ops, master->ethtool_ops,
+ sizeof(struct ethtool_ops));
+ ds->dst->master_orig_ethtool_ops = master->ethtool_ops;
+ memcpy(cpu_ops, &ds->dst->master_ethtool_ops,
+ sizeof(struct ethtool_ops));
+ dsa_cpu_port_ethtool_init(cpu_ops);
+ master->ethtool_ops = cpu_ops;
- genphy_config_init(phydev);
- genphy_read_status(phydev);
- if (ds->drv->adjust_link)
- ds->drv->adjust_link(ds, port, phydev);
- }
- }
return 0;
}
+void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
+{
+ struct net_device *master;
+
+ master = ds->dst->master_netdev;
+ if (ds->master_netdev)
+ master = ds->master_netdev;
+
+ master->ethtool_ops = ds->dst->master_orig_ethtool_ops;
+}
+
static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
{
struct dsa_switch_driver *drv = ds->drv;
@@ -243,6 +329,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
}
dst->cpu_switch = index;
dst->cpu_port = i;
+ ds->cpu_port_mask |= 1 << i;
} else if (!strcmp(name, "dsa")) {
ds->dsa_port_mask |= 1 << i;
} else {
@@ -267,37 +354,17 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
* switch.
*/
if (dst->cpu_switch == index) {
- switch (drv->tag_protocol) {
-#ifdef CONFIG_NET_DSA_TAG_DSA
- case DSA_TAG_PROTO_DSA:
- dst->rcv = dsa_netdev_ops.rcv;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
- case DSA_TAG_PROTO_EDSA:
- dst->rcv = edsa_netdev_ops.rcv;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- case DSA_TAG_PROTO_TRAILER:
- dst->rcv = trailer_netdev_ops.rcv;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM
- case DSA_TAG_PROTO_BRCM:
- dst->rcv = brcm_netdev_ops.rcv;
- break;
-#endif
- case DSA_TAG_PROTO_NONE:
- break;
- default:
- ret = -ENOPROTOOPT;
+ dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol);
+ if (IS_ERR(dst->tag_ops)) {
+ ret = PTR_ERR(dst->tag_ops);
goto out;
}
- dst->tag_protocol = drv->tag_protocol;
+ dst->rcv = dst->tag_ops->rcv;
}
+ memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
+
/*
* Do basic register setup.
*/
@@ -309,22 +376,25 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
if (ret < 0)
goto out;
- ds->slave_mii_bus = devm_mdiobus_alloc(parent);
- if (ds->slave_mii_bus == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- dsa_slave_mii_bus_init(ds);
-
- ret = mdiobus_register(ds->slave_mii_bus);
- if (ret < 0)
- goto out;
+ if (!ds->slave_mii_bus && drv->phy_read) {
+ ds->slave_mii_bus = devm_mdiobus_alloc(parent);
+ if (!ds->slave_mii_bus) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dsa_slave_mii_bus_init(ds);
+ ret = mdiobus_register(ds->slave_mii_bus);
+ if (ret < 0)
+ goto out;
+ }
/*
* Create network devices for physical switch ports.
*/
for (i = 0; i < DSA_MAX_PORTS; i++) {
+ ds->ports[i].dn = cd->port_dn[i];
+
if (!(ds->enabled_port_mask & (1 << i)))
continue;
@@ -337,13 +407,17 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
}
/* Perform configuration of the CPU and DSA ports */
- ret = dsa_cpu_dsa_setup(ds, dst->master_netdev);
+ ret = dsa_cpu_dsa_setups(ds, parent);
if (ret < 0) {
netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
index);
ret = 0;
}
+ ret = dsa_cpu_port_ethtool_setup(ds);
+ if (ret)
+ return ret;
+
#ifdef CONFIG_NET_DSA_HWMON
/* If the switch provides a temperature sensor,
* register with hardware monitoring subsystem.
@@ -420,11 +494,21 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
return ds;
}
-static void dsa_switch_destroy(struct dsa_switch *ds)
+void dsa_cpu_dsa_destroy(struct device_node *port_dn)
{
- struct device_node *port_dn;
struct phy_device *phydev;
- struct dsa_chip_data *cd = ds->cd;
+
+ if (of_phy_is_fixed_link(port_dn)) {
+ phydev = of_phy_find_device(port_dn);
+ if (phydev) {
+ phy_device_free(phydev);
+ fixed_phy_unregister(phydev);
+ }
+ }
+}
+
+static void dsa_switch_destroy(struct dsa_switch *ds)
+{
int port;
#ifdef CONFIG_NET_DSA_HWMON
@@ -437,26 +521,25 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
if (!(ds->enabled_port_mask & (1 << port)))
continue;
- if (!ds->ports[port])
+ if (!ds->ports[port].netdev)
continue;
- dsa_slave_destroy(ds->ports[port]);
+ dsa_slave_destroy(ds->ports[port].netdev);
}
- /* Remove any fixed link PHYs */
+ /* Disable configuration of the CPU and DSA ports */
for (port = 0; port < DSA_MAX_PORTS; port++) {
- port_dn = cd->port_dn[port];
- if (of_phy_is_fixed_link(port_dn)) {
- phydev = of_phy_find_device(port_dn);
- if (phydev) {
- phy_device_free(phydev);
- of_node_put(port_dn);
- fixed_phy_unregister(phydev);
- }
- }
+ if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
+ continue;
+ dsa_cpu_dsa_destroy(ds->ports[port].dn);
+
+ /* Clearing a bit which is not set does no harm */
+ ds->cpu_port_mask |= ~(1 << port);
+ ds->dsa_port_mask |= ~(1 << port);
}
- mdiobus_unregister(ds->slave_mii_bus);
+ if (ds->slave_mii_bus && ds->drv->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
}
#ifdef CONFIG_PM_SLEEP
@@ -469,7 +552,7 @@ static int dsa_switch_suspend(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_suspend(ds->ports[i]);
+ ret = dsa_slave_suspend(ds->ports[i].netdev);
if (ret)
return ret;
}
@@ -495,7 +578,7 @@ static int dsa_switch_resume(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_resume(ds->ports[i]);
+ ret = dsa_slave_resume(ds->ports[i].netdev);
if (ret)
return ret;
}
@@ -587,17 +670,6 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
if (link_sw_addr >= pd->nr_chips)
return -EINVAL;
- /* First time routing table allocation */
- if (!cd->rtable) {
- cd->rtable = kmalloc_array(pd->nr_chips, sizeof(s8),
- GFP_KERNEL);
- if (!cd->rtable)
- return -ENOMEM;
-
- /* default to no valid uplink/downlink */
- memset(cd->rtable, -1, pd->nr_chips * sizeof(s8));
- }
-
cd->rtable[link_sw_addr] = port_index;
return 0;
@@ -639,7 +711,6 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
kfree(pd->chip[i].port_names[port_index]);
port_index++;
}
- kfree(pd->chip[i].rtable);
/* Drop our reference to the MDIO bus device */
if (pd->chip[i].host_dev)
@@ -931,6 +1002,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
+ dsa_cpu_port_ethtool_restore(dst->ds[0]);
+
dev_put(dst->master_netdev);
}
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
new file mode 100644
index 000000000000..83b95fc4cede
--- /dev/null
+++ b/net/dsa/dsa2.c
@@ -0,0 +1,690 @@
+/*
+ * net/dsa/dsa2.c - Hardware switch handling, binding version 2
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <net/dsa.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include "dsa_priv.h"
+
+static LIST_HEAD(dsa_switch_trees);
+static DEFINE_MUTEX(dsa2_mutex);
+
+static struct dsa_switch_tree *dsa_get_dst(u32 tree)
+{
+ struct dsa_switch_tree *dst;
+
+ list_for_each_entry(dst, &dsa_switch_trees, list)
+ if (dst->tree == tree)
+ return dst;
+ return NULL;
+}
+
+static void dsa_free_dst(struct kref *ref)
+{
+ struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree,
+ refcount);
+
+ list_del(&dst->list);
+ kfree(dst);
+}
+
+static void dsa_put_dst(struct dsa_switch_tree *dst)
+{
+ kref_put(&dst->refcount, dsa_free_dst);
+}
+
+static struct dsa_switch_tree *dsa_add_dst(u32 tree)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return NULL;
+ dst->tree = tree;
+ dst->cpu_switch = -1;
+ INIT_LIST_HEAD(&dst->list);
+ list_add_tail(&dsa_switch_trees, &dst->list);
+ kref_init(&dst->refcount);
+
+ return dst;
+}
+
+static void dsa_dst_add_ds(struct dsa_switch_tree *dst,
+ struct dsa_switch *ds, u32 index)
+{
+ kref_get(&dst->refcount);
+ dst->ds[index] = ds;
+}
+
+static void dsa_dst_del_ds(struct dsa_switch_tree *dst,
+ struct dsa_switch *ds, u32 index)
+{
+ dst->ds[index] = NULL;
+ kref_put(&dst->refcount, dsa_free_dst);
+}
+
+static bool dsa_port_is_dsa(struct device_node *port)
+{
+ const char *name;
+
+ name = of_get_property(port, "label", NULL);
+ if (!name)
+ return false;
+
+ if (!strcmp(name, "dsa"))
+ return true;
+
+ return false;
+}
+
+static bool dsa_port_is_cpu(struct device_node *port)
+{
+ const char *name;
+
+ name = of_get_property(port, "label", NULL);
+ if (!name)
+ return false;
+
+ if (!strcmp(name, "cpu"))
+ return true;
+
+ return false;
+}
+
+static bool dsa_ds_find_port(struct dsa_switch *ds,
+ struct device_node *port)
+{
+ u32 index;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++)
+ if (ds->ports[index].dn == port)
+ return true;
+ return false;
+}
+
+static struct dsa_switch *dsa_dst_find_port(struct dsa_switch_tree *dst,
+ struct device_node *port)
+{
+ struct dsa_switch *ds;
+ u32 index;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ if (dsa_ds_find_port(ds, port))
+ return ds;
+ }
+
+ return NULL;
+}
+
+static int dsa_port_complete(struct dsa_switch_tree *dst,
+ struct dsa_switch *src_ds,
+ struct device_node *port,
+ u32 src_port)
+{
+ struct device_node *link;
+ int index;
+ struct dsa_switch *dst_ds;
+
+ for (index = 0;; index++) {
+ link = of_parse_phandle(port, "link", index);
+ if (!link)
+ break;
+
+ dst_ds = dsa_dst_find_port(dst, link);
+ of_node_put(link);
+
+ if (!dst_ds)
+ return 1;
+
+ src_ds->rtable[dst_ds->index] = src_port;
+ }
+
+ return 0;
+}
+
+/* A switch is complete if all the DSA ports phandles point to ports
+ * known in the tree. A return value of 1 means the tree is not
+ * complete. This is not an error condition. A value of 0 is
+ * success.
+ */
+static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (!dsa_port_is_dsa(port))
+ continue;
+
+ err = dsa_port_complete(dst, ds, port, index);
+ if (err != 0)
+ return err;
+
+ ds->dsa_port_mask |= BIT(index);
+ }
+
+ return 0;
+}
+
+/* A tree is complete if all the DSA ports phandles point to ports
+ * known in the tree. A return value of 1 means the tree is not
+ * complete. This is not an error condition. A value of 0 is
+ * success.
+ */
+static int dsa_dst_complete(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ err = dsa_ds_complete(dst, ds);
+ if (err != 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static int dsa_dsa_port_apply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ int err;
+
+ err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+ if (err) {
+ dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
+ index, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void dsa_dsa_port_unapply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ dsa_cpu_dsa_destroy(port);
+}
+
+static int dsa_cpu_port_apply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ int err;
+
+ err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+ if (err) {
+ dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
+ index, err);
+ return err;
+ }
+
+ ds->cpu_port_mask |= BIT(index);
+
+ return 0;
+}
+
+static void dsa_cpu_port_unapply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ dsa_cpu_dsa_destroy(port);
+ ds->cpu_port_mask &= ~BIT(index);
+
+}
+
+static int dsa_user_port_apply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ const char *name;
+ int err;
+
+ name = of_get_property(port, "label", NULL);
+
+ err = dsa_slave_create(ds, ds->dev, index, name);
+ if (err) {
+ dev_warn(ds->dev, "Failed to create slave %d: %d\n",
+ index, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void dsa_user_port_unapply(struct device_node *port, u32 index,
+ struct dsa_switch *ds)
+{
+ if (ds->ports[index].netdev) {
+ dsa_slave_destroy(ds->ports[index].netdev);
+ ds->ports[index].netdev = NULL;
+ ds->enabled_port_mask &= ~(1 << index);
+ }
+}
+
+static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+ int err;
+
+ /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
+ * driver and before drv->setup() has run, since the switch drivers and
+ * the slave MDIO bus driver rely on these values for probing PHY
+ * devices or not
+ */
+ ds->phys_mii_mask = ds->enabled_port_mask;
+
+ err = ds->drv->setup(ds);
+ if (err < 0)
+ return err;
+
+ err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+ if (err < 0)
+ return err;
+
+ err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+ if (err < 0)
+ return err;
+
+ if (!ds->slave_mii_bus && ds->drv->phy_read) {
+ ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+ if (!ds->slave_mii_bus)
+ return -ENOMEM;
+
+ dsa_slave_mii_bus_init(ds);
+
+ err = mdiobus_register(ds->slave_mii_bus);
+ if (err < 0)
+ return err;
+ }
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (dsa_port_is_dsa(port)) {
+ err = dsa_dsa_port_apply(port, index, ds);
+ if (err)
+ return err;
+ continue;
+ }
+
+ if (dsa_port_is_cpu(port)) {
+ err = dsa_cpu_port_apply(port, index, ds);
+ if (err)
+ return err;
+ continue;
+ }
+
+ err = dsa_user_port_apply(port, index, ds);
+ if (err)
+ continue;
+ }
+
+ return 0;
+}
+
+static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (dsa_port_is_dsa(port)) {
+ dsa_dsa_port_unapply(port, index, ds);
+ continue;
+ }
+
+ if (dsa_port_is_cpu(port)) {
+ dsa_cpu_port_unapply(port, index, ds);
+ continue;
+ }
+
+ dsa_user_port_unapply(port, index, ds);
+ }
+
+ if (ds->slave_mii_bus && ds->drv->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
+}
+
+static int dsa_dst_apply(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ err = dsa_ds_apply(dst, ds);
+ if (err)
+ return err;
+ }
+
+ err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
+ if (err)
+ return err;
+
+ /* If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+ dst->master_netdev->dsa_ptr = (void *)dst;
+ dst->applied = true;
+
+ return 0;
+}
+
+static void dsa_dst_unapply(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+
+ if (!dst->applied)
+ return;
+
+ dst->master_netdev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ dsa_ds_unapply(dst, ds);
+ }
+
+ dsa_cpu_port_ethtool_restore(dst->ds[0]);
+
+ pr_info("DSA: tree %d unapplied\n", dst->tree);
+ dst->applied = false;
+}
+
+static int dsa_cpu_parse(struct device_node *port, u32 index,
+ struct dsa_switch_tree *dst,
+ struct dsa_switch *ds)
+{
+ struct net_device *ethernet_dev;
+ struct device_node *ethernet;
+
+ ethernet = of_parse_phandle(port, "ethernet", 0);
+ if (!ethernet)
+ return -EINVAL;
+
+ ethernet_dev = of_find_net_device_by_node(ethernet);
+ if (!ethernet_dev)
+ return -EPROBE_DEFER;
+
+ if (!ds->master_netdev)
+ ds->master_netdev = ethernet_dev;
+
+ if (!dst->master_netdev)
+ dst->master_netdev = ethernet_dev;
+
+ if (dst->cpu_switch == -1) {
+ dst->cpu_switch = ds->index;
+ dst->cpu_port = index;
+ }
+
+ dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol);
+ if (IS_ERR(dst->tag_ops)) {
+ dev_warn(ds->dev, "No tagger for this switch\n");
+ return PTR_ERR(dst->tag_ops);
+ }
+
+ dst->rcv = dst->tag_ops->rcv;
+
+ return 0;
+}
+
+static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_PORTS; index++) {
+ port = ds->ports[index].dn;
+ if (!port)
+ continue;
+
+ if (dsa_port_is_cpu(port)) {
+ err = dsa_cpu_parse(port, index, dst, ds);
+ if (err)
+ return err;
+ }
+ }
+
+ pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index);
+
+ return 0;
+}
+
+static int dsa_dst_parse(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ u32 index;
+ int err;
+
+ for (index = 0; index < DSA_MAX_SWITCHES; index++) {
+ ds = dst->ds[index];
+ if (!ds)
+ continue;
+
+ err = dsa_ds_parse(dst, ds);
+ if (err)
+ return err;
+ }
+
+ if (!dst->master_netdev) {
+ pr_warn("Tree has no master device\n");
+ return -EINVAL;
+ }
+
+ pr_info("DSA: tree %d parsed\n", dst->tree);
+
+ return 0;
+}
+
+static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
+{
+ struct device_node *port;
+ int err;
+ u32 reg;
+
+ for_each_available_child_of_node(ports, port) {
+ err = of_property_read_u32(port, "reg", &reg);
+ if (err)
+ return err;
+
+ if (reg >= DSA_MAX_PORTS)
+ return -EINVAL;
+
+ ds->ports[reg].dn = port;
+
+ /* Initialize enabled_port_mask now for drv->setup()
+ * to have access to a correct value, just like what
+ * net/dsa/dsa.c::dsa_switch_setup_one does.
+ */
+ if (!dsa_port_is_cpu(port))
+ ds->enabled_port_mask |= 1 << reg;
+ }
+
+ return 0;
+}
+
+static int dsa_parse_member(struct device_node *np, u32 *tree, u32 *index)
+{
+ int err;
+
+ *tree = *index = 0;
+
+ err = of_property_read_u32_index(np, "dsa,member", 0, tree);
+ if (err) {
+ /* Does not exist, but it is optional */
+ if (err == -EINVAL)
+ return 0;
+ return err;
+ }
+
+ err = of_property_read_u32_index(np, "dsa,member", 1, index);
+ if (err)
+ return err;
+
+ if (*index >= DSA_MAX_SWITCHES)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct device_node *dsa_get_ports(struct dsa_switch *ds,
+ struct device_node *np)
+{
+ struct device_node *ports;
+
+ ports = of_get_child_by_name(np, "ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return ports;
+}
+
+static int _dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
+{
+ struct device_node *ports = dsa_get_ports(ds, np);
+ struct dsa_switch_tree *dst;
+ u32 tree, index;
+ int err;
+
+ err = dsa_parse_member(np, &tree, &index);
+ if (err)
+ return err;
+
+ if (IS_ERR(ports))
+ return PTR_ERR(ports);
+
+ err = dsa_parse_ports_dn(ports, ds);
+ if (err)
+ return err;
+
+ dst = dsa_get_dst(tree);
+ if (!dst) {
+ dst = dsa_add_dst(tree);
+ if (!dst)
+ return -ENOMEM;
+ }
+
+ if (dst->ds[index]) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ ds->dst = dst;
+ ds->index = index;
+ dsa_dst_add_ds(dst, ds, index);
+
+ err = dsa_dst_complete(dst);
+ if (err < 0)
+ goto out_del_dst;
+
+ if (err == 1) {
+ /* Not all switches registered yet */
+ err = 0;
+ goto out;
+ }
+
+ if (dst->applied) {
+ pr_info("DSA: Disjoint trees?\n");
+ return -EINVAL;
+ }
+
+ err = dsa_dst_parse(dst);
+ if (err)
+ goto out_del_dst;
+
+ err = dsa_dst_apply(dst);
+ if (err) {
+ dsa_dst_unapply(dst);
+ goto out_del_dst;
+ }
+
+ dsa_put_dst(dst);
+ return 0;
+
+out_del_dst:
+ dsa_dst_del_ds(dst, ds, ds->index);
+out:
+ dsa_put_dst(dst);
+
+ return err;
+}
+
+int dsa_register_switch(struct dsa_switch *ds, struct device_node *np)
+{
+ int err;
+
+ mutex_lock(&dsa2_mutex);
+ err = _dsa_register_switch(ds, np);
+ mutex_unlock(&dsa2_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(dsa_register_switch);
+
+void _dsa_unregister_switch(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+
+ dsa_dst_unapply(dst);
+
+ dsa_dst_del_ds(dst, ds, ds->index);
+}
+
+void dsa_unregister_switch(struct dsa_switch *ds)
+{
+ mutex_lock(&dsa2_mutex);
+ _dsa_unregister_switch(ds);
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_unregister_switch);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index dfa33779d49c..00077a9c97f4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -50,12 +50,19 @@ struct dsa_slave_priv {
/* dsa.c */
extern char dsa_driver_version[];
+int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
+ struct device_node *port_dn, int port);
+void dsa_cpu_dsa_destroy(struct device_node *port_dn);
+const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
+int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds);
+void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
+void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops);
int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, char *name);
+ int port, const char *name);
void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 152436cdab30..7236eb26dc97 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -49,8 +49,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
ds->slave_mii_bus->name = "dsa slave smi";
ds->slave_mii_bus->read = dsa_slave_phy_read;
ds->slave_mii_bus->write = dsa_slave_phy_write;
- snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x",
- ds->index, ds->cd->sw_addr);
+ snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
+ ds->dst->tree, ds->index);
ds->slave_mii_bus->parent = ds->dev;
ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
}
@@ -522,14 +522,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- /* Just return the original SKB */
- return skb;
-}
-
-
/* ethtool operations *******************************************************/
static int
dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
@@ -615,7 +607,7 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->cd->eeprom_len)
+ if (ds->cd && ds->cd->eeprom_len)
return ds->cd->eeprom_len;
if (ds->drv->get_eeprom_len)
@@ -873,6 +865,13 @@ static void dsa_slave_poll_controller(struct net_device *dev)
}
#endif
+void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
+{
+ ops->get_sset_count = dsa_cpu_port_get_sset_count;
+ ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
+ ops->get_strings = dsa_cpu_port_get_strings;
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_settings = dsa_slave_get_settings,
.set_settings = dsa_slave_set_settings,
@@ -893,8 +892,6 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eee = dsa_slave_get_eee,
};
-static struct ethtool_ops dsa_cpu_port_ethtool_ops;
-
static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
@@ -999,13 +996,12 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
struct net_device *slave_dev)
{
struct dsa_switch *ds = p->parent;
- struct dsa_chip_data *cd = ds->cd;
struct device_node *phy_dn, *port_dn;
bool phy_is_fixed = false;
u32 phy_flags = 0;
int mode, ret;
- port_dn = cd->port_dn[p->port];
+ port_dn = ds->ports[p->port].dn;
mode = of_get_phy_mode(port_dn);
if (mode < 0)
mode = PHY_INTERFACE_MODE_NA;
@@ -1109,14 +1105,18 @@ int dsa_slave_resume(struct net_device *slave_dev)
}
int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, char *name)
+ int port, const char *name)
{
- struct net_device *master = ds->dst->master_netdev;
struct dsa_switch_tree *dst = ds->dst;
+ struct net_device *master;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
int ret;
+ master = ds->dst->master_netdev;
+ if (ds->master_netdev)
+ master = ds->master_netdev;
+
slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
NET_NAME_UNKNOWN, ether_setup);
if (slave_dev == NULL)
@@ -1124,19 +1124,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->features = master->vlan_features;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
- if (master->ethtool_ops != &dsa_cpu_port_ethtool_ops) {
- memcpy(&dst->master_ethtool_ops, master->ethtool_ops,
- sizeof(struct ethtool_ops));
- memcpy(&dsa_cpu_port_ethtool_ops, &dst->master_ethtool_ops,
- sizeof(struct ethtool_ops));
- dsa_cpu_port_ethtool_ops.get_sset_count =
- dsa_cpu_port_get_sset_count;
- dsa_cpu_port_ethtool_ops.get_ethtool_stats =
- dsa_cpu_port_get_ethtool_stats;
- dsa_cpu_port_ethtool_ops.get_strings =
- dsa_cpu_port_get_strings;
- master->ethtool_ops = &dsa_cpu_port_ethtool_ops;
- }
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
@@ -1147,49 +1134,24 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
NULL);
SET_NETDEV_DEV(slave_dev, parent);
- slave_dev->dev.of_node = ds->cd->port_dn[port];
+ slave_dev->dev.of_node = ds->ports[port].dn;
slave_dev->vlan_features = master->vlan_features;
p = netdev_priv(slave_dev);
p->parent = ds;
p->port = port;
-
- switch (ds->dst->tag_protocol) {
-#ifdef CONFIG_NET_DSA_TAG_DSA
- case DSA_TAG_PROTO_DSA:
- p->xmit = dsa_netdev_ops.xmit;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
- case DSA_TAG_PROTO_EDSA:
- p->xmit = edsa_netdev_ops.xmit;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- case DSA_TAG_PROTO_TRAILER:
- p->xmit = trailer_netdev_ops.xmit;
- break;
-#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM
- case DSA_TAG_PROTO_BRCM:
- p->xmit = brcm_netdev_ops.xmit;
- break;
-#endif
- default:
- p->xmit = dsa_slave_notag_xmit;
- break;
- }
+ p->xmit = dst->tag_ops->xmit;
p->old_pause = -1;
p->old_link = -1;
p->old_duplex = -1;
- ds->ports[port] = slave_dev;
+ ds->ports[port].netdev = slave_dev;
ret = register_netdev(slave_dev);
if (ret) {
netdev_err(master, "error %d registering interface %s\n",
ret, slave_dev->name);
- ds->ports[port] = NULL;
+ ds->ports[port].netdev = NULL;
free_netdev(slave_dev);
return ret;
}
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e2aadb73111d..21bffde6e4bf 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -127,7 +127,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
/* Validate port against switch setup, either the port is totally */
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
/* Remove Broadcom tag and update checksum */
@@ -140,7 +140,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb->protocol = eth_type_trans(skb, skb->dev);
skb->dev->stats.rx_packets++;
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index aa780e4ac0bd..bce79ffe342b 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -107,10 +107,14 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
* Check that the source device exists and that the source
* port is a registered DSA port.
*/
- if (source_device >= dst->pd->nr_chips)
+ if (source_device >= DSA_MAX_SWITCHES)
goto out_drop;
+
ds = dst->ds[source_device];
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (!ds)
+ goto out_drop;
+
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
/*
@@ -159,7 +163,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 2288c8098c42..6c1720e88537 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -120,10 +120,14 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
* Check that the source device exists and that the source
* port is a registered DSA port.
*/
- if (source_device >= dst->pd->nr_chips)
+ if (source_device >= DSA_MAX_SWITCHES)
goto out_drop;
+
ds = dst->ds[source_device];
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (!ds)
+ goto out_drop;
+
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
/*
@@ -178,7 +182,7 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b6ca0890d018..5e3903eb1afa 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -82,12 +82,12 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
goto out_drop;
source_port = trailer[1] & 7;
- if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ if (source_port >= DSA_MAX_PORTS || !ds->ports[source_port].netdev)
goto out_drop;
pskb_trim_rcsum(skb, skb->len - 4);
- skb->dev = ds->ports[source_port];
+ skb->dev = ds->ports[source_port].netdev;
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index dd085db8580e..8c004a0c8d64 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -58,21 +58,10 @@ static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
-static struct lock_class_key lowpan_tx_busylock;
-static struct lock_class_key lowpan_netdev_xmit_lock_key;
-
-static void lowpan_set_lockdep_class_one(struct net_device *ldev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock,
- &lowpan_netdev_xmit_lock_key);
-}
-
static int lowpan_dev_init(struct net_device *ldev)
{
- netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL);
- ldev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ netdev_lockdep_set_classes(ldev);
+
return 0;
}
@@ -92,11 +81,21 @@ static int lowpan_stop(struct net_device *dev)
return 0;
}
+static int lowpan_neigh_construct(struct neighbour *n)
+{
+ struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
+
+ /* default no short_addr is available for a neighbour */
+ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ return 0;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_open = lowpan_open,
.ndo_stop = lowpan_stop,
+ .ndo_neigh_construct = lowpan_neigh_construct,
};
static void lowpan_setup(struct net_device *ldev)
@@ -161,6 +160,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
wdev->needed_headroom;
ldev->needed_tailroom = wdev->needed_tailroom;
+ ldev->neigh_priv_len = sizeof(struct lowpan_802154_neigh);
+
ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154);
if (ret < 0) {
dev_put(wdev);
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index e459afd16bb3..dbb476d7d38f 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -9,6 +9,7 @@
*/
#include <net/6lowpan.h>
+#include <net/ndisc.h>
#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
@@ -17,19 +18,9 @@
#define LOWPAN_FRAG1_HEAD_SIZE 0x4
#define LOWPAN_FRAGN_HEAD_SIZE 0x5
-/* don't save pan id, it's intra pan */
-struct lowpan_addr {
- u8 mode;
- union {
- /* IPv6 needs big endian here */
- __be64 extended_addr;
- __be16 short_addr;
- } u;
-};
-
struct lowpan_addr_info {
- struct lowpan_addr daddr;
- struct lowpan_addr saddr;
+ struct ieee802154_addr daddr;
+ struct ieee802154_addr saddr;
};
static inline struct
@@ -48,12 +39,14 @@ lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb)
* RAW/DGRAM sockets.
*/
int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len)
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
- const u8 *saddr = _saddr;
- const u8 *daddr = _daddr;
- struct lowpan_addr_info *info;
+ struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
+ struct lowpan_addr_info *info = lowpan_skb_priv(skb);
+ struct lowpan_802154_neigh *llneigh = NULL;
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct neighbour *n;
/* TODO:
* if this package isn't ipv6 one, where should it be routed?
@@ -61,21 +54,50 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
if (type != ETH_P_IPV6)
return 0;
- if (!saddr)
- saddr = ldev->dev_addr;
+ /* intra-pan communication */
+ info->saddr.pan_id = wpan_dev->pan_id;
+ info->daddr.pan_id = info->saddr.pan_id;
- raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
- raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
+ if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
+ info->daddr.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+ info->daddr.mode = IEEE802154_ADDR_SHORT;
+ } else {
+ __le16 short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+
+ n = neigh_lookup(&nd_tbl, &hdr->daddr, ldev);
+ if (n) {
+ llneigh = lowpan_802154_neigh(neighbour_priv(n));
+ read_lock_bh(&n->lock);
+ short_addr = llneigh->short_addr;
+ read_unlock_bh(&n->lock);
+ }
- info = lowpan_skb_priv(skb);
+ if (llneigh &&
+ lowpan_802154_is_valid_src_short_addr(short_addr)) {
+ info->daddr.short_addr = short_addr;
+ info->daddr.mode = IEEE802154_ADDR_SHORT;
+ } else {
+ info->daddr.mode = IEEE802154_ADDR_LONG;
+ ieee802154_be64_to_le64(&info->daddr.extended_addr,
+ daddr);
+ }
- /* TODO: Currently we only support extended_addr */
- info->daddr.mode = IEEE802154_ADDR_LONG;
- memcpy(&info->daddr.u.extended_addr, daddr,
- sizeof(info->daddr.u.extended_addr));
- info->saddr.mode = IEEE802154_ADDR_LONG;
- memcpy(&info->saddr.u.extended_addr, saddr,
- sizeof(info->daddr.u.extended_addr));
+ if (n)
+ neigh_release(n);
+ }
+
+ if (!saddr) {
+ if (lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) {
+ info->saddr.mode = IEEE802154_ADDR_SHORT;
+ info->saddr.short_addr = wpan_dev->short_addr;
+ } else {
+ info->saddr.mode = IEEE802154_ADDR_LONG;
+ info->saddr.extended_addr = wpan_dev->extended_addr;
+ }
+ } else {
+ info->saddr.mode = IEEE802154_ADDR_LONG;
+ ieee802154_be64_to_le64(&info->saddr.extended_addr, saddr);
+ }
return 0;
}
@@ -209,47 +231,26 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
u16 *dgram_size, u16 *dgram_offset)
{
struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
- struct ieee802154_addr sa, da;
struct ieee802154_mac_cb *cb = mac_cb_init(skb);
struct lowpan_addr_info info;
- void *daddr, *saddr;
memcpy(&info, lowpan_skb_priv(skb), sizeof(info));
- /* TODO: Currently we only support extended_addr */
- daddr = &info.daddr.u.extended_addr;
- saddr = &info.saddr.u.extended_addr;
-
*dgram_size = skb->len;
- lowpan_header_compress(skb, ldev, daddr, saddr);
+ lowpan_header_compress(skb, ldev, &info.daddr, &info.saddr);
/* dgram_offset = (saved bytes after compression) + lowpan header len */
*dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb);
cb->type = IEEE802154_FC_TYPE_DATA;
- /* prepare wpan address data */
- sa.mode = IEEE802154_ADDR_LONG;
- sa.pan_id = wpan_dev->pan_id;
- sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
-
- /* intra-PAN communications */
- da.pan_id = sa.pan_id;
-
- /* if the destination address is the broadcast address, use the
- * corresponding short address
- */
- if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
- da.mode = IEEE802154_ADDR_SHORT;
- da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+ if (info.daddr.mode == IEEE802154_ADDR_SHORT &&
+ ieee802154_is_broadcast_short_addr(info.daddr.short_addr))
cb->ackreq = false;
- } else {
- da.mode = IEEE802154_ADDR_LONG;
- da.extended_addr = ieee802154_devaddr_from_raw(daddr);
+ else
cb->ackreq = wpan_dev->ackreq;
- }
- return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev, &da,
- &sa, 0);
+ return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev,
+ &info.daddr, &info.saddr, 0);
}
netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 238225b0c970..50d6a9b49f6c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -532,6 +532,22 @@ config TCP_CONG_VEGAS
window. TCP Vegas should provide less packet loss, but it is
not as aggressive as TCP Reno.
+config TCP_CONG_NV
+ tristate "TCP NV"
+ default n
+ ---help---
+ TCP NV is a follow up to TCP Vegas. It has been modified to deal with
+ 10G networks, measurement noise introduced by LRO, GRO and interrupt
+ coalescence. In addition, it will decrease its cwnd multiplicatively
+ instead of linearly.
+
+ Note that in general congestion avoidance (cwnd decreased when # packets
+ queued grows) cannot coexist with congestion control (cwnd decreased only
+ when there is packet loss) due to fairness issues. One scenario when they
+ can coexist safely is when the CA flows have RTTs << CC flows RTTs.
+
+ For further details see http://www.brakmo.org/networking/tcp-nv/
+
config TCP_CONG_SCALABLE
tristate "Scalable TCP"
default n
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bfa133691cde..24629b6f57cc 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
+obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f2bda9e89c61..6e9ea69e5f75 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -76,6 +76,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
{
int err = -EAGAIN;
struct fib_table *tbl;
+ u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -94,7 +95,8 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
rcu_read_lock();
- tbl = fib_get_table(rule->fr_net, rule->table);
+ tb_id = fib_rule_get_table(rule, arg);
+ tbl = fib_get_table(rule->fr_net, tb_id);
if (tbl)
err = fib_table_lookup(tbl, &flp->u.ip4,
(struct fib_result *)arg->result,
@@ -180,7 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (err)
goto errout;
- if (rule->table == RT_TABLE_UNSPEC) {
+ if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) {
if (rule->action == FR_ACT_TO_TBL) {
struct fib_table *table;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 5f9207c039e7..321d57f825ce 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -129,6 +129,36 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+ switch (guehdr->version) {
+ case 0: /* Full GUE header present */
+ break;
+
+ case 1: {
+ /* Direct encasulation of IPv4 or IPv6 */
+
+ int prot;
+
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ prot = IPPROTO_IPIP;
+ break;
+ case 6:
+ prot = IPPROTO_IPV6;
+ break;
+ default:
+ goto drop;
+ }
+
+ if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
+ goto drop;
+
+ return -prot;
+ }
+
+ default: /* Undefined version */
+ goto drop;
+ }
+
optlen = guehdr->hlen << 2;
len += optlen;
@@ -289,6 +319,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
int flush = 1;
struct fou *fou = fou_from_sock(sk);
struct gro_remcsum grc;
+ u8 proto;
skb_gro_remcsum_init(&grc);
@@ -302,6 +333,25 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
goto out;
}
+ switch (guehdr->version) {
+ case 0:
+ break;
+ case 1:
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ proto = IPPROTO_IPIP;
+ break;
+ case 6:
+ proto = IPPROTO_IPV6;
+ break;
+ default:
+ goto out;
+ }
+ goto next_proto;
+ default:
+ goto out;
+ }
+
optlen = guehdr->hlen << 2;
len += optlen;
@@ -370,6 +420,10 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
}
}
+ proto = guehdr->proto_ctype;
+
+next_proto:
+
/* We can clear the encap_mark for GUE as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
* header to the outer L3 tunnel header, or we are are simply
@@ -383,7 +437,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[guehdr->proto_ctype]);
+ ops = rcu_dereference(offloads[proto]);
if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
goto out_unlock;
@@ -404,13 +458,30 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
const struct net_offload **offloads;
struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
const struct net_offload *ops;
- unsigned int guehlen;
+ unsigned int guehlen = 0;
u8 proto;
int err = -ENOENT;
- proto = guehdr->proto_ctype;
-
- guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ switch (guehdr->version) {
+ case 0:
+ proto = guehdr->proto_ctype;
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ break;
+ case 1:
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ proto = IPPROTO_IPIP;
+ break;
+ case 6:
+ proto = IPPROTO_IPV6;
+ break;
+ default:
+ return err;
+ }
+ break;
+ default:
+ return err;
+ }
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index de1d119a4497..b798862b6be5 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -117,6 +117,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if ((*(u8 *)options & 0xF0) != 0x40)
hdr_len += 4;
}
+ tpi->hdr_len = hdr_len;
return hdr_len;
}
EXPORT_SYMBOL(gre_parse_header);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 25af1243649b..38c2c47fe0e8 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -44,6 +44,7 @@ struct inet_diag_entry {
u16 dport;
u16 family;
u16 userlocks;
+ u32 ifindex;
};
static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -571,6 +572,14 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
yes = 0;
break;
}
+ case INET_DIAG_BC_DEV_COND: {
+ u32 ifindex;
+
+ ifindex = *((const u32 *)(op + 1));
+ if (ifindex != entry->ifindex)
+ yes = 0;
+ break;
+ }
}
if (yes) {
@@ -613,6 +622,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry_fill_addrs(&entry, sk);
entry.sport = inet->inet_num;
entry.dport = ntohs(inet->inet_dport);
+ entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
return inet_diag_bc_run(bc, &entry);
@@ -636,6 +646,17 @@ static int valid_cc(const void *bc, int len, int cc)
return 0;
}
+/* data is u32 ifindex */
+static bool valid_devcond(const struct inet_diag_bc_op *op, int len,
+ int *min_len)
+{
+ /* Check ifindex space. */
+ *min_len += sizeof(u32);
+ if (len < *min_len)
+ return false;
+
+ return true;
+}
/* Validate an inet_diag_hostcond. */
static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
int *min_len)
@@ -700,6 +721,10 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
if (!valid_hostcond(bc, len, &min_len))
return -EINVAL;
break;
+ case INET_DIAG_BC_DEV_COND:
+ if (!valid_devcond(bc, len, &min_len))
+ return -EINVAL;
+ break;
case INET_DIAG_BC_S_GE:
case INET_DIAG_BC_S_LE:
case INET_DIAG_BC_D_GE:
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3a88b0c73797..b5e9317eaf9e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -355,7 +355,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
{
struct inet_frag_queue *q;
- if (frag_mem_limit(nf) > nf->high_thresh) {
+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
inet_frag_schedule_worker(f);
return NULL;
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index cbfb1808fcc4..9f0a7b96646f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -54,7 +54,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 1d000af7f561..5b1481be0282 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -138,6 +138,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ unsigned int data_len = 0;
struct ip_tunnel *t;
switch (type) {
@@ -163,6 +164,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return;
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
@@ -181,6 +183,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
if (!t)
return;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tpi->proto == htons(ETH_P_IPV6) &&
+ !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return;
+#endif
+
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
return;
@@ -837,17 +846,19 @@ out:
return ipgre_tunnel_validate(tb, data);
}
-static void ipgre_netlink_parms(struct net_device *dev,
+static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
struct ip_tunnel_parm *parms)
{
+ struct ip_tunnel *t = netdev_priv(dev);
+
memset(parms, 0, sizeof(*parms));
parms->iph.protocol = IPPROTO_GRE;
if (!data)
- return;
+ return 0;
if (data[IFLA_GRE_LINK])
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
@@ -876,16 +887,26 @@ static void ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
- if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
+ if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
+ if (t->ignore_df)
+ return -EINVAL;
parms->iph.frag_off = htons(IP_DF);
+ }
if (data[IFLA_GRE_COLLECT_METADATA]) {
- struct ip_tunnel *t = netdev_priv(dev);
-
t->collect_md = true;
if (dev->type == ARPHRD_IPGRE)
dev->type = ARPHRD_NONE;
}
+
+ if (data[IFLA_GRE_IGNORE_DF]) {
+ if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
+ && (parms->iph.frag_off & htons(IP_DF)))
+ return -EINVAL;
+ t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
+ }
+
+ return 0;
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -956,16 +977,19 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p);
+ if (err < 0)
+ return err;
return ip_tunnel_newlink(dev, tb, &p);
}
@@ -974,16 +998,19 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p);
+ if (err < 0)
+ return err;
return ip_tunnel_changelink(dev, tb, &p);
}
@@ -1020,6 +1047,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_GRE_IGNORE_DF */
+ nla_total_size(1) +
0;
}
@@ -1053,6 +1082,9 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
+ goto nla_put_failure;
+
if (t->collect_md) {
if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
goto nla_put_failure;
@@ -1080,6 +1112,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 124bf0a66328..cbac493c913a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -225,7 +225,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
- skb_gso_network_seglen(skb) <= mtu)
+ skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU.
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d8f5e0a269f5..95649ebd2874 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -682,7 +682,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP))
+ if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
df |= (inner_iph->frag_off&htons(IP_DF));
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 7e538f71f5fb..10d728b6804c 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -293,7 +293,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
*/
if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) ||
ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- memset(info, 0, sizeof(struct tcp_dctcp_info));
+ memset(&info->dctcp, 0, sizeof(info->dctcp));
if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) {
info->dctcp.dctcp_enabled = 1;
info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
@@ -303,7 +303,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
}
*attr = INET_DIAG_DCTCPINFO;
- return sizeof(*info);
+ return sizeof(info->dctcp);
}
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6c8f4cd0800..94d4aff97523 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3115,6 +3115,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
long ca_rtt_us = -1L;
struct sk_buff *skb;
u32 pkts_acked = 0;
+ u32 last_in_flight = 0;
bool rtt_update;
int flag = 0;
@@ -3154,6 +3155,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (!first_ackt.v64)
first_ackt = last_ackt;
+ last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
reord = min(pkts_acked, reord);
if (!after(scb->end_seq, tp->high_seq))
flag |= FLAG_ORIG_SACK_ACKED;
@@ -3250,7 +3252,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
- .rtt_us = ca_rtt_us };
+ .rtt_us = ca_rtt_us,
+ .in_flight = last_in_flight };
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
}
@@ -5159,6 +5162,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, int syn_inerr)
{
struct tcp_sock *tp = tcp_sk(sk);
+ bool rst_seq_match = false;
/* RFC1323: H1. Apply PAWS check first. */
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
@@ -5195,13 +5199,32 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
/* Step 2: check RST bit */
if (th->rst) {
- /* RFC 5961 3.2 :
- * If sequence number exactly matches RCV.NXT, then
+ /* RFC 5961 3.2 (extend to match against SACK too if available):
+ * If seq num matches RCV.NXT or the right-most SACK block,
+ * then
* RESET the connection
* else
* Send a challenge ACK
*/
- if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
+ if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+ rst_seq_match = true;
+ } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
+ struct tcp_sack_block *sp = &tp->selective_acks[0];
+ int max_sack = sp[0].end_seq;
+ int this_sack;
+
+ for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
+ ++this_sack) {
+ max_sack = after(sp[this_sack].end_seq,
+ max_sack) ?
+ sp[this_sack].end_seq : max_sack;
+ }
+
+ if (TCP_SKB_CB(skb)->seq == max_sack)
+ rst_seq_match = true;
+ }
+
+ if (rst_seq_match)
tcp_reset(sk);
else
tcp_send_challenge_ack(sk, skb);
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
new file mode 100644
index 000000000000..5de82a8d4d87
--- /dev/null
+++ b/net/ipv4/tcp_nv.c
@@ -0,0 +1,476 @@
+/*
+ * TCP NV: TCP with Congestion Avoidance
+ *
+ * TCP-NV is a successor of TCP-Vegas that has been developed to
+ * deal with the issues that occur in modern networks.
+ * Like TCP-Vegas, TCP-NV supports true congestion avoidance,
+ * the ability to detect congestion before packet losses occur.
+ * When congestion (queue buildup) starts to occur, TCP-NV
+ * predicts what the cwnd size should be for the current
+ * throughput and it reduces the cwnd proportionally to
+ * the difference between the current cwnd and the predicted cwnd.
+ *
+ * NV is only recommeneded for traffic within a data center, and when
+ * all the flows are NV (at least those within the data center). This
+ * is due to the inherent unfairness between flows using losses to
+ * detect congestion (congestion control) and those that use queue
+ * buildup to detect congestion (congestion avoidance).
+ *
+ * Note: High NIC coalescence values may lower the performance of NV
+ * due to the increased noise in RTT values. In particular, we have
+ * seen issues with rx-frames values greater than 8.
+ *
+ * TODO:
+ * 1) Add mechanism to deal with reverse congestion.
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/math64.h>
+#include <net/tcp.h>
+#include <linux/inet_diag.h>
+
+/* TCP NV parameters
+ *
+ * nv_pad Max number of queued packets allowed in network
+ * nv_pad_buffer Do not grow cwnd if this closed to nv_pad
+ * nv_reset_period How often (in) seconds)to reset min_rtt
+ * nv_min_cwnd Don't decrease cwnd below this if there are no losses
+ * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected
+ * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8
+ * nv_rtt_factor RTT averaging factor
+ * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur
+ * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd
+ * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd
+ * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping
+ * slow-start due to congestion
+ * nv_stop_rtt_cnt Only grow cwnd for this many RTTs after non-congestion
+ * nv_rtt_min_cnt Wait these many RTTs before making congesion decision
+ * nv_cwnd_growth_rate_neg
+ * nv_cwnd_growth_rate_pos
+ * How quickly to double growth rate (not rate) of cwnd when not
+ * congested. One value (nv_cwnd_growth_rate_neg) for when
+ * rate < 1 pkt/RTT (after losses). The other (nv_cwnd_growth_rate_pos)
+ * otherwise.
+ */
+
+static int nv_pad __read_mostly = 10;
+static int nv_pad_buffer __read_mostly = 2;
+static int nv_reset_period __read_mostly = 5; /* in seconds */
+static int nv_min_cwnd __read_mostly = 2;
+static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
+static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
+static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
+static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */
+static int nv_cwnd_growth_rate_neg __read_mostly = 8;
+static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
+static int nv_dec_eval_min_calls __read_mostly = 60;
+static int nv_inc_eval_min_calls __read_mostly = 20;
+static int nv_ssthresh_eval_min_calls __read_mostly = 30;
+static int nv_stop_rtt_cnt __read_mostly = 10;
+static int nv_rtt_min_cnt __read_mostly = 2;
+
+module_param(nv_pad, int, 0644);
+MODULE_PARM_DESC(nv_pad, "max queued packets allowed in network");
+module_param(nv_reset_period, int, 0644);
+MODULE_PARM_DESC(nv_reset_period, "nv_min_rtt reset period (secs)");
+module_param(nv_min_cwnd, int, 0644);
+MODULE_PARM_DESC(nv_min_cwnd, "NV will not decrease cwnd below this value"
+ " without losses");
+
+/* TCP NV Parameters */
+struct tcpnv {
+ unsigned long nv_min_rtt_reset_jiffies; /* when to switch to
+ * nv_min_rtt_new */
+ s8 cwnd_growth_factor; /* Current cwnd growth factor,
+ * < 0 => less than 1 packet/RTT */
+ u8 available8;
+ u16 available16;
+ u32 loss_cwnd; /* cwnd at last loss */
+ u8 nv_allow_cwnd_growth:1, /* whether cwnd can grow */
+ nv_reset:1, /* whether to reset values */
+ nv_catchup:1; /* whether we are growing because
+ * of temporary cwnd decrease */
+ u8 nv_eval_call_cnt; /* call count since last eval */
+ u8 nv_min_cwnd; /* nv won't make a ca decision if cwnd is
+ * smaller than this. It may grow to handle
+ * TSO, LRO and interrupt coalescence because
+ * with these a small cwnd cannot saturate
+ * the link. Note that this is different from
+ * the file local nv_min_cwnd */
+ u8 nv_rtt_cnt; /* RTTs without making ca decision */;
+ u32 nv_last_rtt; /* last rtt */
+ u32 nv_min_rtt; /* active min rtt. Used to determine slope */
+ u32 nv_min_rtt_new; /* min rtt for future use */
+ u32 nv_rtt_max_rate; /* max rate seen during current RTT */
+ u32 nv_rtt_start_seq; /* current RTT ends when packet arrives
+ * acking beyond nv_rtt_start_seq */
+ u32 nv_last_snd_una; /* Previous value of tp->snd_una. It is
+ * used to determine bytes acked since last
+ * call to bictcp_acked */
+ u32 nv_no_cong_cnt; /* Consecutive no congestion decisions */
+};
+
+#define NV_INIT_RTT U32_MAX
+#define NV_MIN_CWND 4
+#define NV_MIN_CWND_GROW 2
+#define NV_TSO_CWND_BOUND 80
+
+static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ ca->nv_reset = 0;
+ ca->loss_cwnd = 0;
+ ca->nv_no_cong_cnt = 0;
+ ca->nv_rtt_cnt = 0;
+ ca->nv_last_rtt = 0;
+ ca->nv_rtt_max_rate = 0;
+ ca->nv_rtt_start_seq = tp->snd_una;
+ ca->nv_eval_call_cnt = 0;
+ ca->nv_last_snd_una = tp->snd_una;
+}
+
+static void tcpnv_init(struct sock *sk)
+{
+ struct tcpnv *ca = inet_csk_ca(sk);
+
+ tcpnv_reset(ca, sk);
+
+ ca->nv_allow_cwnd_growth = 1;
+ ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
+ ca->nv_min_rtt = NV_INIT_RTT;
+ ca->nv_min_rtt_new = NV_INIT_RTT;
+ ca->nv_min_cwnd = NV_MIN_CWND;
+ ca->nv_catchup = 0;
+ ca->cwnd_growth_factor = 0;
+}
+
+static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcpnv *ca = inet_csk_ca(sk);
+ u32 cnt;
+
+ if (!tcp_is_cwnd_limited(sk))
+ return;
+
+ /* Only grow cwnd if NV has not detected congestion */
+ if (!ca->nv_allow_cwnd_growth)
+ return;
+
+ if (tcp_in_slow_start(tp)) {
+ acked = tcp_slow_start(tp, acked);
+ if (!acked)
+ return;
+ }
+
+ if (ca->cwnd_growth_factor < 0) {
+ cnt = tp->snd_cwnd << -ca->cwnd_growth_factor;
+ tcp_cong_avoid_ai(tp, cnt, acked);
+ } else {
+ cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor);
+ tcp_cong_avoid_ai(tp, cnt, acked);
+ }
+}
+
+static u32 tcpnv_recalc_ssthresh(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcpnv *ca = inet_csk_ca(sk);
+
+ ca->loss_cwnd = tp->snd_cwnd;
+ return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
+}
+
+static u32 tcpnv_undo_cwnd(struct sock *sk)
+{
+ struct tcpnv *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
+static void tcpnv_state(struct sock *sk, u8 new_state)
+{
+ struct tcpnv *ca = inet_csk_ca(sk);
+
+ if (new_state == TCP_CA_Open && ca->nv_reset) {
+ tcpnv_reset(ca, sk);
+ } else if (new_state == TCP_CA_Loss || new_state == TCP_CA_CWR ||
+ new_state == TCP_CA_Recovery) {
+ ca->nv_reset = 1;
+ ca->nv_allow_cwnd_growth = 0;
+ if (new_state == TCP_CA_Loss) {
+ /* Reset cwnd growth factor to Reno value */
+ if (ca->cwnd_growth_factor > 0)
+ ca->cwnd_growth_factor = 0;
+ /* Decrease growth rate if allowed */
+ if (nv_cwnd_growth_rate_neg > 0 &&
+ ca->cwnd_growth_factor > -8)
+ ca->cwnd_growth_factor--;
+ }
+ }
+}
+
+/* Do congestion avoidance calculations for TCP-NV
+ */
+static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcpnv *ca = inet_csk_ca(sk);
+ unsigned long now = jiffies;
+ s64 rate64 = 0;
+ u32 rate, max_win, cwnd_by_slope;
+ u32 avg_rtt;
+ u32 bytes_acked = 0;
+
+ /* Some calls are for duplicates without timetamps */
+ if (sample->rtt_us < 0)
+ return;
+
+ /* If not in TCP_CA_Open or TCP_CA_Disorder states, skip. */
+ if (icsk->icsk_ca_state != TCP_CA_Open &&
+ icsk->icsk_ca_state != TCP_CA_Disorder)
+ return;
+
+ /* Stop cwnd growth if we were in catch up mode */
+ if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) {
+ ca->nv_catchup = 0;
+ ca->nv_allow_cwnd_growth = 0;
+ }
+
+ bytes_acked = tp->snd_una - ca->nv_last_snd_una;
+ ca->nv_last_snd_una = tp->snd_una;
+
+ if (sample->in_flight == 0)
+ return;
+
+ /* Calculate moving average of RTT */
+ if (nv_rtt_factor > 0) {
+ if (ca->nv_last_rtt > 0) {
+ avg_rtt = (((u64)sample->rtt_us) * nv_rtt_factor +
+ ((u64)ca->nv_last_rtt)
+ * (256 - nv_rtt_factor)) >> 8;
+ } else {
+ avg_rtt = sample->rtt_us;
+ ca->nv_min_rtt = avg_rtt << 1;
+ }
+ ca->nv_last_rtt = avg_rtt;
+ } else {
+ avg_rtt = sample->rtt_us;
+ }
+
+ /* rate in 100's bits per second */
+ rate64 = ((u64)sample->in_flight) * 8000000;
+ rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100));
+
+ /* Remember the maximum rate seen during this RTT
+ * Note: It may be more than one RTT. This function should be
+ * called at least nv_dec_eval_min_calls times.
+ */
+ if (ca->nv_rtt_max_rate < rate)
+ ca->nv_rtt_max_rate = rate;
+
+ /* We have valid information, increment counter */
+ if (ca->nv_eval_call_cnt < 255)
+ ca->nv_eval_call_cnt++;
+
+ /* update min rtt if necessary */
+ if (avg_rtt < ca->nv_min_rtt)
+ ca->nv_min_rtt = avg_rtt;
+
+ /* update future min_rtt if necessary */
+ if (avg_rtt < ca->nv_min_rtt_new)
+ ca->nv_min_rtt_new = avg_rtt;
+
+ /* nv_min_rtt is updated with the minimum (possibley averaged) rtt
+ * seen in the last sysctl_tcp_nv_reset_period seconds (i.e. a
+ * warm reset). This new nv_min_rtt will be continued to be updated
+ * and be used for another sysctl_tcp_nv_reset_period seconds,
+ * when it will be updated again.
+ * In practice we introduce some randomness, so the actual period used
+ * is chosen randomly from the range:
+ * [sysctl_tcp_nv_reset_period*3/4, sysctl_tcp_nv_reset_period*5/4)
+ */
+ if (time_after_eq(now, ca->nv_min_rtt_reset_jiffies)) {
+ unsigned char rand;
+
+ ca->nv_min_rtt = ca->nv_min_rtt_new;
+ ca->nv_min_rtt_new = NV_INIT_RTT;
+ get_random_bytes(&rand, 1);
+ ca->nv_min_rtt_reset_jiffies =
+ now + ((nv_reset_period * (384 + rand) * HZ) >> 9);
+ /* Every so often we decrease ca->nv_min_cwnd in case previous
+ * value is no longer accurate.
+ */
+ ca->nv_min_cwnd = max(ca->nv_min_cwnd / 2, NV_MIN_CWND);
+ }
+
+ /* Once per RTT check if we need to do congestion avoidance */
+ if (before(ca->nv_rtt_start_seq, tp->snd_una)) {
+ ca->nv_rtt_start_seq = tp->snd_nxt;
+ if (ca->nv_rtt_cnt < 0xff)
+ /* Increase counter for RTTs without CA decision */
+ ca->nv_rtt_cnt++;
+
+ /* If this function is only called once within an RTT
+ * the cwnd is probably too small (in some cases due to
+ * tso, lro or interrupt coalescence), so we increase
+ * ca->nv_min_cwnd.
+ */
+ if (ca->nv_eval_call_cnt == 1 &&
+ bytes_acked >= (ca->nv_min_cwnd - 1) * tp->mss_cache &&
+ ca->nv_min_cwnd < (NV_TSO_CWND_BOUND + 1)) {
+ ca->nv_min_cwnd = min(ca->nv_min_cwnd
+ + NV_MIN_CWND_GROW,
+ NV_TSO_CWND_BOUND + 1);
+ ca->nv_rtt_start_seq = tp->snd_nxt +
+ ca->nv_min_cwnd * tp->mss_cache;
+ ca->nv_eval_call_cnt = 0;
+ ca->nv_allow_cwnd_growth = 1;
+ return;
+ }
+
+ /* Find the ideal cwnd for current rate from slope
+ * slope = 80000.0 * mss / nv_min_rtt
+ * cwnd_by_slope = nv_rtt_max_rate / slope
+ */
+ cwnd_by_slope = (u32)
+ div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt,
+ (u64)(80000 * tp->mss_cache));
+ max_win = cwnd_by_slope + nv_pad;
+
+ /* If cwnd > max_win, decrease cwnd
+ * if cwnd < max_win, grow cwnd
+ * else leave the same
+ */
+ if (tp->snd_cwnd > max_win) {
+ /* there is congestion, check that it is ok
+ * to make a CA decision
+ * 1. We should have at least nv_dec_eval_min_calls
+ * data points before making a CA decision
+ * 2. We only make a congesion decision after
+ * nv_rtt_min_cnt RTTs
+ */
+ if (ca->nv_rtt_cnt < nv_rtt_min_cnt) {
+ return;
+ } else if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) {
+ if (ca->nv_eval_call_cnt <
+ nv_ssthresh_eval_min_calls)
+ return;
+ /* otherwise we will decrease cwnd */
+ } else if (ca->nv_eval_call_cnt <
+ nv_dec_eval_min_calls) {
+ if (ca->nv_allow_cwnd_growth &&
+ ca->nv_rtt_cnt > nv_stop_rtt_cnt)
+ ca->nv_allow_cwnd_growth = 0;
+ return;
+ }
+
+ /* We have enough data to determine we are congested */
+ ca->nv_allow_cwnd_growth = 0;
+ tp->snd_ssthresh =
+ (nv_ssthresh_factor * max_win) >> 3;
+ if (tp->snd_cwnd - max_win > 2) {
+ /* gap > 2, we do exponential cwnd decrease */
+ int dec;
+
+ dec = max(2U, ((tp->snd_cwnd - max_win) *
+ nv_cong_dec_mult) >> 7);
+ tp->snd_cwnd -= dec;
+ } else if (nv_cong_dec_mult > 0) {
+ tp->snd_cwnd = max_win;
+ }
+ if (ca->cwnd_growth_factor > 0)
+ ca->cwnd_growth_factor = 0;
+ ca->nv_no_cong_cnt = 0;
+ } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) {
+ /* There is no congestion, grow cwnd if allowed*/
+ if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
+ return;
+
+ ca->nv_allow_cwnd_growth = 1;
+ ca->nv_no_cong_cnt++;
+ if (ca->cwnd_growth_factor < 0 &&
+ nv_cwnd_growth_rate_neg > 0 &&
+ ca->nv_no_cong_cnt > nv_cwnd_growth_rate_neg) {
+ ca->cwnd_growth_factor++;
+ ca->nv_no_cong_cnt = 0;
+ } else if (ca->cwnd_growth_factor >= 0 &&
+ nv_cwnd_growth_rate_pos > 0 &&
+ ca->nv_no_cong_cnt >
+ nv_cwnd_growth_rate_pos) {
+ ca->cwnd_growth_factor++;
+ ca->nv_no_cong_cnt = 0;
+ }
+ } else {
+ /* cwnd is in-between, so do nothing */
+ return;
+ }
+
+ /* update state */
+ ca->nv_eval_call_cnt = 0;
+ ca->nv_rtt_cnt = 0;
+ ca->nv_rtt_max_rate = 0;
+
+ /* Don't want to make cwnd < nv_min_cwnd
+ * (it wasn't before, if it is now is because nv
+ * decreased it).
+ */
+ if (tp->snd_cwnd < nv_min_cwnd)
+ tp->snd_cwnd = nv_min_cwnd;
+ }
+}
+
+/* Extract info for Tcp socket info provided via netlink */
+size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
+{
+ const struct tcpnv *ca = inet_csk_ca(sk);
+
+ if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+ info->vegas.tcpv_enabled = 1;
+ info->vegas.tcpv_rttcnt = ca->nv_rtt_cnt;
+ info->vegas.tcpv_rtt = ca->nv_last_rtt;
+ info->vegas.tcpv_minrtt = ca->nv_min_rtt;
+
+ *attr = INET_DIAG_VEGASINFO;
+ return sizeof(struct tcpvegas_info);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tcpnv_get_info);
+
+static struct tcp_congestion_ops tcpnv __read_mostly = {
+ .init = tcpnv_init,
+ .ssthresh = tcpnv_recalc_ssthresh,
+ .cong_avoid = tcpnv_cong_avoid,
+ .set_state = tcpnv_state,
+ .undo_cwnd = tcpnv_undo_cwnd,
+ .pkts_acked = tcpnv_acked,
+ .get_info = tcpnv_get_info,
+
+ .owner = THIS_MODULE,
+ .name = "nv",
+};
+
+static int __init tcpnv_register(void)
+{
+ BUILD_BUG_ON(sizeof(struct tcpnv) > ICSK_CA_PRIV_SIZE);
+
+ return tcp_register_congestion_control(&tcpnv);
+}
+
+static void __exit tcpnv_unregister(void)
+{
+ tcp_unregister_congestion_control(&tcpnv);
+}
+
+module_init(tcpnv_register);
+module_exit(tcpnv_unregister);
+
+MODULE_AUTHOR("Lawrence Brakmo");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP NV");
+MODULE_VERSION("1.0");
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e00e972c4e6a..b26aa870adc0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -911,9 +911,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
int err;
BUG_ON(!skb || !tcp_skb_pcount(skb));
+ tp = tcp_sk(sk);
if (clone_it) {
skb_mstamp_get(&skb->skb_mstamp);
+ TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
+ - tp->snd_una;
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
@@ -924,7 +927,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
}
inet = inet_sk(sk);
- tp = tcp_sk(sk);
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 47f12c73d959..58bd39fb14b4 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -76,6 +76,67 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
+void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
+ unsigned short type)
+{
+ struct sock *sk = sock->sk;
+ struct udp_tunnel_info ti;
+
+ if (!dev->netdev_ops->ndo_udp_tunnel_add)
+ return;
+
+ ti.type = type;
+ ti.sa_family = sk->sk_family;
+ ti.port = inet_sk(sk)->inet_sport;
+
+ dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
+
+/* Notify netdevs that UDP port started listening */
+void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
+{
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct udp_tunnel_info ti;
+ struct net_device *dev;
+
+ ti.type = type;
+ ti.sa_family = sk->sk_family;
+ ti.port = inet_sk(sk)->inet_sport;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ if (!dev->netdev_ops->ndo_udp_tunnel_add)
+ continue;
+ dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port);
+
+/* Notify netdevs that UDP port is no more listening */
+void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
+{
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct udp_tunnel_info ti;
+ struct net_device *dev;
+
+ ti.type = type;
+ ti.sa_family = sk->sk_family;
+ ti.port = inet_sk(sk)->inet_sport;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ if (!dev->netdev_ops->ndo_udp_tunnel_del)
+ continue;
+ dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port);
+
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
__be16 df, __be16 src_port, __be16 dst_port,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7b0edb37a115..b644a23c3db0 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -295,7 +295,7 @@ static struct ctl_table xfrm4_policy_table[] = {
{ }
};
-static int __net_init xfrm4_net_sysctl_init(struct net *net)
+static __net_init int xfrm4_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -323,7 +323,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
+static __net_exit void xfrm4_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -336,12 +336,12 @@ static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
kfree(table);
}
#else /* CONFIG_SYSCTL */
-static int inline xfrm4_net_sysctl_init(struct net *net)
+static inline int xfrm4_net_sysctl_init(struct net *net)
{
return 0;
}
-static void inline xfrm4_net_sysctl_exit(struct net *net)
+static inline void xfrm4_net_sysctl_exit(struct net *net)
{
}
#endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 47f837a58e0a..a1f6b7b31531 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1524,6 +1524,28 @@ out:
return hiscore_idx;
}
+static int ipv6_get_saddr_master(struct net *net,
+ const struct net_device *dst_dev,
+ const struct net_device *master,
+ struct ipv6_saddr_dst *dst,
+ struct ipv6_saddr_score *scores,
+ int hiscore_idx)
+{
+ struct inet6_dev *idev;
+
+ idev = __in6_dev_get(dst_dev);
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
+ scores, hiscore_idx);
+
+ idev = __in6_dev_get(master);
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
+ scores, hiscore_idx);
+
+ return hiscore_idx;
+}
+
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
@@ -1577,13 +1599,39 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
if (idev)
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
} else {
+ const struct net_device *master;
+ int master_idx = 0;
+
+ /* if dst_dev exists and is enslaved to an L3 device, then
+ * prefer addresses from dst_dev and then the master over
+ * any other enslaved devices in the L3 domain.
+ */
+ master = l3mdev_master_dev_rcu(dst_dev);
+ if (master) {
+ master_idx = master->ifindex;
+
+ hiscore_idx = ipv6_get_saddr_master(net, dst_dev,
+ master, &dst,
+ scores, hiscore_idx);
+
+ if (scores[hiscore_idx].ifa)
+ goto out;
+ }
+
for_each_netdev_rcu(net, dev) {
+ /* only consider addresses on devices in the
+ * same L3 domain
+ */
+ if (l3mdev_master_ifindex_rcu(dev) != master_idx)
+ continue;
idev = __in6_dev_get(dev);
if (!idev)
continue;
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
}
}
+
+out:
rcu_read_unlock();
hiscore = &scores[hiscore_idx];
@@ -2254,7 +2302,7 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return ERR_PTR(-EACCES);
/* Add default multicast route */
- if (!(dev->flags & IFF_LOOPBACK))
+ if (!(dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev))
addrconf_add_mroute(dev);
return idev;
@@ -2333,12 +2381,109 @@ static bool is_addr_mode_generate_stable(struct inet6_dev *idev)
idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM;
}
+int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ const struct in6_addr *addr, int addr_type,
+ u32 addr_flags, bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft)
+{
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
+ int create = 0, update_lft = 0;
+
+ if (!ifp && valid_lft) {
+ int max_addresses = in6_dev->cnf.max_addresses;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (in6_dev->cnf.optimistic_dad &&
+ !net->ipv6.devconf_all->forwarding && sllao)
+ addr_flags |= IFA_F_OPTIMISTIC;
+#endif
+
+ /* Do not allow to create too much of autoconfigured
+ * addresses; this would be too easy way to crash kernel.
+ */
+ if (!max_addresses ||
+ ipv6_count_addresses(in6_dev) < max_addresses)
+ ifp = ipv6_add_addr(in6_dev, addr, NULL,
+ pinfo->prefix_len,
+ addr_type&IPV6_ADDR_SCOPE_MASK,
+ addr_flags, valid_lft,
+ prefered_lft);
+
+ if (IS_ERR_OR_NULL(ifp))
+ return -1;
+
+ update_lft = 0;
+ create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
+ ifp->cstamp = jiffies;
+ ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
+ addrconf_dad_start(ifp);
+ }
+
+ if (ifp) {
+ u32 flags;
+ unsigned long now;
+ u32 stored_lft;
+
+ /* update lifetime (RFC2462 5.5.3 e) */
+ spin_lock_bh(&ifp->lock);
+ now = jiffies;
+ if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+ stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+ else
+ stored_lft = 0;
+ if (!update_lft && !create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = now;
+ flags = ifp->flags;
+ ifp->flags &= ~IFA_F_DEPRECATED;
+ spin_unlock_bh(&ifp->lock);
+
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+ } else
+ spin_unlock_bh(&ifp->lock);
+
+ manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
+ create, now);
+
+ in6_ifa_put(ifp);
+ addrconf_verify();
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
+
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
__u32 valid_lft;
__u32 prefered_lft;
- int addr_type;
+ int addr_type, err;
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
@@ -2432,10 +2577,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
/* Try to figure out our local address for this prefix */
if (pinfo->autoconf && in6_dev->cnf.autoconf) {
- struct inet6_ifaddr *ifp;
struct in6_addr addr;
- int create = 0, update_lft = 0;
- bool tokenized = false;
+ bool tokenized = false, dev_addr_generated = false;
if (pinfo->prefix_len == 64) {
memcpy(&addr, &pinfo->prefix, 8);
@@ -2453,106 +2596,36 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
goto ok;
} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
- in6_dev_put(in6_dev);
- return;
+ goto put;
+ } else {
+ dev_addr_generated = true;
}
goto ok;
}
net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n",
pinfo->prefix_len);
- in6_dev_put(in6_dev);
- return;
+ goto put;
ok:
+ err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ &addr, addr_type,
+ addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft);
+ if (err)
+ goto put;
- ifp = ipv6_get_ifaddr(net, &addr, dev, 1);
-
- if (!ifp && valid_lft) {
- int max_addresses = in6_dev->cnf.max_addresses;
-
-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (in6_dev->cnf.optimistic_dad &&
- !net->ipv6.devconf_all->forwarding && sllao)
- addr_flags |= IFA_F_OPTIMISTIC;
-#endif
-
- /* Do not allow to create too much of autoconfigured
- * addresses; this would be too easy way to crash kernel.
- */
- if (!max_addresses ||
- ipv6_count_addresses(in6_dev) < max_addresses)
- ifp = ipv6_add_addr(in6_dev, &addr, NULL,
- pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK,
- addr_flags, valid_lft,
- prefered_lft);
-
- if (IS_ERR_OR_NULL(ifp)) {
- in6_dev_put(in6_dev);
- return;
- }
-
- update_lft = 0;
- create = 1;
- spin_lock_bh(&ifp->lock);
- ifp->flags |= IFA_F_MANAGETEMPADDR;
- ifp->cstamp = jiffies;
- ifp->tokenized = tokenized;
- spin_unlock_bh(&ifp->lock);
- addrconf_dad_start(ifp);
- }
-
- if (ifp) {
- u32 flags;
- unsigned long now;
- u32 stored_lft;
-
- /* update lifetime (RFC2462 5.5.3 e) */
- spin_lock_bh(&ifp->lock);
- now = jiffies;
- if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
- else
- stored_lft = 0;
- if (!update_lft && !create && stored_lft) {
- const u32 minimum_lft = min_t(u32,
- stored_lft, MIN_VALID_LIFETIME);
- valid_lft = max(valid_lft, minimum_lft);
-
- /* RFC4862 Section 5.5.3e:
- * "Note that the preferred lifetime of the
- * corresponding address is always reset to
- * the Preferred Lifetime in the received
- * Prefix Information option, regardless of
- * whether the valid lifetime is also reset or
- * ignored."
- *
- * So we should always update prefered_lft here.
- */
- update_lft = 1;
- }
-
- if (update_lft) {
- ifp->valid_lft = valid_lft;
- ifp->prefered_lft = prefered_lft;
- ifp->tstamp = now;
- flags = ifp->flags;
- ifp->flags &= ~IFA_F_DEPRECATED;
- spin_unlock_bh(&ifp->lock);
-
- if (!(flags&IFA_F_TENTATIVE))
- ipv6_ifa_notify(0, ifp);
- } else
- spin_unlock_bh(&ifp->lock);
-
- manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
- create, now);
-
- in6_ifa_put(ifp);
- addrconf_verify();
- }
+ /* Ignore error case here because previous prefix add addr was
+ * successful which will be notified.
+ */
+ ndisc_ops_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr,
+ addr_type, addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft,
+ dev_addr_generated);
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+put:
in6_dev_put(in6_dev);
}
@@ -2947,8 +3020,8 @@ static void init_loopback(struct net_device *dev)
}
}
-static void addrconf_add_linklocal(struct inet6_dev *idev,
- const struct in6_addr *addr, u32 flags)
+void addrconf_add_linklocal(struct inet6_dev *idev,
+ const struct in6_addr *addr, u32 flags)
{
struct inet6_ifaddr *ifp;
u32 addr_flags = flags | IFA_F_PERMANENT;
@@ -2967,6 +3040,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev,
in6_ifa_put(ifp);
}
}
+EXPORT_SYMBOL_GPL(addrconf_add_linklocal);
static bool ipv6_reserved_interfaceid(struct in6_addr address)
{
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index bfa86f040c16..2076c21107d0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -92,6 +92,12 @@ MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
+bool ipv6_mod_enabled(void)
+{
+ return disable_ipv6_mod == 0;
+}
+EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
+
static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ed33abf57abd..5857c1fc8b67 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -67,6 +67,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
int err = 0;
+ u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -86,7 +87,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto discard_pkt;
}
- table = fib6_get_table(net, rule->table);
+ tb_id = fib_rule_get_table(rule, arg);
+ table = fib6_get_table(net, tb_id);
if (!table) {
err = -EAGAIN;
goto out;
@@ -199,7 +201,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
- if (rule->action == FR_ACT_TO_TBL) {
+ if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC)
goto errout;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a4fa84076969..bd59c343d35f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -388,7 +388,8 @@ relookup_failed:
/*
* Send an ICMP message in response to a packet in error
*/
-static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct in6_addr *force_saddr)
{
struct net *net = dev_net(skb->dev);
struct inet6_dev *idev = NULL;
@@ -475,6 +476,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.daddr = hdr->saddr;
+ if (force_saddr)
+ saddr = force_saddr;
if (saddr)
fl6.saddr = *saddr;
fl6.flowi6_mark = mark;
@@ -502,12 +505,14 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ ipc6.tclass = np->tclass;
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- ipc6.tclass = np->tclass;
ipc6.dontfrag = np->dontfrag;
ipc6.opt = NULL;
@@ -549,10 +554,75 @@ out:
*/
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
- icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
+ icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
kfree_skb(skb);
}
+/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
+ * if sufficient data bytes are available
+ * @nhs is the size of the tunnel header(s) :
+ * Either an IPv4 header for SIT encap
+ * an IPv4 header + GRE header for GRE encap
+ */
+int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
+ unsigned int data_len)
+{
+ struct in6_addr temp_saddr;
+ struct rt6_info *rt;
+ struct sk_buff *skb2;
+ u32 info = 0;
+
+ if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
+ return 1;
+
+ /* RFC 4884 (partial) support for ICMP extensions */
+ if (data_len < 128 || (data_len & 7) || skb->len < data_len)
+ data_len = 0;
+
+ skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
+
+ if (!skb2)
+ return 1;
+
+ skb_dst_drop(skb2);
+ skb_pull(skb2, nhs);
+ skb_reset_network_header(skb2);
+
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
+
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
+
+ if (data_len) {
+ /* RFC 4884 (partial) support :
+ * insert 0 padding at the end, before the extensions
+ */
+ __skb_push(skb2, nhs);
+ skb_reset_network_header(skb2);
+ memmove(skb2->data, skb2->data + nhs, data_len - nhs);
+ memset(skb2->data + data_len - nhs, 0, nhs);
+ /* RFC 4884 4.5 : Length is measured in 64-bit words,
+ * and stored in reserved[0]
+ */
+ info = (data_len/8) << 24;
+ }
+ if (type == ICMP_TIME_EXCEEDED)
+ icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+ info, &temp_saddr);
+ else
+ icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
+ info, &temp_saddr);
+ if (rt)
+ ip6_rt_put(rt);
+
+ kfree_skb(skb2);
+
+ return 0;
+}
+EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
+
static void icmpv6_echo_reply(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
@@ -585,7 +655,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
+ fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index d08fd2d48a78..e0170f62bc39 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -109,7 +109,8 @@ static inline bool ila_csum_neutral_set(struct ila_identifier ident)
return !!(ident.csum_neutral);
}
-void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
+ bool set_csum_neutral);
void ila_init_saved_csum(struct ila_params *p);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 0e94042d1289..ec9efbcdad35 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -34,12 +34,12 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
if (p->locator_match.v64) {
diff = p->csum_diff;
} else {
- diff = compute_csum_diff8((__be32 *)iaddr,
- (__be32 *)&p->locator);
+ diff = compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)iaddr);
}
fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
- ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG);
+ CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
diff = csum_add(diff, fval);
@@ -103,7 +103,8 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
iaddr->loc = p->locator;
}
-void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
+ bool set_csum_neutral)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
@@ -114,7 +115,8 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
* is a locator being translated to a SIR address.
* Perform (receiver) checksum-neutral translation.
*/
- ila_csum_do_neutral(iaddr, p);
+ if (!set_csum_neutral)
+ ila_csum_do_neutral(iaddr, p);
} else {
switch (p->csum_mode) {
case ILA_CSUM_ADJUST_TRANSPORT:
@@ -138,8 +140,8 @@ void ila_init_saved_csum(struct ila_params *p)
return;
p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match,
- (__be32 *)&p->locator);
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
}
static int __init ila_init(void)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 1dfb64166d7d..c8314c6b6154 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true);
return dst->lwtstate->orig_output(net, sk, skb);
@@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
return dst->lwtstate->orig_input(skb);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index a90e57229c6c..e6eca5fdf4c9 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -210,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb);
+static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
static unsigned int
ila_nf_input(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- ila_xlat_addr(skb);
+ ila_xlat_addr(skb, false);
return NF_ACCEPT;
}
@@ -597,7 +597,7 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb)
+static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -616,7 +616,7 @@ static int ila_xlat_addr(struct sk_buff *skb)
ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- ila_update_ipv6_locator(skb, &ila->xp.ip);
+ ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
rcu_read_unlock();
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 14dacc544c3e..713676f14a0e 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -39,7 +39,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (!send)
goto out;
- send(skb, type, code, info);
+ send(skb, type, code, info, NULL);
out:
rcu_read_unlock();
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 94611e450ec9..aacfb4bce153 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -323,6 +323,7 @@ int ip6_input(struct sk_buff *skb)
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_input_finish);
}
+EXPORT_SYMBOL_GPL(ip6_input);
int ip6_mc_input(struct sk_buff *skb)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 635b8d340cdb..1dfc402d9ad1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -368,7 +368,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
@@ -910,6 +910,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
int err;
int flags = 0;
+ if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
+ (!*dst || !(*dst)->error)) {
+ err = l3mdev_get_saddr6(net, sk, fl6);
+ if (err)
+ goto out_err;
+ }
+
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
* the route-specific preferred source forces the
@@ -999,10 +1006,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
return 0;
out_err_release:
- if (err == -ENETUNREACH)
- IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
dst_release(*dst);
*dst = NULL;
+out_err:
+ if (err == -ENETUNREACH)
+ IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
return err;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c245895a3d41..fe65cdc28a45 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -73,15 +73,6 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
-/* Set to 3 to get tracing... */
-#define ND_DEBUG 1
-
-#define ND_PRINTK(val, level, fmt, ...) \
-do { \
- if (val <= ND_DEBUG) \
- net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
-} while (0)
-
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
@@ -150,11 +141,10 @@ struct neigh_table nd_tbl = {
};
EXPORT_SYMBOL_GPL(nd_tbl);
-static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+ int data_len, int pad)
{
- int pad = ndisc_addr_option_pad(skb->dev->type);
- int data_len = skb->dev->addr_len;
- int space = ndisc_opt_addr_space(skb->dev);
+ int space = __ndisc_opt_addr_space(data_len, pad);
u8 *opt = skb_put(skb, space);
opt[0] = type;
@@ -171,6 +161,23 @@ static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
if (space > 0)
memset(opt, 0, space);
}
+EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
+
+static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
+ void *data, u8 icmp6_type)
+{
+ __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
+ ndisc_addr_option_pad(skb->dev->type));
+ ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
+}
+
+static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
+ void *ha,
+ const u8 *ops_data)
+{
+ ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
+ ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
+}
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
@@ -185,24 +192,28 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
return cur <= end && cur->nd_opt_type == type ? cur : NULL;
}
-static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
+static inline int ndisc_is_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *opt)
{
return opt->nd_opt_type == ND_OPT_RDNSS ||
- opt->nd_opt_type == ND_OPT_DNSSL;
+ opt->nd_opt_type == ND_OPT_DNSSL ||
+ ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
-static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
+static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
{
if (!cur || !end || cur >= end)
return NULL;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
- } while (cur < end && !ndisc_is_useropt(cur));
- return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
+ } while (cur < end && !ndisc_is_useropt(dev, cur));
+ return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
}
-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
+ u8 *opt, int opt_len,
struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
@@ -217,6 +228,8 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
l = nd_opt->nd_opt_len << 3;
if (opt_len < l || l == 0)
return NULL;
+ if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
+ goto next_opt;
switch (nd_opt->nd_opt_type) {
case ND_OPT_SOURCE_LL_ADDR:
case ND_OPT_TARGET_LL_ADDR:
@@ -243,7 +256,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
break;
#endif
default:
- if (ndisc_is_useropt(nd_opt)) {
+ if (ndisc_is_useropt(dev, nd_opt)) {
ndopts->nd_useropts_end = nd_opt;
if (!ndopts->nd_useropts)
ndopts->nd_useropts = nd_opt;
@@ -260,6 +273,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
nd_opt->nd_opt_len);
}
}
+next_opt:
opt_len -= l;
nd_opt = ((void *)nd_opt) + l;
}
@@ -509,7 +523,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
if (!dev->addr_len)
inc_opt = 0;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -528,8 +543,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
- dev->dev_addr);
-
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
ndisc_send_skb(skb, daddr, src_addr);
}
@@ -574,7 +589,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
if (ipv6_addr_any(saddr))
inc_opt = false;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -590,7 +606,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -626,7 +643,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
}
#endif
if (send_sllao)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -641,7 +658,8 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
if (send_sllao)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_ROUTER_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -702,6 +720,15 @@ static int pndisc_is_router(const void *pkey,
return ret;
}
+void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
+ const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
+ struct ndisc_options *ndopts)
+{
+ neigh_update(neigh, lladdr, new, flags);
+ /* report ndisc ops about neighbour update */
+ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
+}
+
static void ndisc_recv_ns(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
@@ -738,7 +765,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND options\n");
return;
}
@@ -856,9 +883,10 @@ have_ifp:
neigh = __neigh_lookup(&nd_tbl, saddr, dev,
!inc || lladdr || !dev->addr_len);
if (neigh)
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE,
+ NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, saddr, &msg->target, !!is_router,
true, (ifp != NULL && inc), inc);
@@ -911,7 +939,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
idev->cnf.drop_unsolicited_na)
return;
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
}
@@ -967,12 +995,13 @@ static void ndisc_recv_na(struct sk_buff *skb)
goto out;
}
- neigh_update(neigh, lladdr,
+ ndisc_update(dev, neigh, lladdr,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
(msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
+ (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
+ NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
/*
@@ -1017,7 +1046,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
goto out;
/* Parse ND options */
- if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
goto out;
}
@@ -1031,10 +1060,11 @@ static void ndisc_recv_rs(struct sk_buff *skb)
neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
if (neigh) {
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
+ NDISC_ROUTER_SOLICITATION, &ndopts);
neigh_release(neigh);
}
out:
@@ -1135,7 +1165,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) {
ND_PRINTK(2, warn, "RA: invalid ND options\n");
return;
}
@@ -1329,11 +1359,12 @@ skip_linkparms:
goto out;
}
}
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER);
+ NEIGH_UPDATE_F_ISROUTER,
+ NDISC_ROUTER_ADVERTISEMENT, &ndopts);
}
if (!ipv6_accept_ra(in6_dev)) {
@@ -1421,7 +1452,8 @@ skip_routeinfo:
struct nd_opt_hdr *p;
for (p = ndopts.nd_useropts;
p;
- p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+ p = ndisc_next_useropt(skb->dev, p,
+ ndopts.nd_useropts_end)) {
ndisc_ra_useropt(skb, p);
}
}
@@ -1459,7 +1491,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
+ if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
return;
if (!ndopts.nd_opts_rh) {
@@ -1504,7 +1536,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct dst_entry *dst;
struct flowi6 fl6;
int rd_len;
- u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
+ ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
int oif = l3mdev_fib_oif(dev);
bool ret;
@@ -1563,7 +1596,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
memcpy(ha_buf, neigh->ha, dev->addr_len);
read_unlock_bh(&neigh->lock);
ha = ha_buf;
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_redirect_opt_addr_space(dev, neigh,
+ ops_data_buf,
+ &ops_data);
} else
read_unlock_bh(&neigh->lock);
@@ -1594,7 +1629,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
*/
if (ha)
- ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
+ ndisc_fill_redirect_addr_option(buff, ha, ops_data);
/*
* build redirect option and copy skb over to the new packet.
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 3ee3e444a66b..fed40d1ec29b 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -116,6 +116,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ ipc6.tclass = np->tclass;
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr);
if (IS_ERR(dst))
return PTR_ERR(dst);
@@ -140,7 +143,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
pfh.family = AF_INET6;
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- ipc6.tclass = np->tclass;
ipc6.dontfrag = np->dontfrag;
ipc6.opt = NULL;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 896350df6423..590dd1f7746f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -878,6 +878,11 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (inet->hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
+
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -886,9 +891,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
if (ipc6.dontfrag < 0)
ipc6.dontfrag = np->dontfrag;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 520b7884d0c2..49817555449e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1042,8 +1042,8 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return pcpu_rt;
}
-static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
- struct flowi6 *fl6, int flags)
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt;
@@ -1139,6 +1139,7 @@ redo_rt6_select:
}
}
+EXPORT_SYMBOL_GPL(ip6_pol_route);
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
struct flowi6 *fl6, int flags)
@@ -2200,7 +2201,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* first-hop router for the specified ICMP Destination Address.
*/
- if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
return;
}
@@ -2235,12 +2236,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* We have finally decided to accept it.
*/
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
(on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER))
- );
+ NEIGH_UPDATE_F_ISROUTER)),
+ NDISC_REDIRECT, &ndopts);
nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
if (!nrt)
@@ -2585,23 +2586,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
return rt;
}
-int ip6_route_get_saddr(struct net *net,
- struct rt6_info *rt,
- const struct in6_addr *daddr,
- unsigned int prefs,
- struct in6_addr *saddr)
-{
- struct inet6_dev *idev =
- rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
- int err = 0;
- if (rt && rt->rt6i_prefsrc.plen)
- *saddr = rt->rt6i_prefsrc.addr;
- else
- err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
- daddr, prefs, saddr);
- return err;
-}
-
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
struct net_device *dev;
@@ -3306,6 +3290,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
err = -EINVAL;
memset(&fl6, 0, sizeof(fl6));
+ rtm = nlmsg_data(nlh);
+ fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
if (tb[RTA_SRC]) {
if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0619ac70836d..917a5cd4b8fc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -479,47 +479,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
dev_put(dev);
}
-/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
- * if sufficient data bytes are available
- */
-static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb)
-{
- int ihl = ((const struct iphdr *)skb->data)->ihl*4;
- struct rt6_info *rt;
- struct sk_buff *skb2;
-
- if (!pskb_may_pull(skb, ihl + sizeof(struct ipv6hdr) + 8))
- return 1;
-
- skb2 = skb_clone(skb, GFP_ATOMIC);
-
- if (!skb2)
- return 1;
-
- skb_dst_drop(skb2);
- skb_pull(skb2, ihl);
- skb_reset_network_header(skb2);
-
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
-
- if (rt && rt->dst.dev)
- skb2->dev = rt->dst.dev;
-
- icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
-
- if (rt)
- ip6_rt_put(rt);
-
- kfree_skb(skb2);
-
- return 0;
-}
-
static int ipip6_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (const struct iphdr *)skb->data;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ unsigned int data_len = 0;
struct ip_tunnel *t;
int err;
@@ -544,6 +509,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
break;
@@ -571,11 +537,11 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
goto out;
}
- if (t->parms.iph.daddr == 0)
+ err = 0;
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
goto out;
- err = 0;
- if (!ipip6_err_gen_icmpv6_unreach(skb))
+ if (t->parms.iph.daddr == 0)
goto out;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
@@ -825,9 +791,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
u8 protocol = IPPROTO_IPV6;
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- if (skb->protocol != htons(ETH_P_IPV6))
- goto tx_error;
-
if (tos == 1)
tos = ipv6_get_dsfield(iph6);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 005dc82c2138..0a71a312d0d8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1207,6 +1207,11 @@ do_udp_sendmsg:
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
+
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -1217,9 +1222,6 @@ do_udp_sendmsg:
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index c074771a10f7..6cc97003e4a9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -366,12 +366,12 @@ static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
kfree(table);
}
#else /* CONFIG_SYSCTL */
-static int inline xfrm6_net_sysctl_init(struct net *net)
+static inline int xfrm6_net_sysctl_init(struct net *net)
{
return 0;
}
-static void inline xfrm6_net_sysctl_exit(struct net *net)
+static inline void xfrm6_net_sysctl_exit(struct net *net)
{
}
#endif
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fc3598a922b0..37d674e6f8a9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1033,6 +1033,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
+ size_t headroom, linear;
struct sk_buff *skb;
struct iucv_message txmsg = {0};
struct cmsghdr *cmsg;
@@ -1110,20 +1111,31 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
* this is fine for SOCK_SEQPACKET (unless we want to support
* segmented records using the MSG_EOR flag), but
* for SOCK_STREAM we might want to improve it in future */
- if (iucv->transport == AF_IUCV_TRANS_HIPER)
- skb = sock_alloc_send_skb(sk,
- len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN,
- noblock, &err);
- else
- skb = sock_alloc_send_skb(sk, len, noblock, &err);
+ headroom = (iucv->transport == AF_IUCV_TRANS_HIPER)
+ ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0;
+ if (headroom + len < PAGE_SIZE) {
+ linear = len;
+ } else {
+ /* In nonlinear "classic" iucv skb,
+ * reserve space for iucv_array
+ */
+ if (iucv->transport != AF_IUCV_TRANS_HIPER)
+ headroom += sizeof(struct iucv_array) *
+ (MAX_SKB_FRAGS + 1);
+ linear = PAGE_SIZE - headroom;
+ }
+ skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear,
+ noblock, &err, 0);
if (!skb)
goto out;
- if (iucv->transport == AF_IUCV_TRANS_HIPER)
- skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
+ if (headroom)
+ skb_reserve(skb, headroom);
+ skb_put(skb, linear);
+ skb->len = len;
+ skb->data_len = len - linear;
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
+ if (err)
goto fail;
- }
/* wait if outstanding messages for iucv path has reached */
timeo = sock_sndtimeo(sk, noblock);
@@ -1148,49 +1160,67 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
atomic_dec(&iucv->msg_sent);
goto fail;
}
- goto release;
- }
- skb_queue_tail(&iucv->send_skb_q, skb);
-
- if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
- && skb->len <= 7) {
- err = iucv_send_iprm(iucv->path, &txmsg, skb);
+ } else { /* Classic VM IUCV transport */
+ skb_queue_tail(&iucv->send_skb_q, skb);
+
+ if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) &&
+ skb->len <= 7) {
+ err = iucv_send_iprm(iucv->path, &txmsg, skb);
+
+ /* on success: there is no message_complete callback */
+ /* for an IPRMDATA msg; remove skb from send queue */
+ if (err == 0) {
+ skb_unlink(skb, &iucv->send_skb_q);
+ kfree_skb(skb);
+ }
- /* on success: there is no message_complete callback
- * for an IPRMDATA msg; remove skb from send queue */
- if (err == 0) {
- skb_unlink(skb, &iucv->send_skb_q);
- kfree_skb(skb);
+ /* this error should never happen since the */
+ /* IUCV_IPRMDATA path flag is set... sever path */
+ if (err == 0x15) {
+ pr_iucv->path_sever(iucv->path, NULL);
+ skb_unlink(skb, &iucv->send_skb_q);
+ err = -EPIPE;
+ goto fail;
+ }
+ } else if (skb_is_nonlinear(skb)) {
+ struct iucv_array *iba = (struct iucv_array *)skb->head;
+ int i;
+
+ /* skip iucv_array lying in the headroom */
+ iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].length = (u32)skb_headlen(skb);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ iba[i + 1].address =
+ (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].length = (u32)skb_frag_size(frag);
+ }
+ err = pr_iucv->message_send(iucv->path, &txmsg,
+ IUCV_IPBUFLST, 0,
+ (void *)iba, skb->len);
+ } else { /* non-IPRM Linear skb */
+ err = pr_iucv->message_send(iucv->path, &txmsg,
+ 0, 0, (void *)skb->data, skb->len);
}
-
- /* this error should never happen since the
- * IUCV_IPRMDATA path flag is set... sever path */
- if (err == 0x15) {
- pr_iucv->path_sever(iucv->path, NULL);
+ if (err) {
+ if (err == 3) {
+ user_id[8] = 0;
+ memcpy(user_id, iucv->dst_user_id, 8);
+ appl_id[8] = 0;
+ memcpy(appl_id, iucv->dst_name, 8);
+ pr_err(
+ "Application %s on z/VM guest %s exceeds message limit\n",
+ appl_id, user_id);
+ err = -EAGAIN;
+ } else {
+ err = -EPIPE;
+ }
skb_unlink(skb, &iucv->send_skb_q);
- err = -EPIPE;
goto fail;
}
- } else
- err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0,
- (void *) skb->data, skb->len);
- if (err) {
- if (err == 3) {
- user_id[8] = 0;
- memcpy(user_id, iucv->dst_user_id, 8);
- appl_id[8] = 0;
- memcpy(appl_id, iucv->dst_name, 8);
- pr_err("Application %s on z/VM guest %s"
- " exceeds message limit\n",
- appl_id, user_id);
- err = -EAGAIN;
- } else
- err = -EPIPE;
- skb_unlink(skb, &iucv->send_skb_q);
- goto fail;
}
-release:
release_sock(sk);
return len;
@@ -1201,42 +1231,32 @@ out:
return err;
}
-/* iucv_fragment_skb() - Fragment a single IUCV message into multiple skb's
- *
- * Locking: must be called with message_q.lock held
- */
-static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len)
+static struct sk_buff *alloc_iucv_recv_skb(unsigned long len)
{
- int dataleft, size, copied = 0;
- struct sk_buff *nskb;
-
- dataleft = len;
- while (dataleft) {
- if (dataleft >= sk->sk_rcvbuf / 4)
- size = sk->sk_rcvbuf / 4;
- else
- size = dataleft;
-
- nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA);
- if (!nskb)
- return -ENOMEM;
-
- /* copy target class to control buffer of new skb */
- IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class;
-
- /* copy data fragment */
- memcpy(nskb->data, skb->data + copied, size);
- copied += size;
- dataleft -= size;
-
- skb_reset_transport_header(nskb);
- skb_reset_network_header(nskb);
- nskb->len = size;
+ size_t headroom, linear;
+ struct sk_buff *skb;
+ int err;
- skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, nskb);
+ if (len < PAGE_SIZE) {
+ headroom = 0;
+ linear = len;
+ } else {
+ headroom = sizeof(struct iucv_array) * (MAX_SKB_FRAGS + 1);
+ linear = PAGE_SIZE - headroom;
+ }
+ skb = alloc_skb_with_frags(headroom + linear, len - linear,
+ 0, &err, GFP_ATOMIC | GFP_DMA);
+ WARN_ONCE(!skb,
+ "alloc of recv iucv skb len=%lu failed with errcode=%d\n",
+ len, err);
+ if (skb) {
+ if (headroom)
+ skb_reserve(skb, headroom);
+ skb_put(skb, linear);
+ skb->len = len;
+ skb->data_len = len - linear;
}
-
- return 0;
+ return skb;
}
/* iucv_process_message() - Receive a single outstanding IUCV message
@@ -1263,31 +1283,32 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
skb->len = 0;
}
} else {
- rc = pr_iucv->message_receive(path, msg,
+ if (skb_is_nonlinear(skb)) {
+ struct iucv_array *iba = (struct iucv_array *)skb->head;
+ int i;
+
+ iba[0].address = (u32)(addr_t)skb->data;
+ iba[0].length = (u32)skb_headlen(skb);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ iba[i + 1].address =
+ (u32)(addr_t)skb_frag_address(frag);
+ iba[i + 1].length = (u32)skb_frag_size(frag);
+ }
+ rc = pr_iucv->message_receive(path, msg,
+ IUCV_IPBUFLST,
+ (void *)iba, len, NULL);
+ } else {
+ rc = pr_iucv->message_receive(path, msg,
msg->flags & IUCV_IPRMDATA,
skb->data, len, NULL);
+ }
if (rc) {
kfree_skb(skb);
return;
}
- /* we need to fragment iucv messages for SOCK_STREAM only;
- * for SOCK_SEQPACKET, it is only relevant if we support
- * record segmentation using MSG_EOR (see also recvmsg()) */
- if (sk->sk_type == SOCK_STREAM &&
- skb->truesize >= sk->sk_rcvbuf / 4) {
- rc = iucv_fragment_skb(sk, skb, len);
- kfree_skb(skb);
- skb = NULL;
- if (rc) {
- pr_iucv->path_sever(path, NULL);
- return;
- }
- skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
- } else {
- skb_reset_transport_header(skb);
- skb_reset_network_header(skb);
- skb->len = len;
- }
+ WARN_ON_ONCE(skb->len != len);
}
IUCV_SKB_CB(skb)->offset = 0;
@@ -1306,7 +1327,7 @@ static void iucv_process_message_q(struct sock *sk)
struct sock_msg_q *p, *n;
list_for_each_entry_safe(p, n, &iucv->message_q.list, list) {
- skb = alloc_skb(iucv_msg_length(&p->msg), GFP_ATOMIC | GFP_DMA);
+ skb = alloc_iucv_recv_skb(iucv_msg_length(&p->msg));
if (!skb)
break;
iucv_process_message(sk, skb, p->path, &p->msg);
@@ -1801,7 +1822,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
if (len > sk->sk_rcvbuf)
goto save_message;
- skb = alloc_skb(iucv_msg_length(msg), GFP_ATOMIC | GFP_DMA);
+ skb = alloc_iucv_recv_skb(iucv_msg_length(msg));
if (!skb)
goto save_message;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index e253c26f31ac..57fc5a46ce06 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -67,7 +67,6 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
return net_generic(net, l2tp_eth_net_id);
}
-static struct lock_class_key l2tp_eth_tx_busylock;
static int l2tp_eth_dev_init(struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
@@ -75,7 +74,8 @@ static int l2tp_eth_dev_init(struct net_device *dev)
priv->dev = dev;
eth_hw_addr_random(dev);
eth_broadcast_addr(dev->broadcast);
- dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock;
+ netdev_lockdep_set_classes(dev);
+
return 0;
}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 6c54e03fe9c1..ea2ae6664cc8 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -611,6 +611,11 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
+
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -620,9 +625,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
if (ipc6.dontfrag < 0)
ipc6.dontfrag = np->dontfrag;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 6651a78e100c..c4a1c3e84e12 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -10,6 +10,7 @@
*/
#include <linux/netdevice.h>
+#include <net/fib_rules.h>
#include <net/l3mdev.h>
/**
@@ -107,7 +108,7 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
*/
struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
- const struct flowi6 *fl6)
+ struct flowi6 *fl6)
{
struct dst_entry *dst = NULL;
struct net_device *dev;
@@ -160,3 +161,64 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
return rc;
}
EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
+
+int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6)
+{
+ struct net_device *dev;
+ int rc = 0;
+
+ if (fl6->flowi6_oif) {
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
+ if (dev && netif_is_l3_slave(dev))
+ dev = netdev_master_upper_dev_get_rcu(dev);
+
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_get_saddr6)
+ rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6);
+
+ rcu_read_unlock();
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(l3mdev_get_saddr6);
+
+/**
+ * l3mdev_fib_rule_match - Determine if flowi references an
+ * L3 master device
+ * @net: network namespace for device index lookup
+ * @fl: flow struct
+ */
+
+int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
+ struct fib_lookup_arg *arg)
+{
+ struct net_device *dev;
+ int rc = 0;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_fib_table) {
+ arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+ rc = 1;
+ goto out;
+ }
+
+ dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_fib_table) {
+ arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+ rc = 1;
+ goto out;
+ }
+
+out:
+ rcu_read_unlock();
+
+ return rc;
+}
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 42fa81031dfa..5650c46bf91a 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -194,17 +194,21 @@ static void
ieee80211_agg_stop_txq(struct sta_info *sta, int tid)
{
struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct ieee80211_sub_if_data *sdata;
+ struct fq *fq;
struct txq_info *txqi;
if (!txq)
return;
txqi = to_txq_info(txq);
+ sdata = vif_to_sdata(txq->vif);
+ fq = &sdata->local->fq;
/* Lock here to protect against further seqno updates on dequeue */
- spin_lock_bh(&txqi->queue.lock);
+ spin_lock_bh(&fq->lock);
set_bit(IEEE80211_TXQ_STOP, &txqi->flags);
- spin_unlock_bh(&txqi->queue.lock);
+ spin_unlock_bh(&fq->lock);
}
static void
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b251b2f7f8dd..2906c1004e1a 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -10,6 +10,7 @@
#include <linux/debugfs.h>
#include <linux/rtnetlink.h>
+#include <linux/vmalloc.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
#include "rate.h"
@@ -70,6 +71,177 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x",
DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s",
local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver");
+struct aqm_info {
+ struct ieee80211_local *local;
+ size_t size;
+ size_t len;
+ unsigned char buf[0];
+};
+
+#define AQM_HDR_LEN 200
+#define AQM_HW_ENTRY_LEN 40
+#define AQM_TXQ_ENTRY_LEN 110
+
+static int aqm_open(struct inode *inode, struct file *file)
+{
+ struct ieee80211_local *local = inode->i_private;
+ struct ieee80211_sub_if_data *sdata;
+ struct sta_info *sta;
+ struct txq_info *txqi;
+ struct fq *fq = &local->fq;
+ struct aqm_info *info = NULL;
+ int len = 0;
+ int i;
+
+ if (!local->ops->wake_tx_queue)
+ return -EOPNOTSUPP;
+
+ len += AQM_HDR_LEN;
+ len += 6 * AQM_HW_ENTRY_LEN;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &local->interfaces, list)
+ len += AQM_TXQ_ENTRY_LEN;
+ list_for_each_entry_rcu(sta, &local->sta_list, list)
+ len += AQM_TXQ_ENTRY_LEN * ARRAY_SIZE(sta->sta.txq);
+ rcu_read_unlock();
+
+ info = vmalloc(len);
+ if (!info)
+ return -ENOMEM;
+
+ spin_lock_bh(&local->fq.lock);
+ rcu_read_lock();
+
+ file->private_data = info;
+ info->local = local;
+ info->size = len;
+ len = 0;
+
+ len += scnprintf(info->buf + len, info->size - len,
+ "* hw\n"
+ "access name value\n"
+ "R fq_flows_cnt %u\n"
+ "R fq_backlog %u\n"
+ "R fq_overlimit %u\n"
+ "R fq_collisions %u\n"
+ "RW fq_limit %u\n"
+ "RW fq_quantum %u\n",
+ fq->flows_cnt,
+ fq->backlog,
+ fq->overlimit,
+ fq->collisions,
+ fq->limit,
+ fq->quantum);
+
+ len += scnprintf(info->buf + len,
+ info->size - len,
+ "* vif\n"
+ "ifname addr ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n");
+
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ txqi = to_txq_info(sdata->vif.txq);
+ len += scnprintf(info->buf + len, info->size - len,
+ "%s %pM %u %u %u %u %u %u %u %u\n",
+ sdata->name,
+ sdata->vif.addr,
+ txqi->txq.ac,
+ txqi->tin.backlog_bytes,
+ txqi->tin.backlog_packets,
+ txqi->tin.flows,
+ txqi->tin.overlimit,
+ txqi->tin.collisions,
+ txqi->tin.tx_bytes,
+ txqi->tin.tx_packets);
+ }
+
+ len += scnprintf(info->buf + len,
+ info->size - len,
+ "* sta\n"
+ "ifname addr tid ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n");
+
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ sdata = sta->sdata;
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ txqi = to_txq_info(sta->sta.txq[i]);
+ len += scnprintf(info->buf + len, info->size - len,
+ "%s %pM %d %d %u %u %u %u %u %u %u\n",
+ sdata->name,
+ sta->sta.addr,
+ txqi->txq.tid,
+ txqi->txq.ac,
+ txqi->tin.backlog_bytes,
+ txqi->tin.backlog_packets,
+ txqi->tin.flows,
+ txqi->tin.overlimit,
+ txqi->tin.collisions,
+ txqi->tin.tx_bytes,
+ txqi->tin.tx_packets);
+ }
+ }
+
+ info->len = len;
+
+ rcu_read_unlock();
+ spin_unlock_bh(&local->fq.lock);
+
+ return 0;
+}
+
+static int aqm_release(struct inode *inode, struct file *file)
+{
+ vfree(file->private_data);
+ return 0;
+}
+
+static ssize_t aqm_read(struct file *file,
+ char __user *user_buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct aqm_info *info = file->private_data;
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ info->buf, info->len);
+}
+
+static ssize_t aqm_write(struct file *file,
+ const char __user *user_buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct aqm_info *info = file->private_data;
+ struct ieee80211_local *local = info->local;
+ char buf[100];
+ size_t len;
+
+ if (count > sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, user_buf, count))
+ return -EFAULT;
+
+ buf[sizeof(buf) - 1] = '\0';
+ len = strlen(buf);
+ if (len > 0 && buf[len-1] == '\n')
+ buf[len-1] = 0;
+
+ if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1)
+ return count;
+ else if (sscanf(buf, "fq_quantum %u", &local->fq.quantum) == 1)
+ return count;
+
+ return -EINVAL;
+}
+
+static const struct file_operations aqm_ops = {
+ .write = aqm_write,
+ .read = aqm_read,
+ .open = aqm_open,
+ .release = aqm_release,
+ .llseek = default_llseek,
+};
+
#ifdef CONFIG_PM
static ssize_t reset_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
@@ -256,6 +428,7 @@ void debugfs_hw_add(struct ieee80211_local *local)
DEBUGFS_ADD(hwflags);
DEBUGFS_ADD(user_power);
DEBUGFS_ADD(power);
+ DEBUGFS_ADD_MODE(aqm, 0600);
statsd = debugfs_create_dir("statistics", phyd);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 33dfcbc2bf9c..fd334133ff45 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -328,14 +328,88 @@ STA_OPS(ht_capa);
static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
- char buf[128], *p = buf;
+ char buf[512], *p = buf;
struct sta_info *sta = file->private_data;
struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap;
p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n",
vhtc->vht_supported ? "" : "not ");
if (vhtc->vht_supported) {
- p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.8x\n", vhtc->cap);
+ p += scnprintf(p, sizeof(buf) + buf - p, "cap: %#.8x\n",
+ vhtc->cap);
+#define PFLAG(a, b) \
+ do { \
+ if (vhtc->cap & IEEE80211_VHT_CAP_ ## a) \
+ p += scnprintf(p, sizeof(buf) + buf - p, \
+ "\t\t%s\n", b); \
+ } while (0)
+
+ switch (vhtc->cap & 0x3) {
+ case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tMAX-MPDU-3895\n");
+ break;
+ case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tMAX-MPDU-7991\n");
+ break;
+ case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tMAX-MPDU-11454\n");
+ break;
+ default:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tMAX-MPDU-UNKNOWN\n");
+ };
+ switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
+ case 0:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\t80Mhz\n");
+ break;
+ case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\t160Mhz\n");
+ break;
+ case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\t80+80Mhz\n");
+ break;
+ default:
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tUNKNOWN-MHZ: 0x%x\n",
+ (vhtc->cap >> 2) & 0x3);
+ };
+ PFLAG(RXLDPC, "RXLDPC");
+ PFLAG(SHORT_GI_80, "SHORT-GI-80");
+ PFLAG(SHORT_GI_160, "SHORT-GI-160");
+ PFLAG(TXSTBC, "TXSTBC");
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tRXSTBC_%d\n", (vhtc->cap >> 8) & 0x7);
+ PFLAG(SU_BEAMFORMER_CAPABLE, "SU-BEAMFORMER-CAPABLE");
+ PFLAG(SU_BEAMFORMEE_CAPABLE, "SU-BEAMFORMEE-CAPABLE");
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tBEAMFORMEE-STS: 0x%x\n",
+ (vhtc->cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK) >>
+ IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT);
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tSOUNDING-DIMENSIONS: 0x%x\n",
+ (vhtc->cap & IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK)
+ >> IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT);
+ PFLAG(MU_BEAMFORMER_CAPABLE, "MU-BEAMFORMER-CAPABLE");
+ PFLAG(MU_BEAMFORMEE_CAPABLE, "MU-BEAMFORMEE-CAPABLE");
+ PFLAG(VHT_TXOP_PS, "TXOP-PS");
+ PFLAG(HTC_VHT, "HTC-VHT");
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tMPDU-LENGTH-EXPONENT: 0x%x\n",
+ (vhtc->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK) >>
+ IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT);
+ PFLAG(VHT_LINK_ADAPTATION_VHT_UNSOL_MFB,
+ "LINK-ADAPTATION-VHT-UNSOL-MFB");
+ p += scnprintf(p, sizeof(buf) + buf - p,
+ "\t\tLINK-ADAPTATION-VHT-MRQ-MFB: 0x%x\n",
+ (vhtc->cap & IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB) >> 26);
+ PFLAG(RX_ANTENNA_PATTERN, "RX-ANTENNA-PATTERN");
+ PFLAG(TX_ANTENNA_PATTERN, "TX-ANTENNA-PATTERN");
p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n",
le16_to_cpu(vhtc->vht_mcs.rx_mcs_map));
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9438c9406687..54edfb6fc1d1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -30,6 +30,7 @@
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
#include <net/mac80211.h>
+#include <net/fq.h>
#include "key.h"
#include "sta_info.h"
#include "debug.h"
@@ -805,10 +806,19 @@ enum txq_info_flags {
IEEE80211_TXQ_NO_AMSDU,
};
+/**
+ * struct txq_info - per tid queue
+ *
+ * @tin: contains packets split into multiple flows
+ * @def_flow: used as a fallback flow when a packet destined to @tin hashes to
+ * a fq_flow which is already owned by a different tin
+ * @def_cvars: codel vars for @def_flow
+ */
struct txq_info {
- struct sk_buff_head queue;
+ struct fq_tin tin;
+ struct fq_flow def_flow;
+ struct codel_vars def_cvars;
unsigned long flags;
- unsigned long byte_cnt;
/* keep last! */
struct ieee80211_txq txq;
@@ -856,7 +866,7 @@ struct ieee80211_sub_if_data {
bool control_port_no_encrypt;
int encrypt_headroom;
- atomic_t txqs_len[IEEE80211_NUM_ACS];
+ atomic_t num_tx_queued;
struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
struct mac80211_qos_map __rcu *qos_map;
@@ -1099,6 +1109,11 @@ struct ieee80211_local {
* it first anyway so they become a no-op */
struct ieee80211_hw hw;
+ struct fq fq;
+ struct codel_vars *cvars;
+ struct codel_params cparams;
+ struct codel_stats cstats;
+
const struct ieee80211_ops *ops;
/*
@@ -1931,9 +1946,13 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local)
return true;
}
-void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
- struct txq_info *txq, int tid);
+int ieee80211_txq_setup_flows(struct ieee80211_local *local);
+void ieee80211_txq_teardown_flows(struct ieee80211_local *local);
+void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct txq_info *txq, int tid);
+void ieee80211_txq_purge(struct ieee80211_local *local,
+ struct txq_info *txqi);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
const u8 *extra, size_t extra_len, const u8 *bssid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index c59af3eb9fa4..b123a9e325b3 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -779,6 +779,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
bool going_down)
{
struct ieee80211_local *local = sdata->local;
+ struct fq *fq = &local->fq;
unsigned long flags;
struct sk_buff *skb, *tmp;
u32 hw_reconf_flags = 0;
@@ -977,12 +978,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.txq) {
struct txq_info *txqi = to_txq_info(sdata->vif.txq);
- spin_lock_bh(&txqi->queue.lock);
- ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
- txqi->byte_cnt = 0;
- spin_unlock_bh(&txqi->queue.lock);
-
- atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
+ spin_lock_bh(&fq->lock);
+ ieee80211_txq_purge(local, txqi);
+ spin_unlock_bh(&fq->lock);
}
if (local->open_count == 0)
@@ -1198,6 +1196,12 @@ static void ieee80211_if_setup(struct net_device *dev)
dev->destructor = ieee80211_if_free;
}
+static void ieee80211_if_setup_no_queue(struct net_device *dev)
+{
+ ieee80211_if_setup(dev);
+ dev->priv_flags |= IFF_NO_QUEUE;
+}
+
static void ieee80211_iface_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
@@ -1707,6 +1711,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
struct net_device *ndev = NULL;
struct ieee80211_sub_if_data *sdata = NULL;
struct txq_info *txqi;
+ void (*if_setup)(struct net_device *dev);
int ret, i;
int txqs = 1;
@@ -1734,12 +1739,17 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
txq_size += sizeof(struct txq_info) +
local->hw.txq_data_size;
+ if (local->ops->wake_tx_queue)
+ if_setup = ieee80211_if_setup_no_queue;
+ else
+ if_setup = ieee80211_if_setup;
+
if (local->hw.queues >= IEEE80211_NUM_ACS)
txqs = IEEE80211_NUM_ACS;
ndev = alloc_netdev_mqs(size + txq_size,
name, name_assign_type,
- ieee80211_if_setup, txqs, 1);
+ if_setup, txqs, 1);
if (!ndev)
return -ENOMEM;
dev_net_set(ndev, wiphy_net(local->hw.wiphy));
@@ -1780,7 +1790,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
if (txq_size) {
txqi = netdev_priv(ndev) + size;
- ieee80211_init_tx_queue(sdata, NULL, txqi, 0);
+ ieee80211_txq_init(sdata, NULL, txqi, 0);
}
sdata->dev = ndev;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 7ee91d6151d1..d00ea9b13f49 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1055,9 +1055,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->dynamic_ps_forced_timeout = -1;
- if (!local->hw.txq_ac_max_pending)
- local->hw.txq_ac_max_pending = 64;
-
result = ieee80211_wep_init(local);
if (result < 0)
wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
@@ -1089,6 +1086,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
rtnl_unlock();
+ result = ieee80211_txq_setup_flows(local);
+ if (result)
+ goto fail_flows;
+
#ifdef CONFIG_INET
local->ifa_notifier.notifier_call = ieee80211_ifa_changed;
result = register_inetaddr_notifier(&local->ifa_notifier);
@@ -1114,6 +1115,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
#if defined(CONFIG_INET) || defined(CONFIG_IPV6)
fail_ifa:
#endif
+ ieee80211_txq_teardown_flows(local);
+ fail_flows:
rtnl_lock();
rate_control_deinitialize(local);
ieee80211_remove_interfaces(local);
@@ -1172,6 +1175,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
skb_queue_purge(&local->skb_queue);
skb_queue_purge(&local->skb_queue_unreliable);
skb_queue_purge(&local->skb_queue_tdls_chsw);
+ ieee80211_txq_teardown_flows(local);
destroy_workqueue(local->workqueue);
wiphy_unregister(local->hw.wiphy);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5e65e838992a..9a1eb70cb120 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1268,7 +1268,7 @@ static void sta_ps_start(struct sta_info *sta)
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
- if (!skb_queue_len(&txqi->queue))
+ if (!txqi->tin.backlog_packets)
set_bit(tid, &sta->txq_buffered_tids);
else
clear_bit(tid, &sta->txq_buffered_tids);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 5ccfdbd406bd..76b737dcc36f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -90,6 +90,7 @@ static void __cleanup_single_sta(struct sta_info *sta)
struct tid_ampdu_tx *tid_tx;
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
+ struct fq *fq = &local->fq;
struct ps_data *ps;
if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -113,11 +114,10 @@ static void __cleanup_single_sta(struct sta_info *sta)
if (sta->sta.txq[0]) {
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
- int n = skb_queue_len(&txqi->queue);
- ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
- atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]);
- txqi->byte_cnt = 0;
+ spin_lock_bh(&fq->lock);
+ ieee80211_txq_purge(local, txqi);
+ spin_unlock_bh(&fq->lock);
}
}
@@ -368,7 +368,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
struct txq_info *txq = txq_data + i * size;
- ieee80211_init_tx_queue(sdata, sta, txq, i);
+ ieee80211_txq_init(sdata, sta, txq, i);
}
}
@@ -1211,7 +1211,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
- if (!skb_queue_len(&txqi->queue))
+ if (!txqi->tin.backlog_packets)
continue;
drv_wake_tx_queue(local, txqi);
@@ -1648,7 +1648,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
- if (!(tids & BIT(tid)) || skb_queue_len(&txqi->queue))
+ if (!(tids & BIT(tid)) || txqi->tin.backlog_packets)
continue;
sta_info_recalc_tim(sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 203044379ce0..44ec605a5682 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -24,7 +24,10 @@
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
#include <net/mac80211.h>
+#include <net/codel.h>
+#include <net/codel_impl.h>
#include <asm/unaligned.h>
+#include <net/fq_impl.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -1236,27 +1239,21 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
return TX_CONTINUE;
}
-static void ieee80211_drv_tx(struct ieee80211_local *local,
- struct ieee80211_vif *vif,
- struct ieee80211_sta *pubsta,
- struct sk_buff *skb)
+static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *pubsta,
+ struct sk_buff *skb)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_tx_control control = {
- .sta = pubsta,
- };
struct ieee80211_txq *txq = NULL;
- struct txq_info *txqi;
- u8 ac;
if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
(info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
- goto tx_normal;
+ return NULL;
if (!ieee80211_is_data(hdr->frame_control))
- goto tx_normal;
+ return NULL;
if (pubsta) {
u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
@@ -1267,51 +1264,230 @@ static void ieee80211_drv_tx(struct ieee80211_local *local,
}
if (!txq)
- goto tx_normal;
+ return NULL;
- ac = txq->ac;
- txqi = to_txq_info(txq);
- atomic_inc(&sdata->txqs_len[ac]);
- if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending)
- netif_stop_subqueue(sdata->dev, ac);
+ return to_txq_info(txq);
+}
- spin_lock_bh(&txqi->queue.lock);
- txqi->byte_cnt += skb->len;
- __skb_queue_tail(&txqi->queue, skb);
- spin_unlock_bh(&txqi->queue.lock);
+static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
+{
+ IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
+}
- drv_wake_tx_queue(local, txqi);
+static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi)
+{
+ IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif;
+}
- return;
+static u32 codel_skb_len_func(const struct sk_buff *skb)
+{
+ return skb->len;
+}
+
+static codel_time_t codel_skb_time_func(const struct sk_buff *skb)
+{
+ const struct ieee80211_tx_info *info;
-tx_normal:
- drv_tx(local, &control, skb);
+ info = (const struct ieee80211_tx_info *)skb->cb;
+ return info->control.enqueue_time;
+}
+
+static struct sk_buff *codel_dequeue_func(struct codel_vars *cvars,
+ void *ctx)
+{
+ struct ieee80211_local *local;
+ struct txq_info *txqi;
+ struct fq *fq;
+ struct fq_flow *flow;
+
+ txqi = ctx;
+ local = vif_to_sdata(txqi->txq.vif)->local;
+ fq = &local->fq;
+
+ if (cvars == &txqi->def_cvars)
+ flow = &txqi->def_flow;
+ else
+ flow = &fq->flows[cvars - local->cvars];
+
+ return fq_flow_dequeue(fq, flow);
+}
+
+static void codel_drop_func(struct sk_buff *skb,
+ void *ctx)
+{
+ struct ieee80211_local *local;
+ struct ieee80211_hw *hw;
+ struct txq_info *txqi;
+
+ txqi = ctx;
+ local = vif_to_sdata(txqi->txq.vif)->local;
+ hw = &local->hw;
+
+ ieee80211_free_txskb(hw, skb);
+}
+
+static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
+ struct fq_tin *tin,
+ struct fq_flow *flow)
+{
+ struct ieee80211_local *local;
+ struct txq_info *txqi;
+ struct codel_vars *cvars;
+ struct codel_params *cparams;
+ struct codel_stats *cstats;
+
+ local = container_of(fq, struct ieee80211_local, fq);
+ txqi = container_of(tin, struct txq_info, tin);
+ cparams = &local->cparams;
+ cstats = &local->cstats;
+
+ if (flow == &txqi->def_flow)
+ cvars = &txqi->def_cvars;
+ else
+ cvars = &local->cvars[flow - fq->flows];
+
+ return codel_dequeue(txqi,
+ &flow->backlog,
+ cparams,
+ cvars,
+ cstats,
+ codel_skb_len_func,
+ codel_skb_time_func,
+ codel_drop_func,
+ codel_dequeue_func);
+}
+
+static void fq_skb_free_func(struct fq *fq,
+ struct fq_tin *tin,
+ struct fq_flow *flow,
+ struct sk_buff *skb)
+{
+ struct ieee80211_local *local;
+
+ local = container_of(fq, struct ieee80211_local, fq);
+ ieee80211_free_txskb(&local->hw, skb);
+}
+
+static struct fq_flow *fq_flow_get_default_func(struct fq *fq,
+ struct fq_tin *tin,
+ int idx,
+ struct sk_buff *skb)
+{
+ struct txq_info *txqi;
+
+ txqi = container_of(tin, struct txq_info, tin);
+ return &txqi->def_flow;
+}
+
+static void ieee80211_txq_enqueue(struct ieee80211_local *local,
+ struct txq_info *txqi,
+ struct sk_buff *skb)
+{
+ struct fq *fq = &local->fq;
+ struct fq_tin *tin = &txqi->tin;
+
+ ieee80211_set_skb_enqueue_time(skb);
+ fq_tin_enqueue(fq, tin, skb,
+ fq_skb_free_func,
+ fq_flow_get_default_func);
+}
+
+void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct txq_info *txqi, int tid)
+{
+ fq_tin_init(&txqi->tin);
+ fq_flow_init(&txqi->def_flow);
+ codel_vars_init(&txqi->def_cvars);
+
+ txqi->txq.vif = &sdata->vif;
+
+ if (sta) {
+ txqi->txq.sta = &sta->sta;
+ sta->sta.txq[tid] = &txqi->txq;
+ txqi->txq.tid = tid;
+ txqi->txq.ac = ieee802_1d_to_ac[tid & 7];
+ } else {
+ sdata->vif.txq = &txqi->txq;
+ txqi->txq.tid = 0;
+ txqi->txq.ac = IEEE80211_AC_BE;
+ }
+}
+
+void ieee80211_txq_purge(struct ieee80211_local *local,
+ struct txq_info *txqi)
+{
+ struct fq *fq = &local->fq;
+ struct fq_tin *tin = &txqi->tin;
+
+ fq_tin_reset(fq, tin, fq_skb_free_func);
+}
+
+int ieee80211_txq_setup_flows(struct ieee80211_local *local)
+{
+ struct fq *fq = &local->fq;
+ int ret;
+ int i;
+
+ if (!local->ops->wake_tx_queue)
+ return 0;
+
+ ret = fq_init(fq, 4096);
+ if (ret)
+ return ret;
+
+ codel_params_init(&local->cparams);
+ codel_stats_init(&local->cstats);
+ local->cparams.interval = MS2TIME(100);
+ local->cparams.target = MS2TIME(20);
+ local->cparams.ecn = true;
+
+ local->cvars = kcalloc(fq->flows_cnt, sizeof(local->cvars[0]),
+ GFP_KERNEL);
+ if (!local->cvars) {
+ fq_reset(fq, fq_skb_free_func);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < fq->flows_cnt; i++)
+ codel_vars_init(&local->cvars[i]);
+
+ return 0;
+}
+
+void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
+{
+ struct fq *fq = &local->fq;
+
+ if (!local->ops->wake_tx_queue)
+ return;
+
+ kfree(local->cvars);
+ local->cvars = NULL;
+
+ fq_reset(fq, fq_skb_free_func);
}
struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
struct txq_info *txqi = container_of(txq, struct txq_info, txq);
struct ieee80211_hdr *hdr;
struct sk_buff *skb = NULL;
- u8 ac = txq->ac;
+ struct fq *fq = &local->fq;
+ struct fq_tin *tin = &txqi->tin;
- spin_lock_bh(&txqi->queue.lock);
+ spin_lock_bh(&fq->lock);
if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
goto out;
- skb = __skb_dequeue(&txqi->queue);
+ skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
if (!skb)
goto out;
- txqi->byte_cnt -= skb->len;
-
- atomic_dec(&sdata->txqs_len[ac]);
- if (__netif_subqueue_stopped(sdata->dev, ac))
- ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]);
+ ieee80211_set_skb_vif(skb, txqi);
hdr = (struct ieee80211_hdr *)skb->data;
if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
@@ -1327,7 +1503,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
}
out:
- spin_unlock_bh(&txqi->queue.lock);
+ spin_unlock_bh(&fq->lock);
if (skb && skb_has_frag_list(skb) &&
!ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
@@ -1343,7 +1519,10 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
struct sk_buff_head *skbs,
bool txpending)
{
+ struct ieee80211_tx_control control = {};
+ struct fq *fq = &local->fq;
struct sk_buff *skb, *tmp;
+ struct txq_info *txqi;
unsigned long flags;
skb_queue_walk_safe(skbs, skb, tmp) {
@@ -1358,6 +1537,21 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
}
#endif
+ txqi = ieee80211_get_txq(local, vif, sta, skb);
+ if (txqi) {
+ info->control.vif = vif;
+
+ __skb_unlink(skb, skbs);
+
+ spin_lock_bh(&fq->lock);
+ ieee80211_txq_enqueue(local, txqi, skb);
+ spin_unlock_bh(&fq->lock);
+
+ drv_wake_tx_queue(local, txqi);
+
+ continue;
+ }
+
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
if (local->queue_stop_reasons[q] ||
(!txpending && !skb_queue_empty(&local->pending[q]))) {
@@ -1400,9 +1594,10 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
info->control.vif = vif;
+ control.sta = sta;
__skb_unlink(skb, skbs);
- ieee80211_drv_tx(local, vif, sta, skb);
+ drv_tx(local, &control, skb);
}
return true;
@@ -2882,6 +3077,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
struct ieee80211_local *local = sdata->local;
+ struct fq *fq = &local->fq;
+ struct fq_tin *tin;
+ struct fq_flow *flow;
u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
struct ieee80211_txq *txq = sta->sta.txq[tid];
struct txq_info *txqi;
@@ -2893,6 +3091,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
__be16 len;
void *data;
bool ret = false;
+ unsigned int orig_len;
int n = 1, nfrags;
if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
@@ -2909,12 +3108,20 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
max_amsdu_len = min_t(int, max_amsdu_len,
sta->sta.max_rc_amsdu_len);
- spin_lock_bh(&txqi->queue.lock);
+ spin_lock_bh(&fq->lock);
+
+ /* TODO: Ideally aggregation should be done on dequeue to remain
+ * responsive to environment changes.
+ */
- head = skb_peek_tail(&txqi->queue);
+ tin = &txqi->tin;
+ flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func);
+ head = skb_peek_tail(&flow->queue);
if (!head)
goto out;
+ orig_len = head->len;
+
if (skb->len + head->len > max_amsdu_len)
goto out;
@@ -2953,8 +3160,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
head->data_len += skb->len;
*frag_tail = skb;
+ flow->backlog += head->len - orig_len;
+ tin->backlog_bytes += head->len - orig_len;
+
+ fq_recalc_backlog(fq, tin, flow);
+
out:
- spin_unlock_bh(&txqi->queue.lock);
+ spin_unlock_bh(&fq->lock);
return ret;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 905003f75c4d..42bf0b6685e8 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -244,6 +244,9 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
struct ieee80211_sub_if_data *sdata;
int n_acs = IEEE80211_NUM_ACS;
+ if (local->ops->wake_tx_queue)
+ return;
+
if (local->hw.queues < IEEE80211_NUM_ACS)
n_acs = 1;
@@ -260,11 +263,6 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
for (ac = 0; ac < n_acs; ac++) {
int ac_queue = sdata->vif.hw_queue[ac];
- if (local->ops->wake_tx_queue &&
- (atomic_read(&sdata->txqs_len[ac]) >
- local->hw.txq_ac_max_pending))
- continue;
-
if (ac_queue == queue ||
(sdata->vif.cab_queue == queue &&
local->queue_stop_reasons[ac_queue] == 0 &&
@@ -352,6 +350,9 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
return;
+ if (local->ops->wake_tx_queue)
+ return;
+
if (local->hw.queues < IEEE80211_NUM_ACS)
n_acs = 1;
@@ -3388,25 +3389,6 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo)
return buf;
}
-void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
- struct txq_info *txqi, int tid)
-{
- skb_queue_head_init(&txqi->queue);
- txqi->txq.vif = &sdata->vif;
-
- if (sta) {
- txqi->txq.sta = &sta->sta;
- sta->sta.txq[tid] = &txqi->txq;
- txqi->txq.tid = tid;
- txqi->txq.ac = ieee802_1d_to_ac[tid & 7];
- } else {
- sdata->vif.txq = &txqi->txq;
- txqi->txq.tid = 0;
- txqi->txq.ac = IEEE80211_AC_BE;
- }
-}
-
void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
unsigned long *frame_cnt,
unsigned long *byte_cnt)
@@ -3414,9 +3396,9 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
struct txq_info *txqi = to_txq_info(txq);
if (frame_cnt)
- *frame_cnt = txqi->queue.qlen;
+ *frame_cnt = txqi->tin.backlog_packets;
if (byte_cnt)
- *byte_cnt = txqi->byte_cnt;
+ *byte_cnt = txqi->tin.backlog_bytes;
}
EXPORT_SYMBOL(ieee80211_txq_get_depth);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 0b80a7140cc4..e9beaa58573c 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -91,7 +91,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->len <= mtu)
return false;
- if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
@@ -1009,9 +1009,10 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
unsigned int flags;
if (event == NETDEV_REGISTER) {
- /* For now just support ethernet devices */
- if ((dev->type == ARPHRD_ETHER) ||
- (dev->type == ARPHRD_LOOPBACK)) {
+ /* For now just support Ethernet and IPGRE devices */
+ if (dev->type == ARPHRD_ETHER ||
+ dev->type == ARPHRD_LOOPBACK ||
+ dev->type == ARPHRD_IPGRE) {
mdev = mpls_add_dev(dev);
if (IS_ERR(mdev))
return notifier_from_errno(PTR_ERR(mdev));
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 604df6fae6fc..515131f9e021 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -137,7 +137,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.ewma_log = info->ewma_log;
ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
- &est->lock, &cfg.opt);
+ &est->lock, NULL, &cfg.opt);
if (ret < 0)
goto err2;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index e68ef9ccd703..3cfd6cc60504 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -8,20 +8,6 @@
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
-struct netlink_ring {
- void **pg_vec;
- unsigned int head;
- unsigned int frames_per_block;
- unsigned int frame_size;
- unsigned int frame_max;
-
- unsigned int pg_vec_order;
- unsigned int pg_vec_pages;
- unsigned int pg_vec_len;
-
- atomic_t pending;
-};
-
struct netlink_sock {
/* struct sock has to be the first member of netlink_sock */
struct sock sk;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 9a3eb7a0ebf4..1ecbd7715f6d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -750,6 +750,14 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
if (likely(vport)) {
u16 mru = OVS_CB(skb)->mru;
+ u32 cutlen = OVS_CB(skb)->cutlen;
+
+ if (unlikely(cutlen > 0)) {
+ if (skb->len - cutlen > ETH_HLEN)
+ pskb_trim(skb, skb->len - cutlen);
+ else
+ pskb_trim(skb, ETH_HLEN);
+ }
if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
ovs_vport_send(vport, skb);
@@ -775,7 +783,8 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
- const struct nlattr *actions, int actions_len)
+ const struct nlattr *actions, int actions_len,
+ uint32_t cutlen)
{
struct dp_upcall_info upcall;
const struct nlattr *a;
@@ -822,7 +831,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
} /* End of switch. */
}
- return ovs_dp_upcall(dp, skb, key, &upcall);
+ return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}
static int sample(struct datapath *dp, struct sk_buff *skb,
@@ -832,6 +841,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
int rem;
+ u32 cutlen = 0;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
@@ -858,13 +868,24 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
return 0;
/* The only known usage of sample action is having a single user-space
+ * action, or having a truncate action followed by a single user-space
* action. Treat this usage as a special case.
* The output_userspace() should clone the skb to be sent to the
* user space. This skb will be consumed by its caller.
*/
+ if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
+ struct ovs_action_trunc *trunc = nla_data(a);
+
+ if (skb->len > trunc->max_len)
+ cutlen = skb->len - trunc->max_len;
+
+ a = nla_next(a, &rem);
+ }
+
if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
nla_is_last(a, rem)))
- return output_userspace(dp, skb, key, a, actions, actions_len);
+ return output_userspace(dp, skb, key, a, actions,
+ actions_len, cutlen);
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb)
@@ -1051,6 +1072,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (out_skb)
do_output(dp, out_skb, prev_port, key);
+ OVS_CB(skb)->cutlen = 0;
prev_port = -1;
}
@@ -1059,8 +1081,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
prev_port = nla_get_u32(a);
break;
+ case OVS_ACTION_ATTR_TRUNC: {
+ struct ovs_action_trunc *trunc = nla_data(a);
+
+ if (skb->len > trunc->max_len)
+ OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
+ break;
+ }
+
case OVS_ACTION_ATTR_USERSPACE:
- output_userspace(dp, skb, key, a, attr, len);
+ output_userspace(dp, skb, key, a, attr,
+ len, OVS_CB(skb)->cutlen);
+ OVS_CB(skb)->cutlen = 0;
break;
case OVS_ACTION_ATTR_HASH:
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index d84312584ee4..b4069a90e375 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -834,6 +834,17 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
return 0;
}
+static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
+{
+ size_t i;
+
+ for (i = 0; i < sizeof(*labels); i++)
+ if (labels->ct_labels[i])
+ return true;
+
+ return false;
+}
+
/* Lookup connection and confirm if unconfirmed. */
static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
@@ -844,24 +855,32 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
err = __ovs_ct_lookup(net, key, info, skb);
if (err)
return err;
- /* This is a no-op if the connection has already been confirmed. */
+
+ /* Apply changes before confirming the connection so that the initial
+ * conntrack NEW netlink event carries the values given in the CT
+ * action.
+ */
+ if (info->mark.mask) {
+ err = ovs_ct_set_mark(skb, key, info->mark.value,
+ info->mark.mask);
+ if (err)
+ return err;
+ }
+ if (labels_nonzero(&info->labels.mask)) {
+ err = ovs_ct_set_labels(skb, key, &info->labels.value,
+ &info->labels.mask);
+ if (err)
+ return err;
+ }
+ /* This will take care of sending queued events even if the connection
+ * is already confirmed.
+ */
if (nf_conntrack_confirm(skb) != NF_ACCEPT)
return -EINVAL;
return 0;
}
-static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
-{
- size_t i;
-
- for (i = 0; i < sizeof(*labels); i++)
- if (labels->ct_labels[i])
- return true;
-
- return false;
-}
-
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
* value if 'skb' is freed.
*/
@@ -886,19 +905,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
err = ovs_ct_commit(net, key, info, skb);
else
err = ovs_ct_lookup(net, key, info, skb);
- if (err)
- goto err;
- if (info->mark.mask) {
- err = ovs_ct_set_mark(skb, key, info->mark.value,
- info->mark.mask);
- if (err)
- goto err;
- }
- if (labels_nonzero(&info->labels.mask))
- err = ovs_ct_set_labels(skb, key, &info->labels.value,
- &info->labels.mask);
-err:
skb_push(skb, nh_ofs);
if (err)
kfree_skb(skb);
@@ -1155,6 +1162,20 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
}
}
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (!info->commit && info->mark.mask) {
+ OVS_NLERR(log,
+ "Setting conntrack mark requires 'commit' flag.");
+ return -EINVAL;
+ }
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ if (!info->commit && labels_nonzero(&info->labels.mask)) {
+ OVS_NLERR(log,
+ "Setting conntrack labels requires 'commit' flag.");
+ return -EINVAL;
+ }
+#endif
if (rem > 0) {
OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
return -EINVAL;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 856bd8dba676..524c0fd3078e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -137,10 +137,12 @@ EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
const struct sw_flow_key *,
- const struct dp_upcall_info *);
+ const struct dp_upcall_info *,
+ uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct sw_flow_key *,
- const struct dp_upcall_info *);
+ const struct dp_upcall_info *,
+ uint32_t cutlen);
/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
@@ -275,7 +277,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
upcall.mru = OVS_CB(skb)->mru;
- error = ovs_dp_upcall(dp, skb, key, &upcall);
+ error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
if (unlikely(error))
kfree_skb(skb);
else
@@ -300,7 +302,8 @@ out:
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
- const struct dp_upcall_info *upcall_info)
+ const struct dp_upcall_info *upcall_info,
+ uint32_t cutlen)
{
struct dp_stats_percpu *stats;
int err;
@@ -311,9 +314,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
}
if (!skb_is_gso(skb))
- err = queue_userspace_packet(dp, skb, key, upcall_info);
+ err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
else
- err = queue_gso_packets(dp, skb, key, upcall_info);
+ err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
if (err)
goto err;
@@ -331,7 +334,8 @@ err:
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
- const struct dp_upcall_info *upcall_info)
+ const struct dp_upcall_info *upcall_info,
+ uint32_t cutlen)
{
unsigned short gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
@@ -360,7 +364,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
if (gso_type & SKB_GSO_UDP && skb != segs)
key = &later_key;
- err = queue_userspace_packet(dp, skb, key, upcall_info);
+ err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
if (err)
break;
@@ -383,7 +387,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
- + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+ + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
+ + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
@@ -416,7 +421,8 @@ static void pad_packet(struct datapath *dp, struct sk_buff *skb)
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
- const struct dp_upcall_info *upcall_info)
+ const struct dp_upcall_info *upcall_info,
+ uint32_t cutlen)
{
struct ovs_header *upcall;
struct sk_buff *nskb = NULL;
@@ -461,7 +467,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
else
hlen = skb->len;
- len = upcall_msg_size(upcall_info, hlen);
+ len = upcall_msg_size(upcall_info, hlen - cutlen);
user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
@@ -509,15 +515,25 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
pad_packet(dp, user_skb);
}
+ /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
+ if (cutlen > 0) {
+ if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
+ skb->len)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+ pad_packet(dp, user_skb);
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
err = -ENOBUFS;
goto out;
}
- nla->nla_len = nla_attr_size(skb->len);
+ nla->nla_len = nla_attr_size(skb->len - cutlen);
- err = skb_zerocopy(user_skb, skb, skb->len, hlen);
+ err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
if (err)
goto out;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 427e39a045cf..ab85c1cae255 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -100,11 +100,13 @@ struct datapath {
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
* @mru: The maximum received fragement size; 0 if the packet is not
+ * @cutlen: The number of bytes from the packet end to be removed.
* fragmented.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
+ u32 cutlen;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -194,7 +196,8 @@ extern struct genl_family dp_vport_genl_family;
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
- const struct sw_flow_key *, const struct dp_upcall_info *);
+ const struct sw_flow_key *, const struct dp_upcall_info *,
+ uint32_t cutlen);
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 0bb650f4f219..c78a6a1476fb 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2229,6 +2229,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CT] = (u32)-1,
+ [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -2255,6 +2256,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
break;
+ case OVS_ACTION_ATTR_TRUNC: {
+ const struct ovs_action_trunc *trunc = nla_data(a);
+
+ if (trunc->max_len < ETH_HLEN)
+ return -EINVAL;
+ break;
+ }
+
case OVS_ACTION_ATTR_HASH: {
const struct ovs_action_hash *act_hash = nla_data(a);
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 2ee48e447b72..434e04c3a189 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -195,7 +195,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
}
vport->dev = alloc_netdev(sizeof(struct internal_dev),
- parms->name, NET_NAME_UNKNOWN, do_setup);
+ parms->name, NET_NAME_USER, do_setup);
if (!vport->dev) {
err = -ENOMEM;
goto error_free_vport;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 31cbc8c5c7db..6b21fd068d87 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -444,6 +444,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0;
+ OVS_CB(skb)->cutlen = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9bff6ef16fa7..d1f3b9e977e5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1979,40 +1979,8 @@ static int __packet_rcv_vnet(const struct sk_buff *skb,
{
*vnet_hdr = (const struct virtio_net_hdr) { 0 };
- if (skb_is_gso(skb)) {
- struct skb_shared_info *sinfo = skb_shinfo(skb);
-
- /* This is a hint as to how much should be linear. */
- vnet_hdr->hdr_len =
- __cpu_to_virtio16(vio_le(), skb_headlen(skb));
- vnet_hdr->gso_size =
- __cpu_to_virtio16(vio_le(), sinfo->gso_size);
-
- if (sinfo->gso_type & SKB_GSO_TCPV4)
- vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- else if (sinfo->gso_type & SKB_GSO_TCPV6)
- vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- else if (sinfo->gso_type & SKB_GSO_UDP)
- vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
- else if (sinfo->gso_type & SKB_GSO_FCOE)
- return -EINVAL;
- else
- BUG();
-
- if (sinfo->gso_type & SKB_GSO_TCP_ECN)
- vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
- } else
- vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(),
- skb_checksum_start_offset(skb));
- vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(),
- skb->csum_offset);
- } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
- } /* else everything is zero */
+ if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le()))
+ BUG();
return 0;
}
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 6641bcf7c185..8398fee7c866 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -235,7 +235,8 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
* therefore trigger warnings.
* Defer the xmit to rds_send_worker() instead.
*/
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+ queue_delayed_work(rds_wq,
+ &conn->c_path[0].cp_send_w, 0);
}
}
diff --git a/net/rds/connection.c b/net/rds/connection.c
index e3b118cae81d..a4b07c899d89 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -95,14 +95,16 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
* and receiving over this connection again in the future. It is up to
* the transport to have serialized this call with its send and recv.
*/
-static void rds_conn_reset(struct rds_connection *conn)
+static void rds_conn_path_reset(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
+
rdsdebug("connection %pI4 to %pI4 reset\n",
&conn->c_laddr, &conn->c_faddr);
rds_stats_inc(s_conn_reset);
- rds_send_reset(conn);
- conn->c_flags = 0;
+ rds_send_path_reset(cp);
+ cp->cp_flags = 0;
/* Do not clear next_rx_seq here, else we cannot distinguish
* retransmitted packets from new packets, and will hand all
@@ -110,6 +112,32 @@ static void rds_conn_reset(struct rds_connection *conn)
* reliability guarantees of RDS. */
}
+static void __rds_conn_path_init(struct rds_connection *conn,
+ struct rds_conn_path *cp, bool is_outgoing)
+{
+ spin_lock_init(&cp->cp_lock);
+ cp->cp_next_tx_seq = 1;
+ init_waitqueue_head(&cp->cp_waitq);
+ INIT_LIST_HEAD(&cp->cp_send_queue);
+ INIT_LIST_HEAD(&cp->cp_retrans);
+
+ cp->cp_conn = conn;
+ atomic_set(&cp->cp_state, RDS_CONN_DOWN);
+ cp->cp_send_gen = 0;
+ /* cp_outgoing is per-path. So we can only set it here
+ * for the single-path transports.
+ */
+ if (!conn->c_trans->t_mp_capable)
+ cp->cp_outgoing = (is_outgoing ? 1 : 0);
+ cp->cp_reconnect_jiffies = 0;
+ INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
+ INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
+ INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker);
+ INIT_WORK(&cp->cp_down_w, rds_shutdown_worker);
+ mutex_init(&cp->cp_cm_lock);
+ cp->cp_flags = 0;
+}
+
/*
* There is only every one 'conn' for a given pair of addresses in the
* system at a time. They contain messages to be retransmitted and so
@@ -153,13 +181,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
INIT_HLIST_NODE(&conn->c_hash_node);
conn->c_laddr = laddr;
conn->c_faddr = faddr;
- spin_lock_init(&conn->c_lock);
- conn->c_next_tx_seq = 1;
- rds_conn_net_set(conn, net);
- init_waitqueue_head(&conn->c_waitq);
- INIT_LIST_HEAD(&conn->c_send_queue);
- INIT_LIST_HEAD(&conn->c_retrans);
+ rds_conn_net_set(conn, net);
ret = rds_cong_get_maps(conn);
if (ret) {
@@ -195,17 +218,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
goto out;
}
- atomic_set(&conn->c_state, RDS_CONN_DOWN);
- conn->c_send_gen = 0;
- conn->c_outgoing = (is_outgoing ? 1 : 0);
- conn->c_reconnect_jiffies = 0;
- INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
- INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
- INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker);
- INIT_WORK(&conn->c_down_w, rds_shutdown_worker);
- mutex_init(&conn->c_cm_lock);
- conn->c_flags = 0;
-
rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
conn, &laddr, &faddr,
trans->t_name ? trans->t_name : "[unknown]",
@@ -222,7 +234,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (parent) {
/* Creating passive conn */
if (parent->c_passive) {
- trans->conn_free(conn->c_transport_data);
+ trans->conn_free(conn->c_path[0].cp_transport_data);
kmem_cache_free(rds_conn_slab, conn);
conn = parent->c_passive;
} else {
@@ -236,10 +248,26 @@ static struct rds_connection *__rds_conn_create(struct net *net,
found = rds_conn_lookup(net, head, laddr, faddr, trans);
if (found) {
- trans->conn_free(conn->c_transport_data);
+ struct rds_conn_path *cp;
+ int i;
+
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ cp = &conn->c_path[i];
+ trans->conn_free(cp->cp_transport_data);
+ if (!trans->t_mp_capable)
+ break;
+ }
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
+ int i;
+
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ __rds_conn_path_init(conn, &conn->c_path[i],
+ is_outgoing);
+ conn->c_path[i].cp_index = i;
+ }
+
hlist_add_head_rcu(&conn->c_hash_node, head);
rds_cong_add_conn(conn);
rds_conn_count++;
@@ -267,10 +295,12 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
-void rds_conn_shutdown(struct rds_connection *conn)
+void rds_conn_shutdown(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
+
/* shut it down unless it's down already */
- if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
+ if (!rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
/*
* Quiesce the connection mgmt handlers before we start tearing
* things down. We don't hold the mutex for the entire
@@ -278,35 +308,41 @@ void rds_conn_shutdown(struct rds_connection *conn)
* deadlocking with the CM handler. Instead, the CM event
* handler is supposed to check for state DISCONNECTING
*/
- mutex_lock(&conn->c_cm_lock);
- if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
- && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
- rds_conn_error(conn, "shutdown called in state %d\n",
- atomic_read(&conn->c_state));
- mutex_unlock(&conn->c_cm_lock);
+ mutex_lock(&cp->cp_cm_lock);
+ if (!rds_conn_path_transition(cp, RDS_CONN_UP,
+ RDS_CONN_DISCONNECTING) &&
+ !rds_conn_path_transition(cp, RDS_CONN_ERROR,
+ RDS_CONN_DISCONNECTING)) {
+ rds_conn_path_error(cp,
+ "shutdown called in state %d\n",
+ atomic_read(&cp->cp_state));
+ mutex_unlock(&cp->cp_cm_lock);
return;
}
- mutex_unlock(&conn->c_cm_lock);
+ mutex_unlock(&cp->cp_cm_lock);
- wait_event(conn->c_waitq,
- !test_bit(RDS_IN_XMIT, &conn->c_flags));
- wait_event(conn->c_waitq,
- !test_bit(RDS_RECV_REFILL, &conn->c_flags));
+ wait_event(cp->cp_waitq,
+ !test_bit(RDS_IN_XMIT, &cp->cp_flags));
+ wait_event(cp->cp_waitq,
+ !test_bit(RDS_RECV_REFILL, &cp->cp_flags));
- conn->c_trans->conn_shutdown(conn);
- rds_conn_reset(conn);
+ if (!conn->c_trans->t_mp_capable)
+ conn->c_trans->conn_shutdown(conn);
+ else
+ conn->c_trans->conn_path_shutdown(cp);
+ rds_conn_path_reset(cp);
- if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
+ if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
+ RDS_CONN_DOWN)) {
/* This can happen - eg when we're in the middle of tearing
* down the connection, and someone unloads the rds module.
* Quite reproduceable with loopback connections.
* Mostly harmless.
*/
- rds_conn_error(conn,
- "%s: failed to transition to state DOWN, "
- "current state is %d\n",
- __func__,
- atomic_read(&conn->c_state));
+ rds_conn_path_error(cp, "%s: failed to transition "
+ "to state DOWN, current state "
+ "is %d\n", __func__,
+ atomic_read(&cp->cp_state));
return;
}
}
@@ -315,18 +351,46 @@ void rds_conn_shutdown(struct rds_connection *conn)
* The passive side of an IB loopback connection is never added
* to the conn hash, so we never trigger a reconnect on this
* conn - the reconnect is always triggered by the active peer. */
- cancel_delayed_work_sync(&conn->c_conn_w);
+ cancel_delayed_work_sync(&cp->cp_conn_w);
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
if (conn->c_trans->t_type != RDS_TRANS_TCP ||
- conn->c_outgoing == 1)
- rds_queue_reconnect(conn);
+ cp->cp_outgoing == 1)
+ rds_queue_reconnect(cp);
} else {
rcu_read_unlock();
}
}
+/* destroy a single rds_conn_path. rds_conn_destroy() iterates over
+ * all paths using rds_conn_path_destroy()
+ */
+static void rds_conn_path_destroy(struct rds_conn_path *cp)
+{
+ struct rds_message *rm, *rtmp;
+
+ rds_conn_path_drop(cp);
+ flush_work(&cp->cp_down_w);
+
+ /* make sure lingering queued work won't try to ref the conn */
+ cancel_delayed_work_sync(&cp->cp_send_w);
+ cancel_delayed_work_sync(&cp->cp_recv_w);
+
+ /* tear down queued messages */
+ list_for_each_entry_safe(rm, rtmp,
+ &cp->cp_send_queue,
+ m_conn_item) {
+ list_del_init(&rm->m_conn_item);
+ BUG_ON(!list_empty(&rm->m_sock_item));
+ rds_message_put(rm);
+ }
+ if (cp->cp_xmit_rm)
+ rds_message_put(cp->cp_xmit_rm);
+
+ cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
+}
+
/*
* Stop and free a connection.
*
@@ -336,7 +400,6 @@ void rds_conn_shutdown(struct rds_connection *conn)
*/
void rds_conn_destroy(struct rds_connection *conn)
{
- struct rds_message *rm, *rtmp;
unsigned long flags;
rdsdebug("freeing conn %p for %pI4 -> "
@@ -350,25 +413,19 @@ void rds_conn_destroy(struct rds_connection *conn)
synchronize_rcu();
/* shut the connection down */
- rds_conn_drop(conn);
- flush_work(&conn->c_down_w);
-
- /* make sure lingering queued work won't try to ref the conn */
- cancel_delayed_work_sync(&conn->c_send_w);
- cancel_delayed_work_sync(&conn->c_recv_w);
+ if (!conn->c_trans->t_mp_capable) {
+ rds_conn_path_destroy(&conn->c_path[0]);
+ BUG_ON(!list_empty(&conn->c_path[0].cp_retrans));
+ } else {
+ int i;
+ struct rds_conn_path *cp;
- /* tear down queued messages */
- list_for_each_entry_safe(rm, rtmp,
- &conn->c_send_queue,
- m_conn_item) {
- list_del_init(&rm->m_conn_item);
- BUG_ON(!list_empty(&rm->m_sock_item));
- rds_message_put(rm);
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ cp = &conn->c_path[i];
+ rds_conn_path_destroy(cp);
+ BUG_ON(!list_empty(&cp->cp_retrans));
+ }
}
- if (conn->c_xmit_rm)
- rds_message_put(conn->c_xmit_rm);
-
- conn->c_trans->conn_free(conn->c_transport_data);
/*
* The congestion maps aren't freed up here. They're
@@ -377,7 +434,6 @@ void rds_conn_destroy(struct rds_connection *conn)
*/
rds_cong_remove_conn(conn);
- BUG_ON(!list_empty(&conn->c_retrans));
kmem_cache_free(rds_conn_slab, conn);
spin_lock_irqsave(&rds_conn_lock, flags);
@@ -398,6 +454,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
unsigned int total = 0;
unsigned long flags;
size_t i;
+ int j;
len /= sizeof(struct rds_info_message);
@@ -406,23 +463,32 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
- if (want_send)
- list = &conn->c_send_queue;
- else
- list = &conn->c_retrans;
-
- spin_lock_irqsave(&conn->c_lock, flags);
-
- /* XXX too lazy to maintain counts.. */
- list_for_each_entry(rm, list, m_conn_item) {
- total++;
- if (total <= len)
- rds_inc_info_copy(&rm->m_inc, iter,
- conn->c_laddr,
- conn->c_faddr, 0);
+ struct rds_conn_path *cp;
+
+ for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ cp = &conn->c_path[j];
+ if (want_send)
+ list = &cp->cp_send_queue;
+ else
+ list = &cp->cp_retrans;
+
+ spin_lock_irqsave(&cp->cp_lock, flags);
+
+ /* XXX too lazy to maintain counts.. */
+ list_for_each_entry(rm, list, m_conn_item) {
+ total++;
+ if (total <= len)
+ rds_inc_info_copy(&rm->m_inc,
+ iter,
+ conn->c_laddr,
+ conn->c_faddr,
+ 0);
+ }
+
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
+ if (!conn->c_trans->t_mp_capable)
+ break;
}
-
- spin_unlock_irqrestore(&conn->c_lock, flags);
}
}
rcu_read_unlock();
@@ -484,27 +550,72 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
}
EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
-static int rds_conn_info_visitor(struct rds_connection *conn,
- void *buffer)
+void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens,
+ int (*visitor)(struct rds_conn_path *, void *),
+ size_t item_len)
+{
+ u64 buffer[(item_len + 7) / 8];
+ struct hlist_head *head;
+ struct rds_connection *conn;
+ size_t i;
+ int j;
+
+ rcu_read_lock();
+
+ lens->nr = 0;
+ lens->each = item_len;
+
+ for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
+ i++, head++) {
+ hlist_for_each_entry_rcu(conn, head, c_hash_node) {
+ struct rds_conn_path *cp;
+
+ for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ cp = &conn->c_path[j];
+
+ /* XXX no cp_lock usage.. */
+ if (!visitor(cp, buffer))
+ continue;
+ if (!conn->c_trans->t_mp_capable)
+ break;
+ }
+
+ /* We copy as much as we can fit in the buffer,
+ * but we count all items so that the caller
+ * can resize the buffer.
+ */
+ if (len >= item_len) {
+ rds_info_copy(iter, buffer, item_len);
+ len -= item_len;
+ }
+ lens->nr++;
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
{
struct rds_info_connection *cinfo = buffer;
- cinfo->next_tx_seq = conn->c_next_tx_seq;
- cinfo->next_rx_seq = conn->c_next_rx_seq;
- cinfo->laddr = conn->c_laddr;
- cinfo->faddr = conn->c_faddr;
- strncpy(cinfo->transport, conn->c_trans->t_name,
+ cinfo->next_tx_seq = cp->cp_next_tx_seq;
+ cinfo->next_rx_seq = cp->cp_next_rx_seq;
+ cinfo->laddr = cp->cp_conn->c_laddr;
+ cinfo->faddr = cp->cp_conn->c_faddr;
+ strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name,
sizeof(cinfo->transport));
cinfo->flags = 0;
- rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
+ rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags),
SENDING);
/* XXX Future: return the state rather than these funky bits */
rds_conn_info_set(cinfo->flags,
- atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
+ atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING,
CONNECTING);
rds_conn_info_set(cinfo->flags,
- atomic_read(&conn->c_state) == RDS_CONN_UP,
+ atomic_read(&cp->cp_state) == RDS_CONN_UP,
CONNECTED);
return 1;
}
@@ -513,7 +624,7 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
- rds_for_each_conn_info(sock, len, iter, lens,
+ rds_walk_conn_path_info(sock, len, iter, lens,
rds_conn_info_visitor,
sizeof(struct rds_info_connection));
}
@@ -553,10 +664,16 @@ void rds_conn_exit(void)
/*
* Force a disconnect
*/
+void rds_conn_path_drop(struct rds_conn_path *cp)
+{
+ atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+ queue_work(rds_wq, &cp->cp_down_w);
+}
+EXPORT_SYMBOL_GPL(rds_conn_path_drop);
+
void rds_conn_drop(struct rds_connection *conn)
{
- atomic_set(&conn->c_state, RDS_CONN_ERROR);
- queue_work(rds_wq, &conn->c_down_w);
+ rds_conn_path_drop(&conn->c_path[0]);
}
EXPORT_SYMBOL_GPL(rds_conn_drop);
@@ -564,11 +681,17 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
* If the connection is down, trigger a connect. We may have scheduled a
* delayed reconnect however - in this case we should not interfere.
*/
+void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
+{
+ if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
+ !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+}
+
void rds_conn_connect_if_down(struct rds_connection *conn)
{
- if (rds_conn_state(conn) == RDS_CONN_DOWN &&
- !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
- queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
+ WARN_ON(conn->c_trans->t_mp_capable);
+ rds_conn_path_connect_if_down(&conn->c_path[0]);
}
EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
@@ -586,3 +709,15 @@ __rds_conn_error(struct rds_connection *conn, const char *fmt, ...)
rds_conn_drop(conn);
}
+
+void
+__rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vprintk(fmt, ap);
+ va_end(ap);
+
+ rds_conn_path_drop(cp);
+}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index b5342fddaf98..44946a681a8c 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -40,6 +40,7 @@
#include <linux/slab.h>
#include <linux/module.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
#include "ib_mr.h"
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 7c2a65a6af5c..e48bb1ba3dfc 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -36,6 +36,7 @@
#include <linux/vmalloc.h>
#include <linux/ratelimit.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
@@ -273,7 +274,7 @@ static void rds_ib_tasklet_fn_send(unsigned long data)
if (rds_conn_up(conn) &&
(!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
test_bit(0, &conn->c_map_queued)))
- rds_send_xmit(ic->conn);
+ rds_send_xmit(&ic->conn->c_path[0]);
}
static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index f7164ac1ffc1..977f69886c00 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -35,6 +35,7 @@
#include <linux/rculist.h>
#include <linux/llist.h>
+#include "rds_single_path.h"
#include "ib_mr.h"
struct workqueue_struct *rds_ib_mr_wq;
@@ -618,7 +619,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
int rds_ib_mr_init(void)
{
- rds_ib_mr_wq = create_workqueue("rds_mr_flushd");
+ rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM, 0);
if (!rds_ib_mr_wq)
return -ENOMEM;
return 0;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index abc8cc805e8d..4ea8cb17cc7a 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -36,6 +36,7 @@
#include <linux/dma-mapping.h>
#include <rdma/rdma_cm.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index f27d2c82b036..6e4110aa5135 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -36,6 +36,7 @@
#include <linux/dmapool.h>
#include <linux/ratelimit.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 814173b466d9..15f83db78f0c 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/in.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "loop.h"
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 7220bebcf558..345f09059e9f 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -33,6 +33,7 @@
#include <linux/module.h>
#include <rdma/rdma_cm.h>
+#include "rds_single_path.h"
#include "rdma_transport.h"
#include "ib.h"
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 387df5f32e49..2e35b738176f 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -84,56 +84,69 @@ enum {
#define RDS_IN_XMIT 2
#define RDS_RECV_REFILL 3
+/* Max number of multipaths per RDS connection. Must be a power of 2 */
+#define RDS_MPATH_WORKERS 1
+
+/* Per mpath connection state */
+struct rds_conn_path {
+ struct rds_connection *cp_conn;
+ struct rds_message *cp_xmit_rm;
+ unsigned long cp_xmit_sg;
+ unsigned int cp_xmit_hdr_off;
+ unsigned int cp_xmit_data_off;
+ unsigned int cp_xmit_atomic_sent;
+ unsigned int cp_xmit_rdma_sent;
+ unsigned int cp_xmit_data_sent;
+
+ spinlock_t cp_lock; /* protect msg queues */
+ u64 cp_next_tx_seq;
+ struct list_head cp_send_queue;
+ struct list_head cp_retrans;
+
+ u64 cp_next_rx_seq;
+
+ void *cp_transport_data;
+
+ atomic_t cp_state;
+ unsigned long cp_send_gen;
+ unsigned long cp_flags;
+ unsigned long cp_reconnect_jiffies;
+ struct delayed_work cp_send_w;
+ struct delayed_work cp_recv_w;
+ struct delayed_work cp_conn_w;
+ struct work_struct cp_down_w;
+ struct mutex cp_cm_lock; /* protect cp_state & cm */
+ wait_queue_head_t cp_waitq;
+
+ unsigned int cp_unacked_packets;
+ unsigned int cp_unacked_bytes;
+ unsigned int cp_outgoing:1,
+ cp_pad_to_32:31;
+ unsigned int cp_index;
+};
+
+/* One rds_connection per RDS address pair */
struct rds_connection {
struct hlist_node c_hash_node;
__be32 c_laddr;
__be32 c_faddr;
unsigned int c_loopback:1,
- c_outgoing:1,
- c_pad_to_32:30;
+ c_pad_to_32:31;
+ int c_npaths;
struct rds_connection *c_passive;
+ struct rds_transport *c_trans;
struct rds_cong_map *c_lcong;
struct rds_cong_map *c_fcong;
- struct rds_message *c_xmit_rm;
- unsigned long c_xmit_sg;
- unsigned int c_xmit_hdr_off;
- unsigned int c_xmit_data_off;
- unsigned int c_xmit_atomic_sent;
- unsigned int c_xmit_rdma_sent;
- unsigned int c_xmit_data_sent;
-
- spinlock_t c_lock; /* protect msg queues */
- u64 c_next_tx_seq;
- struct list_head c_send_queue;
- struct list_head c_retrans;
-
- u64 c_next_rx_seq;
-
- struct rds_transport *c_trans;
- void *c_transport_data;
-
- atomic_t c_state;
- unsigned long c_send_gen;
- unsigned long c_flags;
- unsigned long c_reconnect_jiffies;
- struct delayed_work c_send_w;
- struct delayed_work c_recv_w;
- struct delayed_work c_conn_w;
- struct work_struct c_down_w;
- struct mutex c_cm_lock; /* protect conn state & cm */
- wait_queue_head_t c_waitq;
+ /* Protocol version */
+ unsigned int c_version;
+ possible_net_t c_net;
struct list_head c_map_item;
unsigned long c_map_queued;
- unsigned int c_unacked_packets;
- unsigned int c_unacked_bytes;
-
- /* Protocol version */
- unsigned int c_version;
- possible_net_t c_net;
+ struct rds_conn_path c_path[RDS_MPATH_WORKERS];
};
static inline
@@ -218,6 +231,7 @@ struct rds_incoming {
atomic_t i_refcount;
struct list_head i_item;
struct rds_connection *i_conn;
+ struct rds_conn_path *i_conn_path;
struct rds_header i_hdr;
unsigned long i_rx_jiffies;
__be32 i_saddr;
@@ -433,7 +447,8 @@ struct rds_transport {
char t_name[TRANSNAMSIZ];
struct list_head t_item;
struct module *t_owner;
- unsigned int t_prefer_loopback:1;
+ unsigned int t_prefer_loopback:1,
+ t_mp_capable:1;
unsigned int t_type;
int (*laddr_check)(struct net *net, __be32 addr);
@@ -441,8 +456,11 @@ struct rds_transport {
void (*conn_free)(void *data);
int (*conn_connect)(struct rds_connection *conn);
void (*conn_shutdown)(struct rds_connection *conn);
+ void (*conn_path_shutdown)(struct rds_conn_path *conn);
void (*xmit_prepare)(struct rds_connection *conn);
+ void (*xmit_path_prepare)(struct rds_conn_path *cp);
void (*xmit_complete)(struct rds_connection *conn);
+ void (*xmit_path_complete)(struct rds_conn_path *cp);
int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off);
int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
@@ -636,10 +654,12 @@ struct rds_connection *rds_conn_create(struct net *net,
struct rds_connection *rds_conn_create_outgoing(struct net *net,
__be32 laddr, __be32 faddr,
struct rds_transport *trans, gfp_t gfp);
-void rds_conn_shutdown(struct rds_connection *conn);
+void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
+void rds_conn_path_drop(struct rds_conn_path *cpath);
void rds_conn_connect_if_down(struct rds_connection *conn);
+void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
@@ -650,28 +670,60 @@ void __rds_conn_error(struct rds_connection *conn, const char *, ...);
#define rds_conn_error(conn, fmt...) \
__rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
+void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
+#define rds_conn_path_error(cp, fmt...) \
+ __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt)
+
+static inline int
+rds_conn_path_transition(struct rds_conn_path *cp, int old, int new)
+{
+ return atomic_cmpxchg(&cp->cp_state, old, new) == old;
+}
+
static inline int
rds_conn_transition(struct rds_connection *conn, int old, int new)
{
- return atomic_cmpxchg(&conn->c_state, old, new) == old;
+ WARN_ON(conn->c_trans->t_mp_capable);
+ return rds_conn_path_transition(&conn->c_path[0], old, new);
+}
+
+static inline int
+rds_conn_path_state(struct rds_conn_path *cp)
+{
+ return atomic_read(&cp->cp_state);
}
static inline int
rds_conn_state(struct rds_connection *conn)
{
- return atomic_read(&conn->c_state);
+ WARN_ON(conn->c_trans->t_mp_capable);
+ return rds_conn_path_state(&conn->c_path[0]);
+}
+
+static inline int
+rds_conn_path_up(struct rds_conn_path *cp)
+{
+ return atomic_read(&cp->cp_state) == RDS_CONN_UP;
}
static inline int
rds_conn_up(struct rds_connection *conn)
{
- return atomic_read(&conn->c_state) == RDS_CONN_UP;
+ WARN_ON(conn->c_trans->t_mp_capable);
+ return rds_conn_path_up(&conn->c_path[0]);
+}
+
+static inline int
+rds_conn_path_connecting(struct rds_conn_path *cp)
+{
+ return atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING;
}
static inline int
rds_conn_connecting(struct rds_connection *conn)
{
- return atomic_read(&conn->c_state) == RDS_CONN_CONNECTING;
+ WARN_ON(conn->c_trans->t_mp_capable);
+ return rds_conn_path_connecting(&conn->c_path[0]);
}
/* message.c */
@@ -720,6 +772,8 @@ void rds_page_exit(void);
/* recv.c */
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
__be32 saddr);
+void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn,
+ __be32 saddr);
void rds_inc_put(struct rds_incoming *inc);
void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
struct rds_incoming *inc, gfp_t gfp);
@@ -733,16 +787,16 @@ void rds_inc_info_copy(struct rds_incoming *inc,
/* send.c */
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
-void rds_send_reset(struct rds_connection *conn);
-int rds_send_xmit(struct rds_connection *conn);
+void rds_send_path_reset(struct rds_conn_path *conn);
+int rds_send_xmit(struct rds_conn_path *cp);
struct sockaddr_in;
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
is_acked_func is_acked);
-int rds_send_pong(struct rds_connection *conn, __be16 dport);
-struct rds_message *rds_send_get_message(struct rds_connection *,
- struct rm_rdma_op *);
+void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack,
+ is_acked_func is_acked);
+int rds_send_pong(struct rds_conn_path *cp, __be16 dport);
/* rdma.c */
void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
@@ -809,12 +863,12 @@ extern unsigned int rds_sysctl_trace_level;
int rds_threads_init(void);
void rds_threads_exit(void);
extern struct workqueue_struct *rds_wq;
-void rds_queue_reconnect(struct rds_connection *conn);
+void rds_queue_reconnect(struct rds_conn_path *cp);
void rds_connect_worker(struct work_struct *);
void rds_shutdown_worker(struct work_struct *);
void rds_send_worker(struct work_struct *);
void rds_recv_worker(struct work_struct *);
-void rds_connect_path_complete(struct rds_connection *conn, int curr);
+void rds_connect_path_complete(struct rds_conn_path *conn, int curr);
void rds_connect_complete(struct rds_connection *conn);
/* transport.c */
diff --git a/net/rds/rds_single_path.h b/net/rds/rds_single_path.h
new file mode 100644
index 000000000000..e1241af7c1ad
--- /dev/null
+++ b/net/rds/rds_single_path.h
@@ -0,0 +1,30 @@
+#ifndef _RDS_RDS_SINGLE_H
+#define _RDS_RDS_SINGLE_H
+
+#define c_xmit_rm c_path[0].cp_xmit_rm
+#define c_xmit_sg c_path[0].cp_xmit_sg
+#define c_xmit_hdr_off c_path[0].cp_xmit_hdr_off
+#define c_xmit_data_off c_path[0].cp_xmit_data_off
+#define c_xmit_atomic_sent c_path[0].cp_xmit_atomic_sent
+#define c_xmit_rdma_sent c_path[0].cp_xmit_rdma_sent
+#define c_xmit_data_sent c_path[0].cp_xmit_data_sent
+#define c_lock c_path[0].cp_lock
+#define c_next_tx_seq c_path[0].cp_next_tx_seq
+#define c_send_queue c_path[0].cp_send_queue
+#define c_retrans c_path[0].cp_retrans
+#define c_next_rx_seq c_path[0].cp_next_rx_seq
+#define c_transport_data c_path[0].cp_transport_data
+#define c_state c_path[0].cp_state
+#define c_send_gen c_path[0].cp_send_gen
+#define c_flags c_path[0].cp_flags
+#define c_reconnect_jiffies c_path[0].cp_reconnect_jiffies
+#define c_send_w c_path[0].cp_send_w
+#define c_recv_w c_path[0].cp_recv_w
+#define c_conn_w c_path[0].cp_conn_w
+#define c_down_w c_path[0].cp_down_w
+#define c_cm_lock c_path[0].cp_cm_lock
+#define c_waitq c_path[0].cp_waitq
+#define c_unacked_packets c_path[0].cp_unacked_packets
+#define c_unacked_bytes c_path[0].cp_unacked_bytes
+
+#endif /* _RDS_RDS_SINGLE_H */
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 8413f6c99e13..b58f50571782 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -53,6 +53,20 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
}
EXPORT_SYMBOL_GPL(rds_inc_init);
+void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
+ __be32 saddr)
+{
+ atomic_set(&inc->i_refcount, 1);
+ INIT_LIST_HEAD(&inc->i_item);
+ inc->i_conn = cp->cp_conn;
+ inc->i_conn_path = cp;
+ inc->i_saddr = saddr;
+ inc->i_rdma_cookie = 0;
+ inc->i_rx_tstamp.tv_sec = 0;
+ inc->i_rx_tstamp.tv_usec = 0;
+}
+EXPORT_SYMBOL_GPL(rds_inc_path_init);
+
static void rds_inc_addref(struct rds_incoming *inc)
{
rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
@@ -164,13 +178,18 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
struct rds_sock *rs = NULL;
struct sock *sk;
unsigned long flags;
+ struct rds_conn_path *cp;
inc->i_conn = conn;
inc->i_rx_jiffies = jiffies;
+ if (conn->c_trans->t_mp_capable)
+ cp = inc->i_conn_path;
+ else
+ cp = &conn->c_path[0];
rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u "
"flags 0x%x rx_jiffies %lu\n", conn,
- (unsigned long long)conn->c_next_rx_seq,
+ (unsigned long long)cp->cp_next_rx_seq,
inc,
(unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence),
be32_to_cpu(inc->i_hdr.h_len),
@@ -199,16 +218,16 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
* XXX we could spend more on the wire to get more robust failure
* detection, arguably worth it to avoid data corruption.
*/
- if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq &&
+ if (be64_to_cpu(inc->i_hdr.h_sequence) < cp->cp_next_rx_seq &&
(inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) {
rds_stats_inc(s_recv_drop_old_seq);
goto out;
}
- conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
+ cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
rds_stats_inc(s_recv_ping);
- rds_send_pong(conn, inc->i_hdr.h_sport);
+ rds_send_pong(cp, inc->i_hdr.h_sport);
goto out;
}
diff --git a/net/rds/send.c b/net/rds/send.c
index b1962f8e30f7..ee43d6b2ea8f 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -62,14 +62,14 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status);
* Reset the send state. Callers must ensure that this doesn't race with
* rds_send_xmit().
*/
-void rds_send_reset(struct rds_connection *conn)
+void rds_send_path_reset(struct rds_conn_path *cp)
{
struct rds_message *rm, *tmp;
unsigned long flags;
- if (conn->c_xmit_rm) {
- rm = conn->c_xmit_rm;
- conn->c_xmit_rm = NULL;
+ if (cp->cp_xmit_rm) {
+ rm = cp->cp_xmit_rm;
+ cp->cp_xmit_rm = NULL;
/* Tell the user the RDMA op is no longer mapped by the
* transport. This isn't entirely true (it's flushed out
* independently) but as the connection is down, there's
@@ -78,37 +78,37 @@ void rds_send_reset(struct rds_connection *conn)
rds_message_put(rm);
}
- conn->c_xmit_sg = 0;
- conn->c_xmit_hdr_off = 0;
- conn->c_xmit_data_off = 0;
- conn->c_xmit_atomic_sent = 0;
- conn->c_xmit_rdma_sent = 0;
- conn->c_xmit_data_sent = 0;
+ cp->cp_xmit_sg = 0;
+ cp->cp_xmit_hdr_off = 0;
+ cp->cp_xmit_data_off = 0;
+ cp->cp_xmit_atomic_sent = 0;
+ cp->cp_xmit_rdma_sent = 0;
+ cp->cp_xmit_data_sent = 0;
- conn->c_map_queued = 0;
+ cp->cp_conn->c_map_queued = 0;
- conn->c_unacked_packets = rds_sysctl_max_unacked_packets;
- conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes;
+ cp->cp_unacked_packets = rds_sysctl_max_unacked_packets;
+ cp->cp_unacked_bytes = rds_sysctl_max_unacked_bytes;
/* Mark messages as retransmissions, and move them to the send q */
- spin_lock_irqsave(&conn->c_lock, flags);
- list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
+ spin_lock_irqsave(&cp->cp_lock, flags);
+ list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) {
set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
set_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags);
}
- list_splice_init(&conn->c_retrans, &conn->c_send_queue);
- spin_unlock_irqrestore(&conn->c_lock, flags);
+ list_splice_init(&cp->cp_retrans, &cp->cp_send_queue);
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
}
-EXPORT_SYMBOL_GPL(rds_send_reset);
+EXPORT_SYMBOL_GPL(rds_send_path_reset);
-static int acquire_in_xmit(struct rds_connection *conn)
+static int acquire_in_xmit(struct rds_conn_path *cp)
{
- return test_and_set_bit(RDS_IN_XMIT, &conn->c_flags) == 0;
+ return test_and_set_bit(RDS_IN_XMIT, &cp->cp_flags) == 0;
}
-static void release_in_xmit(struct rds_connection *conn)
+static void release_in_xmit(struct rds_conn_path *cp)
{
- clear_bit(RDS_IN_XMIT, &conn->c_flags);
+ clear_bit(RDS_IN_XMIT, &cp->cp_flags);
smp_mb__after_atomic();
/*
* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
@@ -116,8 +116,8 @@ static void release_in_xmit(struct rds_connection *conn)
* the system-wide hashed waitqueue buckets in the fast path only to
* almost never find waiters.
*/
- if (waitqueue_active(&conn->c_waitq))
- wake_up_all(&conn->c_waitq);
+ if (waitqueue_active(&cp->cp_waitq))
+ wake_up_all(&cp->cp_waitq);
}
/*
@@ -134,8 +134,9 @@ static void release_in_xmit(struct rds_connection *conn)
* - small message latency is higher behind queued large messages
* - large message latency isn't starved by intervening small sends
*/
-int rds_send_xmit(struct rds_connection *conn)
+int rds_send_xmit(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
struct rds_message *rm;
unsigned long flags;
unsigned int tmp;
@@ -155,7 +156,7 @@ restart:
* avoids blocking the caller and trading per-connection data between
* caches per message.
*/
- if (!acquire_in_xmit(conn)) {
+ if (!acquire_in_xmit(cp)) {
rds_stats_inc(s_send_lock_contention);
ret = -ENOMEM;
goto out;
@@ -169,21 +170,25 @@ restart:
* The acquire_in_xmit() check above ensures that only one
* caller can increment c_send_gen at any time.
*/
- conn->c_send_gen++;
- send_gen = conn->c_send_gen;
+ cp->cp_send_gen++;
+ send_gen = cp->cp_send_gen;
/*
* rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
* we do the opposite to avoid races.
*/
- if (!rds_conn_up(conn)) {
- release_in_xmit(conn);
+ if (!rds_conn_path_up(cp)) {
+ release_in_xmit(cp);
ret = 0;
goto out;
}
- if (conn->c_trans->xmit_prepare)
+ if (conn->c_trans->t_mp_capable) {
+ if (conn->c_trans->xmit_path_prepare)
+ conn->c_trans->xmit_path_prepare(cp);
+ } else if (conn->c_trans->xmit_prepare) {
conn->c_trans->xmit_prepare(conn);
+ }
/*
* spin trying to push headers and data down the connection until
@@ -191,7 +196,7 @@ restart:
*/
while (1) {
- rm = conn->c_xmit_rm;
+ rm = cp->cp_xmit_rm;
/*
* If between sending messages, we can send a pending congestion
@@ -204,14 +209,16 @@ restart:
break;
}
rm->data.op_active = 1;
+ rm->m_inc.i_conn_path = cp;
+ rm->m_inc.i_conn = cp->cp_conn;
- conn->c_xmit_rm = rm;
+ cp->cp_xmit_rm = rm;
}
/*
* If not already working on one, grab the next message.
*
- * c_xmit_rm holds a ref while we're sending this message down
+ * cp_xmit_rm holds a ref while we're sending this message down
* the connction. We can use this ref while holding the
* send_sem.. rds_send_reset() is serialized with it.
*/
@@ -228,10 +235,10 @@ restart:
if (batch_count >= send_batch_count)
goto over_batch;
- spin_lock_irqsave(&conn->c_lock, flags);
+ spin_lock_irqsave(&cp->cp_lock, flags);
- if (!list_empty(&conn->c_send_queue)) {
- rm = list_entry(conn->c_send_queue.next,
+ if (!list_empty(&cp->cp_send_queue)) {
+ rm = list_entry(cp->cp_send_queue.next,
struct rds_message,
m_conn_item);
rds_message_addref(rm);
@@ -240,10 +247,11 @@ restart:
* Move the message from the send queue to the retransmit
* list right away.
*/
- list_move_tail(&rm->m_conn_item, &conn->c_retrans);
+ list_move_tail(&rm->m_conn_item,
+ &cp->cp_retrans);
}
- spin_unlock_irqrestore(&conn->c_lock, flags);
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
if (!rm)
break;
@@ -257,32 +265,34 @@ restart:
*/
if (rm->rdma.op_active &&
test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
- spin_lock_irqsave(&conn->c_lock, flags);
+ spin_lock_irqsave(&cp->cp_lock, flags);
if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
list_move(&rm->m_conn_item, &to_be_dropped);
- spin_unlock_irqrestore(&conn->c_lock, flags);
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
continue;
}
/* Require an ACK every once in a while */
len = ntohl(rm->m_inc.i_hdr.h_len);
- if (conn->c_unacked_packets == 0 ||
- conn->c_unacked_bytes < len) {
+ if (cp->cp_unacked_packets == 0 ||
+ cp->cp_unacked_bytes < len) {
__set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
- conn->c_unacked_packets = rds_sysctl_max_unacked_packets;
- conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes;
+ cp->cp_unacked_packets =
+ rds_sysctl_max_unacked_packets;
+ cp->cp_unacked_bytes =
+ rds_sysctl_max_unacked_bytes;
rds_stats_inc(s_send_ack_required);
} else {
- conn->c_unacked_bytes -= len;
- conn->c_unacked_packets--;
+ cp->cp_unacked_bytes -= len;
+ cp->cp_unacked_packets--;
}
- conn->c_xmit_rm = rm;
+ cp->cp_xmit_rm = rm;
}
/* The transport either sends the whole rdma or none of it */
- if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
+ if (rm->rdma.op_active && !cp->cp_xmit_rdma_sent) {
rm->m_final_op = &rm->rdma;
/* The transport owns the mapped memory for now.
* You can't unmap it while it's on the send queue
@@ -294,11 +304,11 @@ restart:
wake_up_interruptible(&rm->m_flush_wait);
break;
}
- conn->c_xmit_rdma_sent = 1;
+ cp->cp_xmit_rdma_sent = 1;
}
- if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
+ if (rm->atomic.op_active && !cp->cp_xmit_atomic_sent) {
rm->m_final_op = &rm->atomic;
/* The transport owns the mapped memory for now.
* You can't unmap it while it's on the send queue
@@ -310,7 +320,7 @@ restart:
wake_up_interruptible(&rm->m_flush_wait);
break;
}
- conn->c_xmit_atomic_sent = 1;
+ cp->cp_xmit_atomic_sent = 1;
}
@@ -336,41 +346,42 @@ restart:
rm->data.op_active = 0;
}
- if (rm->data.op_active && !conn->c_xmit_data_sent) {
+ if (rm->data.op_active && !cp->cp_xmit_data_sent) {
rm->m_final_op = &rm->data;
+
ret = conn->c_trans->xmit(conn, rm,
- conn->c_xmit_hdr_off,
- conn->c_xmit_sg,
- conn->c_xmit_data_off);
+ cp->cp_xmit_hdr_off,
+ cp->cp_xmit_sg,
+ cp->cp_xmit_data_off);
if (ret <= 0)
break;
- if (conn->c_xmit_hdr_off < sizeof(struct rds_header)) {
+ if (cp->cp_xmit_hdr_off < sizeof(struct rds_header)) {
tmp = min_t(int, ret,
sizeof(struct rds_header) -
- conn->c_xmit_hdr_off);
- conn->c_xmit_hdr_off += tmp;
+ cp->cp_xmit_hdr_off);
+ cp->cp_xmit_hdr_off += tmp;
ret -= tmp;
}
- sg = &rm->data.op_sg[conn->c_xmit_sg];
+ sg = &rm->data.op_sg[cp->cp_xmit_sg];
while (ret) {
tmp = min_t(int, ret, sg->length -
- conn->c_xmit_data_off);
- conn->c_xmit_data_off += tmp;
+ cp->cp_xmit_data_off);
+ cp->cp_xmit_data_off += tmp;
ret -= tmp;
- if (conn->c_xmit_data_off == sg->length) {
- conn->c_xmit_data_off = 0;
+ if (cp->cp_xmit_data_off == sg->length) {
+ cp->cp_xmit_data_off = 0;
sg++;
- conn->c_xmit_sg++;
- BUG_ON(ret != 0 &&
- conn->c_xmit_sg == rm->data.op_nents);
+ cp->cp_xmit_sg++;
+ BUG_ON(ret != 0 && cp->cp_xmit_sg ==
+ rm->data.op_nents);
}
}
- if (conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
- (conn->c_xmit_sg == rm->data.op_nents))
- conn->c_xmit_data_sent = 1;
+ if (cp->cp_xmit_hdr_off == sizeof(struct rds_header) &&
+ (cp->cp_xmit_sg == rm->data.op_nents))
+ cp->cp_xmit_data_sent = 1;
}
/*
@@ -378,23 +389,27 @@ restart:
* if there is a data op. Thus, if the data is sent (or there was
* none), then we're done with the rm.
*/
- if (!rm->data.op_active || conn->c_xmit_data_sent) {
- conn->c_xmit_rm = NULL;
- conn->c_xmit_sg = 0;
- conn->c_xmit_hdr_off = 0;
- conn->c_xmit_data_off = 0;
- conn->c_xmit_rdma_sent = 0;
- conn->c_xmit_atomic_sent = 0;
- conn->c_xmit_data_sent = 0;
+ if (!rm->data.op_active || cp->cp_xmit_data_sent) {
+ cp->cp_xmit_rm = NULL;
+ cp->cp_xmit_sg = 0;
+ cp->cp_xmit_hdr_off = 0;
+ cp->cp_xmit_data_off = 0;
+ cp->cp_xmit_rdma_sent = 0;
+ cp->cp_xmit_atomic_sent = 0;
+ cp->cp_xmit_data_sent = 0;
rds_message_put(rm);
}
}
over_batch:
- if (conn->c_trans->xmit_complete)
+ if (conn->c_trans->t_mp_capable) {
+ if (conn->c_trans->xmit_path_complete)
+ conn->c_trans->xmit_path_complete(cp);
+ } else if (conn->c_trans->xmit_complete) {
conn->c_trans->xmit_complete(conn);
- release_in_xmit(conn);
+ }
+ release_in_xmit(cp);
/* Nuke any messages we decided not to retransmit. */
if (!list_empty(&to_be_dropped)) {
@@ -422,12 +437,12 @@ over_batch:
if (ret == 0) {
smp_mb();
if ((test_bit(0, &conn->c_map_queued) ||
- !list_empty(&conn->c_send_queue)) &&
- send_gen == conn->c_send_gen) {
+ !list_empty(&cp->cp_send_queue)) &&
+ send_gen == cp->cp_send_gen) {
rds_stats_inc(s_send_lock_queue_raced);
if (batch_count < send_batch_count)
goto restart;
- queue_delayed_work(rds_wq, &conn->c_send_w, 1);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
}
}
out:
@@ -560,42 +575,6 @@ __rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
}
/*
- * This is called from the IB send completion when we detect
- * a RDMA operation that failed with remote access error.
- * So speed is not an issue here.
- */
-struct rds_message *rds_send_get_message(struct rds_connection *conn,
- struct rm_rdma_op *op)
-{
- struct rds_message *rm, *tmp, *found = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&conn->c_lock, flags);
-
- list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
- if (&rm->rdma == op) {
- atomic_inc(&rm->m_refcount);
- found = rm;
- goto out;
- }
- }
-
- list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
- if (&rm->rdma == op) {
- atomic_inc(&rm->m_refcount);
- found = rm;
- break;
- }
- }
-
-out:
- spin_unlock_irqrestore(&conn->c_lock, flags);
-
- return found;
-}
-EXPORT_SYMBOL_GPL(rds_send_get_message);
-
-/*
* This removes messages from the socket's list if they're on it. The list
* argument must be private to the caller, we must be able to modify it
* without locks. The messages must have a reference held for their
@@ -685,16 +664,16 @@ unlock_and_drop:
* assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked
* checks the RDS_MSG_HAS_ACK_SEQ bit.
*/
-void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
- is_acked_func is_acked)
+void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack,
+ is_acked_func is_acked)
{
struct rds_message *rm, *tmp;
unsigned long flags;
LIST_HEAD(list);
- spin_lock_irqsave(&conn->c_lock, flags);
+ spin_lock_irqsave(&cp->cp_lock, flags);
- list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
+ list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) {
if (!rds_send_is_acked(rm, ack, is_acked))
break;
@@ -706,17 +685,26 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
if (!list_empty(&list))
smp_mb__after_atomic();
- spin_unlock_irqrestore(&conn->c_lock, flags);
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
/* now remove the messages from the sock list as needed */
rds_send_remove_from_sock(&list, RDS_RDMA_SUCCESS);
}
+EXPORT_SYMBOL_GPL(rds_send_path_drop_acked);
+
+void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
+ is_acked_func is_acked)
+{
+ WARN_ON(conn->c_trans->t_mp_capable);
+ rds_send_path_drop_acked(&conn->c_path[0], ack, is_acked);
+}
EXPORT_SYMBOL_GPL(rds_send_drop_acked);
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
{
struct rds_message *rm, *tmp;
struct rds_connection *conn;
+ struct rds_conn_path *cp;
unsigned long flags;
LIST_HEAD(list);
@@ -745,22 +733,26 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
list_for_each_entry(rm, &list, m_sock_item) {
conn = rm->m_inc.i_conn;
+ if (conn->c_trans->t_mp_capable)
+ cp = rm->m_inc.i_conn_path;
+ else
+ cp = &conn->c_path[0];
- spin_lock_irqsave(&conn->c_lock, flags);
+ spin_lock_irqsave(&cp->cp_lock, flags);
/*
* Maybe someone else beat us to removing rm from the conn.
* If we race with their flag update we'll get the lock and
* then really see that the flag has been cleared.
*/
if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
- spin_unlock_irqrestore(&conn->c_lock, flags);
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
spin_lock_irqsave(&rm->m_rs_lock, flags);
rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags);
continue;
}
list_del_init(&rm->m_conn_item);
- spin_unlock_irqrestore(&conn->c_lock, flags);
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
/*
* Couldn't grab m_rs_lock in top loop (lock ordering),
@@ -809,6 +801,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
* message from the flow with RDS_CANCEL_SENT_TO.
*/
static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
+ struct rds_conn_path *cp,
struct rds_message *rm, __be16 sport,
__be16 dport, int *queued)
{
@@ -852,13 +845,14 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
trying to minimize the time we hold c_lock */
rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, 0);
rm->m_inc.i_conn = conn;
+ rm->m_inc.i_conn_path = cp;
rds_message_addref(rm);
- spin_lock(&conn->c_lock);
- rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++);
- list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
+ spin_lock(&cp->cp_lock);
+ rm->m_inc.i_hdr.h_sequence = cpu_to_be64(cp->cp_next_tx_seq++);
+ list_add_tail(&rm->m_conn_item, &cp->cp_send_queue);
set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
- spin_unlock(&conn->c_lock);
+ spin_unlock(&cp->cp_lock);
rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n",
rm, len, rs, rs->rs_snd_bytes,
@@ -990,6 +984,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
int queued = 0, allocated_mr = 0;
int nonblock = msg->msg_flags & MSG_DONTWAIT;
long timeo = sock_sndtimeo(sk, nonblock);
+ struct rds_conn_path *cpath;
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
@@ -1088,15 +1083,16 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
goto out;
}
- rds_conn_connect_if_down(conn);
+ cpath = &conn->c_path[0];
+
+ rds_conn_path_connect_if_down(cpath);
ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
if (ret) {
rs->rs_seen_congestion = 1;
goto out;
}
-
- while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
+ while (!rds_send_queue_rm(rs, conn, cpath, rm, rs->rs_bound_port,
dport, &queued)) {
rds_stats_inc(s_send_queue_full);
@@ -1106,7 +1102,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
}
timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
- rds_send_queue_rm(rs, conn, rm,
+ rds_send_queue_rm(rs, conn, cpath, rm,
rs->rs_bound_port,
dport,
&queued),
@@ -1127,9 +1123,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
*/
rds_stats_inc(s_send_queued);
- ret = rds_send_xmit(conn);
+ ret = rds_send_xmit(cpath);
if (ret == -ENOMEM || ret == -EAGAIN)
- queue_delayed_work(rds_wq, &conn->c_send_w, 1);
+ queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
rds_message_put(rm);
return payload_len;
@@ -1150,7 +1146,7 @@ out:
* Reply to a ping packet.
*/
int
-rds_send_pong(struct rds_connection *conn, __be16 dport)
+rds_send_pong(struct rds_conn_path *cp, __be16 dport)
{
struct rds_message *rm;
unsigned long flags;
@@ -1162,31 +1158,32 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
goto out;
}
- rm->m_daddr = conn->c_faddr;
+ rm->m_daddr = cp->cp_conn->c_faddr;
rm->data.op_active = 1;
- rds_conn_connect_if_down(conn);
+ rds_conn_path_connect_if_down(cp);
- ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
+ ret = rds_cong_wait(cp->cp_conn->c_fcong, dport, 1, NULL);
if (ret)
goto out;
- spin_lock_irqsave(&conn->c_lock, flags);
- list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
+ spin_lock_irqsave(&cp->cp_lock, flags);
+ list_add_tail(&rm->m_conn_item, &cp->cp_send_queue);
set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
rds_message_addref(rm);
- rm->m_inc.i_conn = conn;
+ rm->m_inc.i_conn = cp->cp_conn;
+ rm->m_inc.i_conn_path = cp;
rds_message_populate_header(&rm->m_inc.i_hdr, 0, dport,
- conn->c_next_tx_seq);
- conn->c_next_tx_seq++;
- spin_unlock_irqrestore(&conn->c_lock, flags);
+ cp->cp_next_tx_seq);
+ cp->cp_next_tx_seq++;
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
rds_stats_inc(s_send_queued);
rds_stats_inc(s_send_pong);
/* schedule the send work on rds_wq */
- queue_delayed_work(rds_wq, &conn->c_send_w, 1);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
rds_message_put(rm);
return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 74ee126a6fe6..5217d49ce6d6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -38,6 +38,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -56,8 +57,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *fpos);
-int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
-int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
+static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
+static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
static struct ctl_table rds_tcp_sysctl_table[] = {
#define RDS_TCP_SNDBUF 0
@@ -185,7 +186,7 @@ void rds_tcp_reset_callbacks(struct socket *sock,
release_sock(osock->sk);
sock_release(osock);
newsock:
- rds_send_reset(conn);
+ rds_send_path_reset(&conn->c_path[0]);
lock_sock(sock->sk);
write_lock_bh(&sock->sk->sk_callback_lock);
tc->t_sock = sock;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index f6e95d60db54..96c2c4d17909 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -34,6 +34,7 @@
#include <linux/in.h>
#include <net/tcp.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -60,7 +61,8 @@ void rds_tcp_state_change(struct sock *sk)
case TCP_SYN_RECV:
break;
case TCP_ESTABLISHED:
- rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+ rds_connect_path_complete(&conn->c_path[0],
+ RDS_CONN_CONNECTING);
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 245542ca4718..f9cc945a77b3 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -35,6 +35,7 @@
#include <linux/in.h>
#include <net/tcp.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -132,17 +133,19 @@ int rds_tcp_accept_one(struct socket *sock)
* c_transport_data.
*/
if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) ||
- !conn->c_outgoing) {
+ !conn->c_path[0].cp_outgoing) {
goto rst_nsk;
} else {
rds_tcp_reset_callbacks(new_sock, conn);
- conn->c_outgoing = 0;
+ conn->c_path[0].cp_outgoing = 0;
/* rds_connect_path_complete() marks RDS_CONN_UP */
- rds_connect_path_complete(conn, RDS_CONN_RESETTING);
+ rds_connect_path_complete(&conn->c_path[0],
+ RDS_CONN_RESETTING);
}
} else {
rds_tcp_set_callbacks(new_sock, conn);
- rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+ rds_connect_path_complete(&conn->c_path[0],
+ RDS_CONN_CONNECTING);
}
new_sock = NULL;
ret = 0;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 6e6a7111a034..4a87d9ef3084 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <net/tcp.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 618be69c9c3b..710f1aae97ad 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -34,6 +34,7 @@
#include <linux/in.h>
#include <net/tcp.h>
+#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 4a323045719b..9fbe95bb14a9 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -71,30 +71,30 @@
struct workqueue_struct *rds_wq;
EXPORT_SYMBOL_GPL(rds_wq);
-void rds_connect_path_complete(struct rds_connection *conn, int curr)
+void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
{
- if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) {
+ if (!rds_conn_path_transition(cp, curr, RDS_CONN_UP)) {
printk(KERN_WARNING "%s: Cannot transition to state UP, "
"current state is %d\n",
__func__,
- atomic_read(&conn->c_state));
- rds_conn_drop(conn);
+ atomic_read(&cp->cp_state));
+ rds_conn_path_drop(cp);
return;
}
rdsdebug("conn %p for %pI4 to %pI4 complete\n",
- conn, &conn->c_laddr, &conn->c_faddr);
+ cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr);
- conn->c_reconnect_jiffies = 0;
- set_bit(0, &conn->c_map_queued);
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
- queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
+ cp->cp_reconnect_jiffies = 0;
+ set_bit(0, &cp->cp_conn->c_map_queued);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
}
EXPORT_SYMBOL_GPL(rds_connect_path_complete);
void rds_connect_complete(struct rds_connection *conn)
{
- rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+ rds_connect_path_complete(&conn->c_path[0], RDS_CONN_CONNECTING);
}
EXPORT_SYMBOL_GPL(rds_connect_complete);
@@ -116,70 +116,79 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
* We should *always* start with a random backoff; otherwise a broken connection
* will always take several iterations to be re-established.
*/
-void rds_queue_reconnect(struct rds_connection *conn)
+void rds_queue_reconnect(struct rds_conn_path *cp)
{
unsigned long rand;
+ struct rds_connection *conn = cp->cp_conn;
rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
conn, &conn->c_laddr, &conn->c_faddr,
- conn->c_reconnect_jiffies);
+ cp->cp_reconnect_jiffies);
- set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
- if (conn->c_reconnect_jiffies == 0) {
- conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
- queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
+ set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
+ if (cp->cp_reconnect_jiffies == 0) {
+ cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
+ queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
return;
}
get_random_bytes(&rand, sizeof(rand));
rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
- rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies,
+ rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
conn, &conn->c_laddr, &conn->c_faddr);
- queue_delayed_work(rds_wq, &conn->c_conn_w,
- rand % conn->c_reconnect_jiffies);
+ queue_delayed_work(rds_wq, &cp->cp_conn_w,
+ rand % cp->cp_reconnect_jiffies);
- conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2,
+ cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
rds_sysctl_reconnect_max_jiffies);
}
void rds_connect_worker(struct work_struct *work)
{
- struct rds_connection *conn = container_of(work, struct rds_connection, c_conn_w.work);
+ struct rds_conn_path *cp = container_of(work,
+ struct rds_conn_path,
+ cp_conn_w.work);
+ struct rds_connection *conn = cp->cp_conn;
int ret;
- clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
- if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
+ clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
+ if (rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
ret = conn->c_trans->conn_connect(conn);
rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
conn, &conn->c_laddr, &conn->c_faddr, ret);
if (ret) {
- if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN))
- rds_queue_reconnect(conn);
+ if (rds_conn_path_transition(cp,
+ RDS_CONN_CONNECTING,
+ RDS_CONN_DOWN))
+ rds_queue_reconnect(cp);
else
- rds_conn_error(conn, "RDS: connect failed\n");
+ rds_conn_path_error(cp,
+ "RDS: connect failed\n");
}
}
}
void rds_send_worker(struct work_struct *work)
{
- struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
+ struct rds_conn_path *cp = container_of(work,
+ struct rds_conn_path,
+ cp_send_w.work);
int ret;
- if (rds_conn_state(conn) == RDS_CONN_UP) {
- clear_bit(RDS_LL_SEND_FULL, &conn->c_flags);
- ret = rds_send_xmit(conn);
+ if (rds_conn_path_state(cp) == RDS_CONN_UP) {
+ clear_bit(RDS_LL_SEND_FULL, &cp->cp_flags);
+ ret = rds_send_xmit(cp);
cond_resched();
- rdsdebug("conn %p ret %d\n", conn, ret);
+ rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
switch (ret) {
case -EAGAIN:
rds_stats_inc(s_send_immediate_retry);
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
break;
case -ENOMEM:
rds_stats_inc(s_send_delayed_retry);
- queue_delayed_work(rds_wq, &conn->c_send_w, 2);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 2);
default:
break;
}
@@ -188,20 +197,22 @@ void rds_send_worker(struct work_struct *work)
void rds_recv_worker(struct work_struct *work)
{
- struct rds_connection *conn = container_of(work, struct rds_connection, c_recv_w.work);
+ struct rds_conn_path *cp = container_of(work,
+ struct rds_conn_path,
+ cp_recv_w.work);
int ret;
- if (rds_conn_state(conn) == RDS_CONN_UP) {
- ret = conn->c_trans->recv(conn);
- rdsdebug("conn %p ret %d\n", conn, ret);
+ if (rds_conn_path_state(cp) == RDS_CONN_UP) {
+ ret = cp->cp_conn->c_trans->recv(cp->cp_conn);
+ rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
switch (ret) {
case -EAGAIN:
rds_stats_inc(s_recv_immediate_retry);
- queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
break;
case -ENOMEM:
rds_stats_inc(s_recv_delayed_retry);
- queue_delayed_work(rds_wq, &conn->c_recv_w, 2);
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 2);
default:
break;
}
@@ -210,9 +221,11 @@ void rds_recv_worker(struct work_struct *work)
void rds_shutdown_worker(struct work_struct *work)
{
- struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
+ struct rds_conn_path *cp = container_of(work,
+ struct rds_conn_path,
+ cp_down_w);
- rds_conn_shutdown(conn);
+ rds_conn_shutdown(cp);
}
void rds_threads_exit(void)
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index e05a06ef2254..6522e50fb750 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -4,25 +4,27 @@
af-rxrpc-y := \
af_rxrpc.o \
- ar-accept.o \
- ar-ack.o \
- ar-call.o \
- ar-connection.o \
- ar-connevent.o \
- ar-error.o \
- ar-input.o \
- ar-key.o \
- ar-local.o \
- ar-output.o \
- ar-peer.o \
- ar-recvmsg.o \
- ar-security.o \
- ar-skbuff.o \
- ar-transport.o \
+ call_accept.o \
+ call_event.o \
+ call_object.o \
+ conn_client.o \
+ conn_event.o \
+ conn_object.o \
+ input.o \
insecure.o \
- misc.o
+ key.o \
+ local_event.o \
+ local_object.o \
+ misc.o \
+ output.o \
+ peer_event.o \
+ peer_object.o \
+ recvmsg.o \
+ security.o \
+ skbuff.o \
+ utils.o
-af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o
+af-rxrpc-$(CONFIG_PROC_FS) += proc.o
af-rxrpc-$(CONFIG_RXKAD) += rxkad.o
af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index e45e94ca030f..5d3e795a7c48 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/net.h>
@@ -31,8 +33,6 @@ unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(debug, "RxRPC debugging mask");
-static int sysctl_rxrpc_max_qlen __read_mostly = 10;
-
static struct proto rxrpc_proto;
static const struct proto_ops rxrpc_rpc_ops;
@@ -97,11 +97,13 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
srx->transport_len > len)
return -EINVAL;
- if (srx->transport.family != rx->proto)
+ if (srx->transport.family != rx->family)
return -EAFNOSUPPORT;
switch (srx->transport.family) {
case AF_INET:
+ if (srx->transport_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
_debug("INET: %x @ %pI4",
ntohs(srx->transport.sin.sin_port),
&srx->transport.sin.sin_addr);
@@ -137,33 +139,33 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
lock_sock(&rx->sk);
- if (rx->sk.sk_state != RXRPC_UNCONNECTED) {
+ if (rx->sk.sk_state != RXRPC_UNBOUND) {
ret = -EINVAL;
goto error_unlock;
}
memcpy(&rx->srx, srx, sizeof(rx->srx));
- /* Find or create a local transport endpoint to use */
local = rxrpc_lookup_local(&rx->srx);
if (IS_ERR(local)) {
ret = PTR_ERR(local);
goto error_unlock;
}
- rx->local = local;
- if (srx->srx_service) {
+ if (rx->srx.srx_service) {
write_lock_bh(&local->services_lock);
list_for_each_entry(prx, &local->services, listen_link) {
- if (prx->srx.srx_service == srx->srx_service)
+ if (prx->srx.srx_service == rx->srx.srx_service)
goto service_in_use;
}
+ rx->local = local;
list_add_tail(&rx->listen_link, &local->services);
write_unlock_bh(&local->services_lock);
rx->sk.sk_state = RXRPC_SERVER_BOUND;
} else {
+ rx->local = local;
rx->sk.sk_state = RXRPC_CLIENT_BOUND;
}
@@ -172,8 +174,9 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
return 0;
service_in_use:
- ret = -EADDRINUSE;
write_unlock_bh(&local->services_lock);
+ rxrpc_put_local(local);
+ ret = -EADDRINUSE;
error_unlock:
release_sock(&rx->sk);
error:
@@ -188,6 +191,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
struct rxrpc_sock *rx = rxrpc_sk(sk);
+ unsigned int max;
int ret;
_enter("%p,%d", rx, backlog);
@@ -195,20 +199,24 @@ static int rxrpc_listen(struct socket *sock, int backlog)
lock_sock(&rx->sk);
switch (rx->sk.sk_state) {
- case RXRPC_UNCONNECTED:
+ case RXRPC_UNBOUND:
ret = -EADDRNOTAVAIL;
break;
- case RXRPC_CLIENT_BOUND:
- case RXRPC_CLIENT_CONNECTED:
- default:
- ret = -EBUSY;
- break;
case RXRPC_SERVER_BOUND:
ASSERT(rx->local != NULL);
+ max = READ_ONCE(rxrpc_max_backlog);
+ ret = -EINVAL;
+ if (backlog == INT_MAX)
+ backlog = max;
+ else if (backlog < 0 || backlog > max)
+ break;
sk->sk_max_ack_backlog = backlog;
rx->sk.sk_state = RXRPC_SERVER_LISTENING;
ret = 0;
break;
+ default:
+ ret = -EBUSY;
+ break;
}
release_sock(&rx->sk);
@@ -216,45 +224,10 @@ static int rxrpc_listen(struct socket *sock, int backlog)
return ret;
}
-/*
- * find a transport by address
- */
-static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
- struct sockaddr *addr,
- int addr_len, int flags,
- gfp_t gfp)
-{
- struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
- struct rxrpc_transport *trans;
- struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- struct rxrpc_peer *peer;
-
- _enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
-
- ASSERT(rx->local != NULL);
- ASSERT(rx->sk.sk_state > RXRPC_UNCONNECTED);
-
- if (rx->srx.transport_type != srx->transport_type)
- return ERR_PTR(-ESOCKTNOSUPPORT);
- if (rx->srx.transport.family != srx->transport.family)
- return ERR_PTR(-EAFNOSUPPORT);
-
- /* find a remote transport endpoint from the local one */
- peer = rxrpc_get_peer(srx, gfp);
- if (IS_ERR(peer))
- return ERR_CAST(peer);
-
- /* find a transport */
- trans = rxrpc_get_transport(rx->local, peer, gfp);
- rxrpc_put_peer(peer);
- _leave(" = %p", trans);
- return trans;
-}
-
/**
* rxrpc_kernel_begin_call - Allow a kernel service to begin a call
* @sock: The socket on which to make the call
- * @srx: The address of the peer to contact (defaults to socket setting)
+ * @srx: The address of the peer to contact
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
*
@@ -271,51 +244,32 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
unsigned long user_call_ID,
gfp_t gfp)
{
- struct rxrpc_conn_bundle *bundle;
- struct rxrpc_transport *trans;
+ struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
_enter(",,%x,%lx", key_serial(key), user_call_ID);
- lock_sock(&rx->sk);
+ ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
+ if (ret < 0)
+ return ERR_PTR(ret);
- if (srx) {
- trans = rxrpc_name_to_transport(sock, (struct sockaddr *) srx,
- sizeof(*srx), 0, gfp);
- if (IS_ERR(trans)) {
- call = ERR_CAST(trans);
- trans = NULL;
- goto out_notrans;
- }
- } else {
- trans = rx->trans;
- if (!trans) {
- call = ERR_PTR(-ENOTCONN);
- goto out_notrans;
- }
- atomic_inc(&trans->usage);
- }
+ lock_sock(&rx->sk);
- if (!srx)
- srx = &rx->srx;
if (!key)
key = rx->key;
if (key && !key->payload.data[0])
key = NULL; /* a no-security key */
- bundle = rxrpc_get_bundle(rx, trans, key, srx->srx_service, gfp);
- if (IS_ERR(bundle)) {
- call = ERR_CAST(bundle);
- goto out;
- }
+ memset(&cp, 0, sizeof(cp));
+ cp.local = rx->local;
+ cp.key = key;
+ cp.security_level = 0;
+ cp.exclusive = false;
+ cp.service_id = srx->srx_service;
+ call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
- call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID, true,
- gfp);
- rxrpc_put_bundle(trans, bundle);
-out:
- rxrpc_put_transport(trans);
-out_notrans:
release_sock(&rx->sk);
_leave(" = %p", call);
return call;
@@ -367,11 +321,8 @@ EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
int addr_len, int flags)
{
- struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
- struct sock *sk = sock->sk;
- struct rxrpc_transport *trans;
- struct rxrpc_local *local;
- struct rxrpc_sock *rx = rxrpc_sk(sk);
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)addr;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
int ret;
_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
@@ -384,45 +335,28 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
lock_sock(&rx->sk);
+ ret = -EISCONN;
+ if (test_bit(RXRPC_SOCK_CONNECTED, &rx->flags))
+ goto error;
+
switch (rx->sk.sk_state) {
- case RXRPC_UNCONNECTED:
- /* find a local transport endpoint if we don't have one already */
- ASSERTCMP(rx->local, ==, NULL);
- rx->srx.srx_family = AF_RXRPC;
- rx->srx.srx_service = 0;
- rx->srx.transport_type = srx->transport_type;
- rx->srx.transport_len = sizeof(sa_family_t);
- rx->srx.transport.family = srx->transport.family;
- local = rxrpc_lookup_local(&rx->srx);
- if (IS_ERR(local)) {
- release_sock(&rx->sk);
- return PTR_ERR(local);
- }
- rx->local = local;
- rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ case RXRPC_UNBOUND:
+ rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+ case RXRPC_CLIENT_UNBOUND:
case RXRPC_CLIENT_BOUND:
break;
- case RXRPC_CLIENT_CONNECTED:
- release_sock(&rx->sk);
- return -EISCONN;
default:
- release_sock(&rx->sk);
- return -EBUSY; /* server sockets can't connect as well */
- }
-
- trans = rxrpc_name_to_transport(sock, addr, addr_len, flags,
- GFP_KERNEL);
- if (IS_ERR(trans)) {
- release_sock(&rx->sk);
- _leave(" = %ld", PTR_ERR(trans));
- return PTR_ERR(trans);
+ ret = -EBUSY;
+ goto error;
}
- rx->trans = trans;
- rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
+ rx->connect_srx = *srx;
+ set_bit(RXRPC_SOCK_CONNECTED, &rx->flags);
+ ret = 0;
+error:
release_sock(&rx->sk);
- return 0;
+ return ret;
}
/*
@@ -436,7 +370,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
*/
static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
- struct rxrpc_transport *trans;
+ struct rxrpc_local *local;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
int ret;
@@ -453,48 +387,38 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
}
}
- trans = NULL;
lock_sock(&rx->sk);
- if (m->msg_name) {
- ret = -EISCONN;
- trans = rxrpc_name_to_transport(sock, m->msg_name,
- m->msg_namelen, 0, GFP_KERNEL);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
- } else {
- trans = rx->trans;
- if (trans)
- atomic_inc(&trans->usage);
- }
-
switch (rx->sk.sk_state) {
- case RXRPC_SERVER_LISTENING:
- if (!m->msg_name) {
- ret = rxrpc_server_sendmsg(rx, m, len);
- break;
+ case RXRPC_UNBOUND:
+ local = rxrpc_lookup_local(&rx->srx);
+ if (IS_ERR(local)) {
+ ret = PTR_ERR(local);
+ goto error_unlock;
}
- case RXRPC_SERVER_BOUND:
+
+ rx->local = local;
+ rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+ /* Fall through */
+
+ case RXRPC_CLIENT_UNBOUND:
case RXRPC_CLIENT_BOUND:
- if (!m->msg_name) {
- ret = -ENOTCONN;
- break;
+ if (!m->msg_name &&
+ test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) {
+ m->msg_name = &rx->connect_srx;
+ m->msg_namelen = sizeof(rx->connect_srx);
}
- case RXRPC_CLIENT_CONNECTED:
- ret = rxrpc_client_sendmsg(rx, trans, m, len);
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_LISTENING:
+ ret = rxrpc_do_sendmsg(rx, m, len);
break;
default:
- ret = -ENOTCONN;
+ ret = -EINVAL;
break;
}
-out:
+error_unlock:
release_sock(&rx->sk);
- if (trans)
- rxrpc_put_transport(trans);
_leave(" = %d", ret);
return ret;
}
@@ -521,9 +445,9 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (optlen != 0)
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
- set_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags);
+ rx->exclusive = true;
goto success;
case RXRPC_SECURITY_KEY:
@@ -531,7 +455,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (rx->key)
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
ret = rxrpc_request_key(rx, optval, optlen);
goto error;
@@ -541,7 +465,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (rx->key)
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
ret = rxrpc_server_keyring(rx, optval, optlen);
goto error;
@@ -551,7 +475,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(unsigned int))
goto error;
ret = -EISCONN;
- if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
ret = get_user(min_sec_level,
(unsigned int __user *) optval);
@@ -630,13 +554,13 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
return -ENOMEM;
sock_init_data(sock, sk);
- sk->sk_state = RXRPC_UNCONNECTED;
+ sk->sk_state = RXRPC_UNBOUND;
sk->sk_write_space = rxrpc_write_space;
- sk->sk_max_ack_backlog = sysctl_rxrpc_max_qlen;
+ sk->sk_max_ack_backlog = 0;
sk->sk_destruct = rxrpc_sock_destructor;
rx = rxrpc_sk(sk);
- rx->proto = protocol;
+ rx->family = protocol;
rx->calls = RB_ROOT;
INIT_LIST_HEAD(&rx->listen_link);
@@ -698,24 +622,8 @@ static int rxrpc_release_sock(struct sock *sk)
flush_workqueue(rxrpc_workqueue);
rxrpc_purge_queue(&sk->sk_receive_queue);
- if (rx->conn) {
- rxrpc_put_connection(rx->conn);
- rx->conn = NULL;
- }
-
- if (rx->bundle) {
- rxrpc_put_bundle(rx->trans, rx->bundle);
- rx->bundle = NULL;
- }
- if (rx->trans) {
- rxrpc_put_transport(rx->trans);
- rx->trans = NULL;
- }
- if (rx->local) {
- rxrpc_put_local(rx->local);
- rx->local = NULL;
- }
-
+ rxrpc_put_local(rx->local);
+ rx->local = NULL;
key_put(rx->key);
rx->key = NULL;
key_put(rx->securities);
@@ -796,49 +704,49 @@ static int __init af_rxrpc_init(void)
"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!rxrpc_call_jar) {
- printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
+ pr_notice("Failed to allocate call jar\n");
goto error_call_jar;
}
rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1);
if (!rxrpc_workqueue) {
- printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
+ pr_notice("Failed to allocate work queue\n");
goto error_work_queue;
}
ret = rxrpc_init_security();
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot initialise security\n");
+ pr_crit("Cannot initialise security\n");
goto error_security;
}
ret = proto_register(&rxrpc_proto, 1);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
+ pr_crit("Cannot register protocol\n");
goto error_proto;
}
ret = sock_register(&rxrpc_family_ops);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register socket family\n");
+ pr_crit("Cannot register socket family\n");
goto error_sock;
}
ret = register_key_type(&key_type_rxrpc);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register client key type\n");
+ pr_crit("Cannot register client key type\n");
goto error_key_type;
}
ret = register_key_type(&key_type_rxrpc_s);
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register server key type\n");
+ pr_crit("Cannot register server key type\n");
goto error_key_type_s;
}
ret = rxrpc_sysctl_init();
if (ret < 0) {
- printk(KERN_CRIT "RxRPC: Cannot register sysctls\n");
+ pr_crit("Cannot register sysctls\n");
goto error_sysctls;
}
@@ -880,14 +788,29 @@ static void __exit af_rxrpc_exit(void)
proto_unregister(&rxrpc_proto);
rxrpc_destroy_all_calls();
rxrpc_destroy_all_connections();
- rxrpc_destroy_all_transports();
- rxrpc_destroy_all_peers();
- rxrpc_destroy_all_locals();
ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
+ /* We need to flush the scheduled work twice because the local endpoint
+ * records involve a work item in their destruction as they can only be
+ * destroyed from process context. However, a connection may have a
+ * work item outstanding - and this will pin the local endpoint record
+ * until the connection goes away.
+ *
+ * Peers don't pin locals and calls pin sockets - which prevents the
+ * module from being unloaded - so we should only need two flushes.
+ */
_debug("flush scheduled work");
flush_workqueue(rxrpc_workqueue);
+ _debug("flush scheduled work 2");
+ flush_workqueue(rxrpc_workqueue);
+ _debug("synchronise RCU");
+ rcu_barrier();
+ _debug("destroy locals");
+ ASSERT(idr_is_empty(&rxrpc_client_conn_ids));
+ idr_destroy(&rxrpc_client_conn_ids);
+ rxrpc_destroy_all_locals();
+
remove_proc_entry("rxrpc_conns", init_net.proc_net);
remove_proc_entry("rxrpc_calls", init_net.proc_net);
destroy_workqueue(rxrpc_workqueue);
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
deleted file mode 100644
index 97f4fae74bca..000000000000
--- a/net/rxrpc/ar-connection.c
+++ /dev/null
@@ -1,927 +0,0 @@
-/* RxRPC virtual connection handler
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/crypto.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
-#include "ar-internal.h"
-
-/*
- * Time till a connection expires after last use (in seconds).
- */
-unsigned int rxrpc_connection_expiry = 10 * 60;
-
-static void rxrpc_connection_reaper(struct work_struct *work);
-
-LIST_HEAD(rxrpc_connections);
-DEFINE_RWLOCK(rxrpc_connection_lock);
-static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
-
-/*
- * allocate a new client connection bundle
- */
-static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
-{
- struct rxrpc_conn_bundle *bundle;
-
- _enter("");
-
- bundle = kzalloc(sizeof(struct rxrpc_conn_bundle), gfp);
- if (bundle) {
- INIT_LIST_HEAD(&bundle->unused_conns);
- INIT_LIST_HEAD(&bundle->avail_conns);
- INIT_LIST_HEAD(&bundle->busy_conns);
- init_waitqueue_head(&bundle->chanwait);
- atomic_set(&bundle->usage, 1);
- }
-
- _leave(" = %p", bundle);
- return bundle;
-}
-
-/*
- * compare bundle parameters with what we're looking for
- * - return -ve, 0 or +ve
- */
-static inline
-int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
- struct key *key, u16 service_id)
-{
- return (bundle->service_id - service_id) ?:
- ((unsigned long)bundle->key - (unsigned long)key);
-}
-
-/*
- * get bundle of client connections that a client socket can make use of
- */
-struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
- struct rxrpc_transport *trans,
- struct key *key,
- u16 service_id,
- gfp_t gfp)
-{
- struct rxrpc_conn_bundle *bundle, *candidate;
- struct rb_node *p, *parent, **pp;
-
- _enter("%p{%x},%x,%hx,",
- rx, key_serial(key), trans->debug_id, service_id);
-
- if (rx->trans == trans && rx->bundle) {
- atomic_inc(&rx->bundle->usage);
- return rx->bundle;
- }
-
- /* search the extant bundles first for one that matches the specified
- * user ID */
- spin_lock(&trans->client_lock);
-
- p = trans->bundles.rb_node;
- while (p) {
- bundle = rb_entry(p, struct rxrpc_conn_bundle, node);
-
- if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
- p = p->rb_left;
- else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
- p = p->rb_right;
- else
- goto found_extant_bundle;
- }
-
- spin_unlock(&trans->client_lock);
-
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_bundle(gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
-
- candidate->key = key_get(key);
- candidate->service_id = service_id;
-
- spin_lock(&trans->client_lock);
-
- pp = &trans->bundles.rb_node;
- parent = NULL;
- while (*pp) {
- parent = *pp;
- bundle = rb_entry(parent, struct rxrpc_conn_bundle, node);
-
- if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
- pp = &(*pp)->rb_left;
- else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_second;
- }
-
- /* second search also failed; add the new bundle */
- bundle = candidate;
- candidate = NULL;
-
- rb_link_node(&bundle->node, parent, pp);
- rb_insert_color(&bundle->node, &trans->bundles);
- spin_unlock(&trans->client_lock);
- _net("BUNDLE new on trans %d", trans->debug_id);
- if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
- atomic_inc(&bundle->usage);
- rx->bundle = bundle;
- }
- _leave(" = %p [new]", bundle);
- return bundle;
-
- /* we found the bundle in the list immediately */
-found_extant_bundle:
- atomic_inc(&bundle->usage);
- spin_unlock(&trans->client_lock);
- _net("BUNDLE old on trans %d", trans->debug_id);
- if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
- atomic_inc(&bundle->usage);
- rx->bundle = bundle;
- }
- _leave(" = %p [extant %d]", bundle, atomic_read(&bundle->usage));
- return bundle;
-
- /* we found the bundle on the second time through the list */
-found_extant_second:
- atomic_inc(&bundle->usage);
- spin_unlock(&trans->client_lock);
- kfree(candidate);
- _net("BUNDLE old2 on trans %d", trans->debug_id);
- if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
- atomic_inc(&bundle->usage);
- rx->bundle = bundle;
- }
- _leave(" = %p [second %d]", bundle, atomic_read(&bundle->usage));
- return bundle;
-}
-
-/*
- * release a bundle
- */
-void rxrpc_put_bundle(struct rxrpc_transport *trans,
- struct rxrpc_conn_bundle *bundle)
-{
- _enter("%p,%p{%d}",trans, bundle, atomic_read(&bundle->usage));
-
- if (atomic_dec_and_lock(&bundle->usage, &trans->client_lock)) {
- _debug("Destroy bundle");
- rb_erase(&bundle->node, &trans->bundles);
- spin_unlock(&trans->client_lock);
- ASSERT(list_empty(&bundle->unused_conns));
- ASSERT(list_empty(&bundle->avail_conns));
- ASSERT(list_empty(&bundle->busy_conns));
- ASSERTCMP(bundle->num_conns, ==, 0);
- key_put(bundle->key);
- kfree(bundle);
- }
-
- _leave("");
-}
-
-/*
- * allocate a new connection
- */
-static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
-{
- struct rxrpc_connection *conn;
-
- _enter("");
-
- conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
- if (conn) {
- INIT_WORK(&conn->processor, &rxrpc_process_connection);
- INIT_LIST_HEAD(&conn->bundle_link);
- conn->calls = RB_ROOT;
- skb_queue_head_init(&conn->rx_queue);
- conn->security = &rxrpc_no_security;
- rwlock_init(&conn->lock);
- spin_lock_init(&conn->state_lock);
- atomic_set(&conn->usage, 1);
- conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
- conn->avail_calls = RXRPC_MAXCALLS;
- conn->size_align = 4;
- conn->header_size = sizeof(struct rxrpc_wire_header);
- }
-
- _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
- return conn;
-}
-
-/*
- * assign a connection ID to a connection and add it to the transport's
- * connection lookup tree
- * - called with transport client lock held
- */
-static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
-{
- struct rxrpc_connection *xconn;
- struct rb_node *parent, **p;
- __be32 epoch;
- u32 cid;
-
- _enter("");
-
- epoch = conn->epoch;
-
- write_lock_bh(&conn->trans->conn_lock);
-
- conn->trans->conn_idcounter += RXRPC_CID_INC;
- if (conn->trans->conn_idcounter < RXRPC_CID_INC)
- conn->trans->conn_idcounter = RXRPC_CID_INC;
- cid = conn->trans->conn_idcounter;
-
-attempt_insertion:
- parent = NULL;
- p = &conn->trans->client_conns.rb_node;
-
- while (*p) {
- parent = *p;
- xconn = rb_entry(parent, struct rxrpc_connection, node);
-
- if (epoch < xconn->epoch)
- p = &(*p)->rb_left;
- else if (epoch > xconn->epoch)
- p = &(*p)->rb_right;
- else if (cid < xconn->cid)
- p = &(*p)->rb_left;
- else if (cid > xconn->cid)
- p = &(*p)->rb_right;
- else
- goto id_exists;
- }
-
- /* we've found a suitable hole - arrange for this connection to occupy
- * it */
- rb_link_node(&conn->node, parent, p);
- rb_insert_color(&conn->node, &conn->trans->client_conns);
-
- conn->cid = cid;
- write_unlock_bh(&conn->trans->conn_lock);
- _leave(" [CID %x]", cid);
- return;
-
- /* we found a connection with the proposed ID - walk the tree from that
- * point looking for the next unused ID */
-id_exists:
- for (;;) {
- cid += RXRPC_CID_INC;
- if (cid < RXRPC_CID_INC) {
- cid = RXRPC_CID_INC;
- conn->trans->conn_idcounter = cid;
- goto attempt_insertion;
- }
-
- parent = rb_next(parent);
- if (!parent)
- goto attempt_insertion;
-
- xconn = rb_entry(parent, struct rxrpc_connection, node);
- if (epoch < xconn->epoch ||
- cid < xconn->cid)
- goto attempt_insertion;
- }
-}
-
-/*
- * add a call to a connection's call-by-ID tree
- */
-static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
- struct rxrpc_call *call)
-{
- struct rxrpc_call *xcall;
- struct rb_node *parent, **p;
- __be32 call_id;
-
- write_lock_bh(&conn->lock);
-
- call_id = call->call_id;
- p = &conn->calls.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- xcall = rb_entry(parent, struct rxrpc_call, conn_node);
-
- if (call_id < xcall->call_id)
- p = &(*p)->rb_left;
- else if (call_id > xcall->call_id)
- p = &(*p)->rb_right;
- else
- BUG();
- }
-
- rb_link_node(&call->conn_node, parent, p);
- rb_insert_color(&call->conn_node, &conn->calls);
-
- write_unlock_bh(&conn->lock);
-}
-
-/*
- * connect a call on an exclusive connection
- */
-static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
- struct rxrpc_transport *trans,
- u16 service_id,
- struct rxrpc_call *call,
- gfp_t gfp)
-{
- struct rxrpc_connection *conn;
- int chan, ret;
-
- _enter("");
-
- conn = rx->conn;
- if (!conn) {
- /* not yet present - create a candidate for a new connection
- * and then redo the check */
- conn = rxrpc_alloc_connection(gfp);
- if (!conn) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- conn->trans = trans;
- conn->bundle = NULL;
- conn->service_id = service_id;
- conn->epoch = rxrpc_epoch;
- conn->in_clientflag = 0;
- conn->out_clientflag = RXRPC_CLIENT_INITIATED;
- conn->cid = 0;
- conn->state = RXRPC_CONN_CLIENT;
- conn->avail_calls = RXRPC_MAXCALLS - 1;
- conn->security_level = rx->min_sec_level;
- conn->key = key_get(rx->key);
-
- ret = rxrpc_init_client_conn_security(conn);
- if (ret < 0) {
- key_put(conn->key);
- kfree(conn);
- _leave(" = %d [key]", ret);
- return ret;
- }
-
- write_lock_bh(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock_bh(&rxrpc_connection_lock);
-
- spin_lock(&trans->client_lock);
- atomic_inc(&trans->usage);
-
- _net("CONNECT EXCL new %d on TRANS %d",
- conn->debug_id, conn->trans->debug_id);
-
- rxrpc_assign_connection_id(conn);
- rx->conn = conn;
- } else {
- spin_lock(&trans->client_lock);
- }
-
- /* we've got a connection with a free channel and we can now attach the
- * call to it
- * - we're holding the transport's client lock
- * - we're holding a reference on the connection
- */
- for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
- if (!conn->channels[chan])
- goto found_channel;
- goto no_free_channels;
-
-found_channel:
- atomic_inc(&conn->usage);
- conn->channels[chan] = call;
- call->conn = conn;
- call->channel = chan;
- call->cid = conn->cid | chan;
- call->call_id = ++conn->call_counter;
-
- _net("CONNECT client on conn %d chan %d as call %x",
- conn->debug_id, chan, call->call_id);
-
- spin_unlock(&trans->client_lock);
-
- rxrpc_add_call_ID_to_conn(conn, call);
- _leave(" = 0");
- return 0;
-
-no_free_channels:
- spin_unlock(&trans->client_lock);
- _leave(" = -ENOSR");
- return -ENOSR;
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
- */
-int rxrpc_connect_call(struct rxrpc_sock *rx,
- struct rxrpc_transport *trans,
- struct rxrpc_conn_bundle *bundle,
- struct rxrpc_call *call,
- gfp_t gfp)
-{
- struct rxrpc_connection *conn, *candidate;
- int chan, ret;
-
- DECLARE_WAITQUEUE(myself, current);
-
- _enter("%p,%lx,", rx, call->user_call_ID);
-
- if (test_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags))
- return rxrpc_connect_exclusive(rx, trans, bundle->service_id,
- call, gfp);
-
- spin_lock(&trans->client_lock);
- for (;;) {
- /* see if the bundle has a call slot available */
- if (!list_empty(&bundle->avail_conns)) {
- _debug("avail");
- conn = list_entry(bundle->avail_conns.next,
- struct rxrpc_connection,
- bundle_link);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- list_del_init(&conn->bundle_link);
- bundle->num_conns--;
- continue;
- }
- if (--conn->avail_calls == 0)
- list_move(&conn->bundle_link,
- &bundle->busy_conns);
- ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
- ASSERT(conn->channels[0] == NULL ||
- conn->channels[1] == NULL ||
- conn->channels[2] == NULL ||
- conn->channels[3] == NULL);
- atomic_inc(&conn->usage);
- break;
- }
-
- if (!list_empty(&bundle->unused_conns)) {
- _debug("unused");
- conn = list_entry(bundle->unused_conns.next,
- struct rxrpc_connection,
- bundle_link);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- list_del_init(&conn->bundle_link);
- bundle->num_conns--;
- continue;
- }
- ASSERTCMP(conn->avail_calls, ==, RXRPC_MAXCALLS);
- conn->avail_calls = RXRPC_MAXCALLS - 1;
- ASSERT(conn->channels[0] == NULL &&
- conn->channels[1] == NULL &&
- conn->channels[2] == NULL &&
- conn->channels[3] == NULL);
- atomic_inc(&conn->usage);
- list_move(&conn->bundle_link, &bundle->avail_conns);
- break;
- }
-
- /* need to allocate a new connection */
- _debug("get new conn [%d]", bundle->num_conns);
-
- spin_unlock(&trans->client_lock);
-
- if (signal_pending(current))
- goto interrupted;
-
- if (bundle->num_conns >= 20) {
- _debug("too many conns");
-
- if (!gfpflags_allow_blocking(gfp)) {
- _leave(" = -EAGAIN");
- return -EAGAIN;
- }
-
- add_wait_queue(&bundle->chanwait, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (bundle->num_conns < 20 ||
- !list_empty(&bundle->unused_conns) ||
- !list_empty(&bundle->avail_conns))
- break;
- if (signal_pending(current))
- goto interrupted_dequeue;
- schedule();
- }
- remove_wait_queue(&bundle->chanwait, &myself);
- __set_current_state(TASK_RUNNING);
- spin_lock(&trans->client_lock);
- continue;
- }
-
- /* not yet present - create a candidate for a new connection and then
- * redo the check */
- candidate = rxrpc_alloc_connection(gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- candidate->trans = trans;
- candidate->bundle = bundle;
- candidate->service_id = bundle->service_id;
- candidate->epoch = rxrpc_epoch;
- candidate->in_clientflag = 0;
- candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->cid = 0;
- candidate->state = RXRPC_CONN_CLIENT;
- candidate->avail_calls = RXRPC_MAXCALLS;
- candidate->security_level = rx->min_sec_level;
- candidate->key = key_get(bundle->key);
-
- ret = rxrpc_init_client_conn_security(candidate);
- if (ret < 0) {
- key_put(candidate->key);
- kfree(candidate);
- _leave(" = %d [key]", ret);
- return ret;
- }
-
- write_lock_bh(&rxrpc_connection_lock);
- list_add_tail(&candidate->link, &rxrpc_connections);
- write_unlock_bh(&rxrpc_connection_lock);
-
- spin_lock(&trans->client_lock);
-
- list_add(&candidate->bundle_link, &bundle->unused_conns);
- bundle->num_conns++;
- atomic_inc(&bundle->usage);
- atomic_inc(&trans->usage);
-
- _net("CONNECT new %d on TRANS %d",
- candidate->debug_id, candidate->trans->debug_id);
-
- rxrpc_assign_connection_id(candidate);
- candidate->security->prime_packet_security(candidate);
-
- /* leave the candidate lurking in zombie mode attached to the
- * bundle until we're ready for it */
- rxrpc_put_connection(candidate);
- candidate = NULL;
- }
-
- /* we've got a connection with a free channel and we can now attach the
- * call to it
- * - we're holding the transport's client lock
- * - we're holding a reference on the connection
- * - we're holding a reference on the bundle
- */
- for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
- if (!conn->channels[chan])
- goto found_channel;
- ASSERT(conn->channels[0] == NULL ||
- conn->channels[1] == NULL ||
- conn->channels[2] == NULL ||
- conn->channels[3] == NULL);
- BUG();
-
-found_channel:
- conn->channels[chan] = call;
- call->conn = conn;
- call->channel = chan;
- call->cid = conn->cid | chan;
- call->call_id = ++conn->call_counter;
-
- _net("CONNECT client on conn %d chan %d as call %x",
- conn->debug_id, chan, call->call_id);
-
- ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
- spin_unlock(&trans->client_lock);
-
- rxrpc_add_call_ID_to_conn(conn, call);
-
- _leave(" = 0");
- return 0;
-
-interrupted_dequeue:
- remove_wait_queue(&bundle->chanwait, &myself);
- __set_current_state(TASK_RUNNING);
-interrupted:
- _leave(" = -ERESTARTSYS");
- return -ERESTARTSYS;
-}
-
-/*
- * get a record of an incoming connection
- */
-struct rxrpc_connection *
-rxrpc_incoming_connection(struct rxrpc_transport *trans,
- struct rxrpc_host_header *hdr)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct rb_node *p, **pp;
- const char *new = "old";
- __be32 epoch;
- u32 cid;
-
- _enter("");
-
- ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
-
- epoch = hdr->epoch;
- cid = hdr->cid & RXRPC_CIDMASK;
-
- /* search the connection list first */
- read_lock_bh(&trans->conn_lock);
-
- p = trans->server_conns.rb_node;
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, node);
-
- _debug("maybe %x", conn->cid);
-
- if (epoch < conn->epoch)
- p = p->rb_left;
- else if (epoch > conn->epoch)
- p = p->rb_right;
- else if (cid < conn->cid)
- p = p->rb_left;
- else if (cid > conn->cid)
- p = p->rb_right;
- else
- goto found_extant_connection;
- }
- read_unlock_bh(&trans->conn_lock);
-
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_connection(GFP_NOIO);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
-
- candidate->trans = trans;
- candidate->epoch = hdr->epoch;
- candidate->cid = hdr->cid & RXRPC_CIDMASK;
- candidate->service_id = hdr->serviceId;
- candidate->security_ix = hdr->securityIndex;
- candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->out_clientflag = 0;
- candidate->state = RXRPC_CONN_SERVER;
- if (candidate->service_id)
- candidate->state = RXRPC_CONN_SERVER_UNSECURED;
-
- write_lock_bh(&trans->conn_lock);
-
- pp = &trans->server_conns.rb_node;
- p = NULL;
- while (*pp) {
- p = *pp;
- conn = rb_entry(p, struct rxrpc_connection, node);
-
- if (epoch < conn->epoch)
- pp = &(*pp)->rb_left;
- else if (epoch > conn->epoch)
- pp = &(*pp)->rb_right;
- else if (cid < conn->cid)
- pp = &(*pp)->rb_left;
- else if (cid > conn->cid)
- pp = &(*pp)->rb_right;
- else
- goto found_extant_second;
- }
-
- /* we can now add the new candidate to the list */
- conn = candidate;
- candidate = NULL;
- rb_link_node(&conn->node, p, pp);
- rb_insert_color(&conn->node, &trans->server_conns);
- atomic_inc(&conn->trans->usage);
-
- write_unlock_bh(&trans->conn_lock);
-
- write_lock_bh(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock_bh(&rxrpc_connection_lock);
-
- new = "new";
-
-success:
- _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->cid);
-
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return conn;
-
- /* we found the connection in the list immediately */
-found_extant_connection:
- if (hdr->securityIndex != conn->security_ix) {
- read_unlock_bh(&trans->conn_lock);
- goto security_mismatch;
- }
- atomic_inc(&conn->usage);
- read_unlock_bh(&trans->conn_lock);
- goto success;
-
- /* we found the connection on the second time through the list */
-found_extant_second:
- if (hdr->securityIndex != conn->security_ix) {
- write_unlock_bh(&trans->conn_lock);
- goto security_mismatch;
- }
- atomic_inc(&conn->usage);
- write_unlock_bh(&trans->conn_lock);
- kfree(candidate);
- goto success;
-
-security_mismatch:
- kfree(candidate);
- _leave(" = -EKEYREJECTED");
- return ERR_PTR(-EKEYREJECTED);
-}
-
-/*
- * find a connection based on transport and RxRPC connection ID for an incoming
- * packet
- */
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
- struct rxrpc_host_header *hdr)
-{
- struct rxrpc_connection *conn;
- struct rb_node *p;
- u32 epoch, cid;
-
- _enter(",{%x,%x}", hdr->cid, hdr->flags);
-
- read_lock_bh(&trans->conn_lock);
-
- cid = hdr->cid & RXRPC_CIDMASK;
- epoch = hdr->epoch;
-
- if (hdr->flags & RXRPC_CLIENT_INITIATED)
- p = trans->server_conns.rb_node;
- else
- p = trans->client_conns.rb_node;
-
- while (p) {
- conn = rb_entry(p, struct rxrpc_connection, node);
-
- _debug("maybe %x", conn->cid);
-
- if (epoch < conn->epoch)
- p = p->rb_left;
- else if (epoch > conn->epoch)
- p = p->rb_right;
- else if (cid < conn->cid)
- p = p->rb_left;
- else if (cid > conn->cid)
- p = p->rb_right;
- else
- goto found;
- }
-
- read_unlock_bh(&trans->conn_lock);
- _leave(" = NULL");
- return NULL;
-
-found:
- atomic_inc(&conn->usage);
- read_unlock_bh(&trans->conn_lock);
- _leave(" = %p", conn);
- return conn;
-}
-
-/*
- * release a virtual connection
- */
-void rxrpc_put_connection(struct rxrpc_connection *conn)
-{
- _enter("%p{u=%d,d=%d}",
- conn, atomic_read(&conn->usage), conn->debug_id);
-
- ASSERTCMP(atomic_read(&conn->usage), >, 0);
-
- conn->put_time = ktime_get_seconds();
- if (atomic_dec_and_test(&conn->usage)) {
- _debug("zombie");
- rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
- }
-
- _leave("");
-}
-
-/*
- * destroy a virtual connection
- */
-static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
-{
- _enter("%p{%d}", conn, atomic_read(&conn->usage));
-
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
-
- _net("DESTROY CONN %d", conn->debug_id);
-
- if (conn->bundle)
- rxrpc_put_bundle(conn->trans, conn->bundle);
-
- ASSERT(RB_EMPTY_ROOT(&conn->calls));
- rxrpc_purge_queue(&conn->rx_queue);
-
- conn->security->clear(conn);
- key_put(conn->key);
- key_put(conn->server_key);
-
- rxrpc_put_transport(conn->trans);
- kfree(conn);
- _leave("");
-}
-
-/*
- * reap dead connections
- */
-static void rxrpc_connection_reaper(struct work_struct *work)
-{
- struct rxrpc_connection *conn, *_p;
- unsigned long now, earliest, reap_time;
-
- LIST_HEAD(graveyard);
-
- _enter("");
-
- now = ktime_get_seconds();
- earliest = ULONG_MAX;
-
- write_lock_bh(&rxrpc_connection_lock);
- list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
- (long) now - (long) conn->put_time);
-
- if (likely(atomic_read(&conn->usage) > 0))
- continue;
-
- spin_lock(&conn->trans->client_lock);
- write_lock(&conn->trans->conn_lock);
- reap_time = conn->put_time + rxrpc_connection_expiry;
-
- if (atomic_read(&conn->usage) > 0) {
- ;
- } else if (reap_time <= now) {
- list_move_tail(&conn->link, &graveyard);
- if (conn->out_clientflag)
- rb_erase(&conn->node,
- &conn->trans->client_conns);
- else
- rb_erase(&conn->node,
- &conn->trans->server_conns);
- if (conn->bundle) {
- list_del_init(&conn->bundle_link);
- conn->bundle->num_conns--;
- }
-
- } else if (reap_time < earliest) {
- earliest = reap_time;
- }
-
- write_unlock(&conn->trans->conn_lock);
- spin_unlock(&conn->trans->client_lock);
- }
- write_unlock_bh(&rxrpc_connection_lock);
-
- if (earliest != ULONG_MAX) {
- _debug("reschedule reaper %ld", (long) earliest - now);
- ASSERTCMP(earliest, >, now);
- rxrpc_queue_delayed_work(&rxrpc_connection_reap,
- (earliest - now) * HZ);
- }
-
- /* then destroy all those pulled out */
- while (!list_empty(&graveyard)) {
- conn = list_entry(graveyard.next, struct rxrpc_connection,
- link);
- list_del_init(&conn->link);
-
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
- rxrpc_destroy_connection(conn);
- }
-
- _leave("");
-}
-
-/*
- * preemptively destroy all the connection records rather than waiting for them
- * to time out
- */
-void __exit rxrpc_destroy_all_connections(void)
-{
- _enter("");
-
- rxrpc_connection_expiry = 0;
- cancel_delayed_work(&rxrpc_connection_reap);
- rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
-
- _leave("");
-}
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
deleted file mode 100644
index 3e82d6f0313c..000000000000
--- a/net/rxrpc/ar-error.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/* Error message handling (ICMP)
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/errqueue.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
-#include <net/ip.h>
-#include "ar-internal.h"
-
-/*
- * handle an error received on the local endpoint
- */
-void rxrpc_UDP_error_report(struct sock *sk)
-{
- struct sock_exterr_skb *serr;
- struct rxrpc_transport *trans;
- struct rxrpc_local *local = sk->sk_user_data;
- struct rxrpc_peer *peer;
- struct sk_buff *skb;
- __be32 addr;
- __be16 port;
-
- _enter("%p{%d}", sk, local->debug_id);
-
- skb = sock_dequeue_err_skb(sk);
- if (!skb) {
- _leave("UDP socket errqueue empty");
- return;
- }
- serr = SKB_EXT_ERR(skb);
- if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
- _leave("UDP empty message");
- kfree_skb(skb);
- return;
- }
-
- rxrpc_new_skb(skb);
-
- addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
- port = serr->port;
-
- _net("Rx UDP Error from %pI4:%hu", &addr, ntohs(port));
- _debug("Msg l:%d d:%d", skb->len, skb->data_len);
-
- peer = rxrpc_find_peer(local, addr, port);
- if (IS_ERR(peer)) {
- rxrpc_free_skb(skb);
- _leave(" [no peer]");
- return;
- }
-
- trans = rxrpc_find_transport(local, peer);
- if (!trans) {
- rxrpc_put_peer(peer);
- rxrpc_free_skb(skb);
- _leave(" [no trans]");
- return;
- }
-
- if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
- serr->ee.ee_type == ICMP_DEST_UNREACH &&
- serr->ee.ee_code == ICMP_FRAG_NEEDED
- ) {
- u32 mtu = serr->ee.ee_info;
-
- _net("Rx Received ICMP Fragmentation Needed (%d)", mtu);
-
- /* wind down the local interface MTU */
- if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
- peer->if_mtu = mtu;
- _net("I/F MTU %u", mtu);
- }
-
- if (mtu == 0) {
- /* they didn't give us a size, estimate one */
- mtu = peer->if_mtu;
- if (mtu > 1500) {
- mtu >>= 1;
- if (mtu < 1500)
- mtu = 1500;
- } else {
- mtu -= 100;
- if (mtu < peer->hdrsize)
- mtu = peer->hdrsize + 4;
- }
- }
-
- if (mtu < peer->mtu) {
- spin_lock_bh(&peer->lock);
- peer->mtu = mtu;
- peer->maxdata = peer->mtu - peer->hdrsize;
- spin_unlock_bh(&peer->lock);
- _net("Net MTU %u (maxdata %u)",
- peer->mtu, peer->maxdata);
- }
- }
-
- rxrpc_put_peer(peer);
-
- /* pass the transport ref to error_handler to release */
- skb_queue_tail(&trans->error_queue, skb);
- rxrpc_queue_work(&trans->error_handler);
- _leave("");
-}
-
-/*
- * deal with UDP error messages
- */
-void rxrpc_UDP_error_handler(struct work_struct *work)
-{
- struct sock_extended_err *ee;
- struct sock_exterr_skb *serr;
- struct rxrpc_transport *trans =
- container_of(work, struct rxrpc_transport, error_handler);
- struct sk_buff *skb;
- int err;
-
- _enter("");
-
- skb = skb_dequeue(&trans->error_queue);
- if (!skb)
- return;
-
- serr = SKB_EXT_ERR(skb);
- ee = &serr->ee;
-
- _net("Rx Error o=%d t=%d c=%d e=%d",
- ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
-
- err = ee->ee_errno;
-
- switch (ee->ee_origin) {
- case SO_EE_ORIGIN_ICMP:
- switch (ee->ee_type) {
- case ICMP_DEST_UNREACH:
- switch (ee->ee_code) {
- case ICMP_NET_UNREACH:
- _net("Rx Received ICMP Network Unreachable");
- break;
- case ICMP_HOST_UNREACH:
- _net("Rx Received ICMP Host Unreachable");
- break;
- case ICMP_PORT_UNREACH:
- _net("Rx Received ICMP Port Unreachable");
- break;
- case ICMP_NET_UNKNOWN:
- _net("Rx Received ICMP Unknown Network");
- break;
- case ICMP_HOST_UNKNOWN:
- _net("Rx Received ICMP Unknown Host");
- break;
- default:
- _net("Rx Received ICMP DestUnreach code=%u",
- ee->ee_code);
- break;
- }
- break;
-
- case ICMP_TIME_EXCEEDED:
- _net("Rx Received ICMP TTL Exceeded");
- break;
-
- default:
- _proto("Rx Received ICMP error { type=%u code=%u }",
- ee->ee_type, ee->ee_code);
- break;
- }
- break;
-
- case SO_EE_ORIGIN_LOCAL:
- _proto("Rx Received local error { error=%d }",
- ee->ee_errno);
- break;
-
- case SO_EE_ORIGIN_NONE:
- case SO_EE_ORIGIN_ICMP6:
- default:
- _proto("Rx Received error report { orig=%u }",
- ee->ee_origin);
- break;
- }
-
- /* terminate all the affected calls if there's an unrecoverable
- * error */
- if (err) {
- struct rxrpc_call *call, *_n;
-
- _debug("ISSUE ERROR %d", err);
-
- spin_lock_bh(&trans->peer->lock);
- trans->peer->net_error = err;
-
- list_for_each_entry_safe(call, _n, &trans->peer->error_targets,
- error_link) {
- write_lock(&call->state_lock);
- if (call->state != RXRPC_CALL_COMPLETE &&
- call->state < RXRPC_CALL_NETWORK_ERROR) {
- call->state = RXRPC_CALL_NETWORK_ERROR;
- set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
- rxrpc_queue_call(call);
- }
- write_unlock(&call->state_lock);
- list_del_init(&call->error_link);
- }
-
- spin_unlock_bh(&trans->peer->lock);
- }
-
- if (!skb_queue_empty(&trans->error_queue))
- rxrpc_queue_work(&trans->error_handler);
-
- rxrpc_free_skb(skb);
- rxrpc_put_transport(trans);
- _leave("");
-}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index f0b807a163fa..702db72196fb 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -9,7 +9,9 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/atomic.h>
#include <net/sock.h>
+#include <net/af_rxrpc.h>
#include <rxrpc/packet.h>
#if 0
@@ -35,13 +37,15 @@ struct rxrpc_crypt {
#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
#define rxrpc_queue_conn(CONN) rxrpc_queue_work(&(CONN)->processor)
+struct rxrpc_connection;
+
/*
* sk_state for RxRPC sockets
*/
enum {
- RXRPC_UNCONNECTED = 0,
+ RXRPC_UNBOUND = 0,
+ RXRPC_CLIENT_UNBOUND, /* Unbound socket used as client */
RXRPC_CLIENT_BOUND, /* client local address bound */
- RXRPC_CLIENT_CONNECTED, /* client is connected */
RXRPC_SERVER_BOUND, /* server local address bound */
RXRPC_SERVER_LISTENING, /* server listening for connections */
RXRPC_CLOSE, /* socket is being closed */
@@ -55,9 +59,6 @@ struct rxrpc_sock {
struct sock sk;
rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */
struct rxrpc_local *local; /* local endpoint */
- struct rxrpc_transport *trans; /* transport handler */
- struct rxrpc_conn_bundle *bundle; /* virtual connection bundle */
- struct rxrpc_connection *conn; /* exclusive virtual connection */
struct list_head listen_link; /* link in the local endpoint's listen list */
struct list_head secureq; /* calls awaiting connection security clearance */
struct list_head acceptq; /* calls awaiting acceptance */
@@ -65,12 +66,14 @@ struct rxrpc_sock {
struct key *securities; /* list of server security descriptors */
struct rb_root calls; /* outstanding calls on this socket */
unsigned long flags;
-#define RXRPC_SOCK_EXCLUSIVE_CONN 1 /* exclusive connection for a client socket */
+#define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */
rwlock_t call_lock; /* lock for calls */
u32 min_sec_level; /* minimum security level */
#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
+ bool exclusive; /* Exclusive connection for a client socket */
+ sa_family_t family; /* Protocol family created with */
struct sockaddr_rxrpc srx; /* local address */
- sa_family_t proto; /* protocol created with */
+ struct sockaddr_rxrpc connect_srx; /* Default client address from connect() */
};
#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
@@ -168,46 +171,52 @@ struct rxrpc_security {
};
/*
- * RxRPC local transport endpoint definition
- * - matched by local port, address and protocol type
+ * RxRPC local transport endpoint description
+ * - owned by a single AF_RXRPC socket
+ * - pointed to by transport socket struct sk_user_data
*/
struct rxrpc_local {
+ struct rcu_head rcu;
+ atomic_t usage;
+ struct list_head link;
struct socket *socket; /* my UDP socket */
- struct work_struct destroyer; /* endpoint destroyer */
- struct work_struct acceptor; /* incoming call processor */
- struct work_struct rejecter; /* packet reject writer */
- struct work_struct event_processor; /* endpoint event processor */
+ struct work_struct processor;
struct list_head services; /* services listening on this endpoint */
- struct list_head link; /* link in endpoint list */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */
struct sk_buff_head reject_queue; /* packets awaiting rejection */
struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */
+ struct rb_root client_conns; /* Client connections by socket params */
+ spinlock_t client_conns_lock; /* Lock for client_conns */
spinlock_t lock; /* access lock */
rwlock_t services_lock; /* lock for services list */
- atomic_t usage;
int debug_id; /* debug ID for printks */
- volatile char error_rcvd; /* T if received ICMP error outstanding */
+ bool dead;
struct sockaddr_rxrpc srx; /* local address */
};
/*
* RxRPC remote transport endpoint definition
- * - matched by remote port, address and protocol type
- * - holds the connection ID counter for connections between the two endpoints
+ * - matched by local endpoint, remote port, address and protocol type
*/
struct rxrpc_peer {
- struct work_struct destroyer; /* peer destroyer */
- struct list_head link; /* link in master peer list */
- struct list_head error_targets; /* targets for net error distribution */
- spinlock_t lock; /* access lock */
+ struct rcu_head rcu; /* This must be first */
atomic_t usage;
+ unsigned long hash_key;
+ struct hlist_node hash_link;
+ struct rxrpc_local *local;
+ struct hlist_head error_targets; /* targets for net error distribution */
+ struct work_struct error_distributor;
+ struct rb_root service_conns; /* Service connections */
+ rwlock_t conn_lock;
+ spinlock_t lock; /* access lock */
unsigned int if_mtu; /* interface MTU for this peer */
unsigned int mtu; /* network MTU for this peer */
unsigned int maxdata; /* data size (MTU - hdrsize) */
unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
int debug_id; /* debug ID for printks */
- int net_error; /* network error distributed */
+ int error_report; /* Net (+0) or local (+1000000) to distribute */
+#define RXRPC_LOCAL_ERROR_OFFSET 1000000
struct sockaddr_rxrpc srx; /* remote address */
/* calculated RTT cache */
@@ -219,68 +228,63 @@ struct rxrpc_peer {
};
/*
- * RxRPC point-to-point transport / connection manager definition
- * - handles a bundle of connections between two endpoints
- * - matched by { local, peer }
- */
-struct rxrpc_transport {
- struct rxrpc_local *local; /* local transport endpoint */
- struct rxrpc_peer *peer; /* remote transport endpoint */
- struct work_struct error_handler; /* network error distributor */
- struct rb_root bundles; /* client connection bundles on this transport */
- struct rb_root client_conns; /* client connections on this transport */
- struct rb_root server_conns; /* server connections on this transport */
- struct list_head link; /* link in master session list */
- struct sk_buff_head error_queue; /* error packets awaiting processing */
- unsigned long put_time; /* time at which to reap */
- spinlock_t client_lock; /* client connection allocation lock */
- rwlock_t conn_lock; /* lock for active/dead connections */
- atomic_t usage;
- int debug_id; /* debug ID for printks */
- unsigned int conn_idcounter; /* connection ID counter (client) */
+ * Keys for matching a connection.
+ */
+struct rxrpc_conn_proto {
+ unsigned long hash_key;
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ u32 epoch; /* epoch of this connection */
+ u32 cid; /* connection ID */
+ u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
+ u8 addr_size; /* Size of the address */
+ sa_family_t family; /* Transport protocol */
+ __be16 port; /* Peer UDP/UDP6 port */
+ union { /* Peer address */
+ struct in_addr ipv4_addr;
+ struct in6_addr ipv6_addr;
+ u32 raw_addr[0];
+ };
};
-/*
- * RxRPC client connection bundle
- * - matched by { transport, service_id, key }
- */
-struct rxrpc_conn_bundle {
- struct rb_node node; /* node in transport's lookup tree */
- struct list_head unused_conns; /* unused connections in this bundle */
- struct list_head avail_conns; /* available connections in this bundle */
- struct list_head busy_conns; /* busy connections in this bundle */
- struct key *key; /* security for this bundle */
- wait_queue_head_t chanwait; /* wait for channel to become available */
- atomic_t usage;
- int debug_id; /* debug ID for printks */
- unsigned short num_conns; /* number of connections in this bundle */
- u16 service_id; /* Service ID for this bundle */
- u8 security_ix; /* security type */
+struct rxrpc_conn_parameters {
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Remote endpoint */
+ struct key *key; /* Security details */
+ bool exclusive; /* T if conn is exclusive */
+ u16 service_id; /* Service ID for this connection */
+ u32 security_level; /* Security level selected */
};
/*
* RxRPC connection definition
- * - matched by { transport, service_id, conn_id, direction, key }
+ * - matched by { local, peer, epoch, conn_id, direction }
* - each connection can only handle four simultaneous calls
*/
struct rxrpc_connection {
- struct rxrpc_transport *trans; /* transport session */
- struct rxrpc_conn_bundle *bundle; /* connection bundle (client) */
+ struct rxrpc_conn_proto proto;
+ struct rxrpc_conn_parameters params;
+
+ spinlock_t channel_lock;
+ struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* active calls */
+ wait_queue_head_t channel_wq; /* queue to wait for channel to become available */
+
struct work_struct processor; /* connection event processor */
- struct rb_node node; /* node in transport's lookup tree */
+ union {
+ struct rb_node client_node; /* Node in local->client_conns */
+ struct rb_node service_node; /* Node in peer->service_conns */
+ };
struct list_head link; /* link in master connection list */
- struct list_head bundle_link; /* link in bundle */
struct rb_root calls; /* calls on this connection */
struct sk_buff_head rx_queue; /* received conn-level packets */
- struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* channels (active calls) */
const struct rxrpc_security *security; /* applied security module */
- struct key *key; /* security for this connection (client) */
struct key *server_key; /* security for this service */
struct crypto_skcipher *cipher; /* encryption handle */
struct rxrpc_crypt csum_iv; /* packet checksum base */
+ unsigned long flags;
+#define RXRPC_CONN_HAS_IDR 0 /* - Has a client conn ID assigned */
unsigned long events;
#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
- unsigned long put_time; /* time at which to reap */
+ unsigned long put_time; /* Time at which last put */
rwlock_t lock; /* access lock */
spinlock_t state_lock; /* state-change lock */
atomic_t usage;
@@ -301,17 +305,12 @@ struct rxrpc_connection {
unsigned int call_counter; /* call ID counter */
atomic_t serial; /* packet serial number counter */
atomic_t hi_serial; /* highest serial number received */
- u8 avail_calls; /* number of calls available */
+ atomic_t avail_chans; /* number of channels available */
u8 size_align; /* data size alignment (for security) */
u8 header_size; /* rxrpc + security header size */
u8 security_size; /* security header size */
- u32 security_level; /* security level negotiated */
u32 security_nonce; /* response re-use preventer */
- u32 epoch; /* epoch of this connection */
- u32 cid; /* connection ID */
- u16 service_id; /* service ID for this connection */
u8 security_ix; /* security type */
- u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
};
@@ -357,6 +356,8 @@ enum rxrpc_call_event {
* The states that a call can be in.
*/
enum rxrpc_call_state {
+ RXRPC_CALL_UNINITIALISED,
+ RXRPC_CALL_CLIENT_AWAIT_CONN, /* - client waiting for connection to become available */
RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
@@ -390,7 +391,7 @@ struct rxrpc_call {
struct work_struct destroyer; /* call destroyer */
struct work_struct processor; /* packet processor and ACK generator */
struct list_head link; /* link in master call list */
- struct list_head error_link; /* link in error distribution list */
+ struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* calls awaiting acceptance */
struct rb_node sock_node; /* node in socket call tree */
struct rb_node conn_node; /* node in connection call tree */
@@ -408,7 +409,8 @@ struct rxrpc_call {
atomic_t sequence; /* Tx data packet sequence counter */
u32 local_abort; /* local abort code */
u32 remote_abort; /* remote abort code */
- int error; /* local error incurred */
+ int error_report; /* Network error (ICMP/local transport) */
+ int error; /* Local error incurred */
enum rxrpc_call_state state : 8; /* current state of call */
int debug_id; /* debug ID for printks */
u8 channel; /* connection channel occupied by this call */
@@ -444,7 +446,7 @@ struct rxrpc_call {
unsigned long hash_key; /* Full hash key */
u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
- sa_family_t proto; /* Frame protocol */
+ sa_family_t family; /* Frame protocol */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
u32 epoch; /* epoch of this connection */
@@ -478,21 +480,21 @@ extern atomic_t rxrpc_debug_id;
extern struct workqueue_struct *rxrpc_workqueue;
/*
- * ar-accept.c
+ * call_accept.c
*/
-void rxrpc_accept_incoming_calls(struct work_struct *);
+void rxrpc_accept_incoming_calls(struct rxrpc_local *);
struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long);
int rxrpc_reject_call(struct rxrpc_sock *);
/*
- * ar-ack.c
+ * call_event.c
*/
void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
void rxrpc_process_call(struct work_struct *);
/*
- * ar-call.c
+ * call_object.c
*/
extern unsigned int rxrpc_max_call_lifetime;
extern unsigned int rxrpc_dead_call_expiry;
@@ -502,70 +504,82 @@ extern rwlock_t rxrpc_call_lock;
struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
void *, sa_family_t, const void *);
-struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
- struct rxrpc_transport *,
- struct rxrpc_conn_bundle *,
- unsigned long, int, gfp_t);
+struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
+ struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *,
+ unsigned long, gfp_t);
struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
struct rxrpc_connection *,
- struct rxrpc_host_header *);
-struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long);
+ struct sk_buff *);
void rxrpc_release_call(struct rxrpc_call *);
void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
void __rxrpc_put_call(struct rxrpc_call *);
void __exit rxrpc_destroy_all_calls(void);
/*
- * ar-connection.c
+ * conn_client.c
*/
-extern unsigned int rxrpc_connection_expiry;
-extern struct list_head rxrpc_connections;
-extern rwlock_t rxrpc_connection_lock;
+extern struct idr rxrpc_client_conn_ids;
-struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
- struct rxrpc_transport *,
- struct key *, u16, gfp_t);
-void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *);
-int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
- struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t);
-void rxrpc_put_connection(struct rxrpc_connection *);
-void __exit rxrpc_destroy_all_connections(void);
-struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
- struct rxrpc_host_header *);
-extern struct rxrpc_connection *
-rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *);
+int rxrpc_get_client_connection_id(struct rxrpc_connection *, gfp_t);
+void rxrpc_put_client_connection_id(struct rxrpc_connection *);
/*
- * ar-connevent.c
+ * conn_event.c
*/
void rxrpc_process_connection(struct work_struct *);
void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
-void rxrpc_reject_packets(struct work_struct *);
+void rxrpc_reject_packets(struct rxrpc_local *);
/*
- * ar-error.c
+ * conn_object.c
*/
-void rxrpc_UDP_error_report(struct sock *);
-void rxrpc_UDP_error_handler(struct work_struct *);
+extern unsigned int rxrpc_connection_expiry;
+extern struct list_head rxrpc_connections;
+extern rwlock_t rxrpc_connection_lock;
+
+int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *, gfp_t);
+struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *,
+ struct rxrpc_peer *,
+ struct sk_buff *);
+void rxrpc_disconnect_call(struct rxrpc_call *);
+void rxrpc_put_connection(struct rxrpc_connection *);
+void __exit rxrpc_destroy_all_connections(void);
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
+ struct rxrpc_peer *,
+ struct sk_buff *);
+
+static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
+{
+ return conn->out_clientflag;
+}
+
+static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
+{
+ return conn->proto.in_clientflag;
+}
+
+static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
+{
+ atomic_inc(&conn->usage);
+}
/*
- * ar-input.c
+ * input.c
*/
void rxrpc_data_ready(struct sock *);
int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool);
void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
/*
- * ar-local.c
+ * insecure.c
*/
-extern rwlock_t rxrpc_local_lock;
-
-struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
-void rxrpc_put_local(struct rxrpc_local *);
-void __exit rxrpc_destroy_all_locals(void);
+extern const struct rxrpc_security rxrpc_no_security;
/*
- * ar-key.c
+ * key.c
*/
extern struct key_type key_type_rxrpc;
extern struct key_type key_type_rxrpc_s;
@@ -576,80 +590,103 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
u32);
/*
- * ar-output.c
+ * local_event.c
*/
-extern unsigned int rxrpc_resend_timeout;
-
-int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
-int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *,
- struct msghdr *, size_t);
-int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+extern void rxrpc_process_local_events(struct rxrpc_local *);
/*
- * ar-peer.c
+ * local_object.c
*/
-struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
-void rxrpc_put_peer(struct rxrpc_peer *);
-struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *, __be32, __be16);
-void __exit rxrpc_destroy_all_peers(void);
+struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *);
+void __rxrpc_put_local(struct rxrpc_local *);
+void __exit rxrpc_destroy_all_locals(void);
-/*
- * ar-proc.c
- */
-extern const char *const rxrpc_call_states[];
-extern const struct file_operations rxrpc_call_seq_fops;
-extern const struct file_operations rxrpc_connection_seq_fops;
+static inline void rxrpc_get_local(struct rxrpc_local *local)
+{
+ atomic_inc(&local->usage);
+}
+
+static inline
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
+{
+ return atomic_inc_not_zero(&local->usage) ? local : NULL;
+}
+
+static inline void rxrpc_put_local(struct rxrpc_local *local)
+{
+ if (local && atomic_dec_and_test(&local->usage))
+ __rxrpc_put_local(local);
+}
/*
- * ar-recvmsg.c
+ * misc.c
*/
-void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
-int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
+extern unsigned int rxrpc_max_backlog __read_mostly;
+extern unsigned int rxrpc_requested_ack_delay;
+extern unsigned int rxrpc_soft_ack_delay;
+extern unsigned int rxrpc_idle_ack_delay;
+extern unsigned int rxrpc_rx_window_size;
+extern unsigned int rxrpc_rx_mtu;
+extern unsigned int rxrpc_rx_jumbo_max;
+
+extern const char *const rxrpc_pkts[];
+extern const s8 rxrpc_ack_priority[];
+
+extern const char *rxrpc_acks(u8 reason);
/*
- * ar-security.c
+ * output.c
*/
-int __init rxrpc_init_security(void);
-void rxrpc_exit_security(void);
-int rxrpc_init_client_conn_security(struct rxrpc_connection *);
-int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+extern unsigned int rxrpc_resend_timeout;
+
+int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *);
+int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
/*
- * ar-skbuff.c
+ * peer_event.c
*/
-void rxrpc_packet_destructor(struct sk_buff *);
+void rxrpc_error_report(struct sock *);
+void rxrpc_peer_error_distributor(struct work_struct *);
/*
- * ar-transport.c
+ * peer_object.c
*/
-extern unsigned int rxrpc_transport_expiry;
+struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
+ const struct sockaddr_rxrpc *);
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
+ struct sockaddr_rxrpc *, gfp_t);
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
+
+static inline void rxrpc_get_peer(struct rxrpc_peer *peer)
+{
+ atomic_inc(&peer->usage);
+}
+
+static inline
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
+{
+ return atomic_inc_not_zero(&peer->usage) ? peer : NULL;
+}
-struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
- struct rxrpc_peer *, gfp_t);
-void rxrpc_put_transport(struct rxrpc_transport *);
-void __exit rxrpc_destroy_all_transports(void);
-struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
- struct rxrpc_peer *);
+extern void __rxrpc_put_peer(struct rxrpc_peer *peer);
+static inline void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+ if (peer && atomic_dec_and_test(&peer->usage))
+ __rxrpc_put_peer(peer);
+}
/*
- * insecure.c
+ * proc.c
*/
-extern const struct rxrpc_security rxrpc_no_security;
+extern const char *const rxrpc_call_states[];
+extern const struct file_operations rxrpc_call_seq_fops;
+extern const struct file_operations rxrpc_connection_seq_fops;
/*
- * misc.c
+ * recvmsg.c
*/
-extern unsigned int rxrpc_requested_ack_delay;
-extern unsigned int rxrpc_soft_ack_delay;
-extern unsigned int rxrpc_idle_ack_delay;
-extern unsigned int rxrpc_rx_window_size;
-extern unsigned int rxrpc_rx_mtu;
-extern unsigned int rxrpc_rx_jumbo_max;
-
-extern const char *const rxrpc_pkts[];
-extern const s8 rxrpc_ack_priority[];
-
-extern const char *rxrpc_acks(u8 reason);
+void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
* rxkad.c
@@ -659,6 +696,19 @@ extern const struct rxrpc_security rxkad;
#endif
/*
+ * security.c
+ */
+int __init rxrpc_init_security(void);
+void rxrpc_exit_security(void);
+int rxrpc_init_client_conn_security(struct rxrpc_connection *);
+int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+
+/*
+ * skbuff.c
+ */
+void rxrpc_packet_destructor(struct sk_buff *);
+
+/*
* sysctl.c
*/
#ifdef CONFIG_SYSCTL
@@ -670,6 +720,12 @@ static inline void rxrpc_sysctl_exit(void) {}
#endif
/*
+ * utils.c
+ */
+void rxrpc_get_addr_from_skb(struct rxrpc_local *, const struct sk_buff *,
+ struct sockaddr_rxrpc *);
+
+/*
* debug tracing
*/
extern unsigned int rxrpc_debug;
@@ -744,21 +800,18 @@ do { \
#define ASSERT(X) \
do { \
if (unlikely(!(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
#define ASSERTCMP(X, OP, Y) \
do { \
- if (unlikely(!((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
- printk(KERN_ERR "%lu " #OP " %lu is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
- printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
+ unsigned long _x = (unsigned long)(X); \
+ unsigned long _y = (unsigned long)(Y); \
+ if (unlikely(!(_x OP _y))) { \
+ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+ _x, _x, #OP, _y, _y); \
BUG(); \
} \
} while (0)
@@ -766,21 +819,18 @@ do { \
#define ASSERTIF(C, X) \
do { \
if (unlikely((C) && !(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
#define ASSERTIFCMP(C, X, OP, Y) \
do { \
- if (unlikely((C) && !((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "RxRPC: Assertion failed\n"); \
- printk(KERN_ERR "%lu " #OP " %lu is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
- printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
+ unsigned long _x = (unsigned long)(X); \
+ unsigned long _y = (unsigned long)(Y); \
+ if (unlikely((C) && !(_x OP _y))) { \
+ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+ _x, _x, #OP, _y, _y); \
BUG(); \
} \
} while (0)
@@ -844,15 +894,6 @@ static inline void rxrpc_purge_queue(struct sk_buff_head *list)
rxrpc_free_skb(skb);
}
-static inline void __rxrpc_get_local(struct rxrpc_local *local, const char *f)
-{
- CHECK_SLAB_OKAY(&local->usage);
- if (atomic_inc_return(&local->usage) == 1)
- printk("resurrected (%s)\n", f);
-}
-
-#define rxrpc_get_local(LOCAL) __rxrpc_get_local((LOCAL), __func__)
-
#define rxrpc_get_call(CALL) \
do { \
CHECK_SLAB_OKAY(&(CALL)->usage); \
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
deleted file mode 100644
index 4e1e6db0050b..000000000000
--- a/net/rxrpc/ar-local.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/* AF_RXRPC local endpoint management
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
-#include <generated/utsrelease.h>
-#include "ar-internal.h"
-
-static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
-
-static LIST_HEAD(rxrpc_locals);
-DEFINE_RWLOCK(rxrpc_local_lock);
-static DECLARE_RWSEM(rxrpc_local_sem);
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
-
-static void rxrpc_destroy_local(struct work_struct *work);
-static void rxrpc_process_local_events(struct work_struct *work);
-
-/*
- * allocate a new local
- */
-static
-struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
-{
- struct rxrpc_local *local;
-
- local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
- if (local) {
- INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
- INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
- INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
- INIT_WORK(&local->event_processor, &rxrpc_process_local_events);
- INIT_LIST_HEAD(&local->services);
- INIT_LIST_HEAD(&local->link);
- init_rwsem(&local->defrag_sem);
- skb_queue_head_init(&local->accept_queue);
- skb_queue_head_init(&local->reject_queue);
- skb_queue_head_init(&local->event_queue);
- spin_lock_init(&local->lock);
- rwlock_init(&local->services_lock);
- atomic_set(&local->usage, 1);
- local->debug_id = atomic_inc_return(&rxrpc_debug_id);
- memcpy(&local->srx, srx, sizeof(*srx));
- }
-
- _leave(" = %p", local);
- return local;
-}
-
-/*
- * create the local socket
- * - must be called with rxrpc_local_sem writelocked
- */
-static int rxrpc_create_local(struct rxrpc_local *local)
-{
- struct sock *sock;
- int ret, opt;
-
- _enter("%p{%d}", local, local->srx.transport_type);
-
- /* create a socket to represent the local endpoint */
- ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
- IPPROTO_UDP, &local->socket);
- if (ret < 0) {
- _leave(" = %d [socket]", ret);
- return ret;
- }
-
- /* if a local address was supplied then bind it */
- if (local->srx.transport_len > sizeof(sa_family_t)) {
- _debug("bind");
- ret = kernel_bind(local->socket,
- (struct sockaddr *) &local->srx.transport,
- local->srx.transport_len);
- if (ret < 0) {
- _debug("bind failed");
- goto error;
- }
- }
-
- /* we want to receive ICMP errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
-
- /* we want to set the don't fragment bit */
- opt = IP_PMTUDISC_DO;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
-
- write_lock_bh(&rxrpc_local_lock);
- list_add(&local->link, &rxrpc_locals);
- write_unlock_bh(&rxrpc_local_lock);
-
- /* set the socket up */
- sock = local->socket->sk;
- sock->sk_user_data = local;
- sock->sk_data_ready = rxrpc_data_ready;
- sock->sk_error_report = rxrpc_UDP_error_report;
- _leave(" = 0");
- return 0;
-
-error:
- kernel_sock_shutdown(local->socket, SHUT_RDWR);
- local->socket->sk->sk_user_data = NULL;
- sock_release(local->socket);
- local->socket = NULL;
-
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * create a new local endpoint using the specified UDP address
- */
-struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx)
-{
- struct rxrpc_local *local;
- int ret;
-
- _enter("{%d,%u,%pI4+%hu}",
- srx->transport_type,
- srx->transport.family,
- &srx->transport.sin.sin_addr,
- ntohs(srx->transport.sin.sin_port));
-
- down_write(&rxrpc_local_sem);
-
- /* see if we have a suitable local local endpoint already */
- read_lock_bh(&rxrpc_local_lock);
-
- list_for_each_entry(local, &rxrpc_locals, link) {
- _debug("CMP {%d,%u,%pI4+%hu}",
- local->srx.transport_type,
- local->srx.transport.family,
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port));
-
- if (local->srx.transport_type != srx->transport_type ||
- local->srx.transport.family != srx->transport.family)
- continue;
-
- switch (srx->transport.family) {
- case AF_INET:
- if (local->srx.transport.sin.sin_port !=
- srx->transport.sin.sin_port)
- continue;
- if (memcmp(&local->srx.transport.sin.sin_addr,
- &srx->transport.sin.sin_addr,
- sizeof(struct in_addr)) != 0)
- continue;
- goto found_local;
-
- default:
- BUG();
- }
- }
-
- read_unlock_bh(&rxrpc_local_lock);
-
- /* we didn't find one, so we need to create one */
- local = rxrpc_alloc_local(srx);
- if (!local) {
- up_write(&rxrpc_local_sem);
- return ERR_PTR(-ENOMEM);
- }
-
- ret = rxrpc_create_local(local);
- if (ret < 0) {
- up_write(&rxrpc_local_sem);
- kfree(local);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
- }
-
- up_write(&rxrpc_local_sem);
-
- _net("LOCAL new %d {%d,%u,%pI4+%hu}",
- local->debug_id,
- local->srx.transport_type,
- local->srx.transport.family,
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port));
-
- _leave(" = %p [new]", local);
- return local;
-
-found_local:
- rxrpc_get_local(local);
- read_unlock_bh(&rxrpc_local_lock);
- up_write(&rxrpc_local_sem);
-
- _net("LOCAL old %d {%d,%u,%pI4+%hu}",
- local->debug_id,
- local->srx.transport_type,
- local->srx.transport.family,
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port));
-
- _leave(" = %p [reuse]", local);
- return local;
-}
-
-/*
- * release a local endpoint
- */
-void rxrpc_put_local(struct rxrpc_local *local)
-{
- _enter("%p{u=%d}", local, atomic_read(&local->usage));
-
- ASSERTCMP(atomic_read(&local->usage), >, 0);
-
- /* to prevent a race, the decrement and the dequeue must be effectively
- * atomic */
- write_lock_bh(&rxrpc_local_lock);
- if (unlikely(atomic_dec_and_test(&local->usage))) {
- _debug("destroy local");
- rxrpc_queue_work(&local->destroyer);
- }
- write_unlock_bh(&rxrpc_local_lock);
- _leave("");
-}
-
-/*
- * destroy a local endpoint
- */
-static void rxrpc_destroy_local(struct work_struct *work)
-{
- struct rxrpc_local *local =
- container_of(work, struct rxrpc_local, destroyer);
-
- _enter("%p{%d}", local, atomic_read(&local->usage));
-
- down_write(&rxrpc_local_sem);
-
- write_lock_bh(&rxrpc_local_lock);
- if (atomic_read(&local->usage) > 0) {
- write_unlock_bh(&rxrpc_local_lock);
- up_read(&rxrpc_local_sem);
- _leave(" [resurrected]");
- return;
- }
-
- list_del(&local->link);
- local->socket->sk->sk_user_data = NULL;
- write_unlock_bh(&rxrpc_local_lock);
-
- downgrade_write(&rxrpc_local_sem);
-
- ASSERT(list_empty(&local->services));
- ASSERT(!work_pending(&local->acceptor));
- ASSERT(!work_pending(&local->rejecter));
- ASSERT(!work_pending(&local->event_processor));
-
- /* finish cleaning up the local descriptor */
- rxrpc_purge_queue(&local->accept_queue);
- rxrpc_purge_queue(&local->reject_queue);
- rxrpc_purge_queue(&local->event_queue);
- kernel_sock_shutdown(local->socket, SHUT_RDWR);
- sock_release(local->socket);
-
- up_read(&rxrpc_local_sem);
-
- _net("DESTROY LOCAL %d", local->debug_id);
- kfree(local);
-
- if (list_empty(&rxrpc_locals))
- wake_up_all(&rxrpc_local_wq);
-
- _leave("");
-}
-
-/*
- * preemptively destroy all local local endpoint rather than waiting for
- * them to be destroyed
- */
-void __exit rxrpc_destroy_all_locals(void)
-{
- DECLARE_WAITQUEUE(myself,current);
-
- _enter("");
-
- /* we simply have to wait for them to go away */
- if (!list_empty(&rxrpc_locals)) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&rxrpc_local_wq, &myself);
-
- while (!list_empty(&rxrpc_locals)) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- remove_wait_queue(&rxrpc_local_wq, &myself);
- set_current_state(TASK_RUNNING);
- }
-
- _leave("");
-}
-
-/*
- * Reply to a version request
- */
-static void rxrpc_send_version_request(struct rxrpc_local *local,
- struct rxrpc_host_header *hdr,
- struct sk_buff *skb)
-{
- struct rxrpc_wire_header whdr;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct sockaddr_in sin;
- struct msghdr msg;
- struct kvec iov[2];
- size_t len;
- int ret;
-
- _enter("");
-
- sin.sin_family = AF_INET;
- sin.sin_port = udp_hdr(skb)->source;
- sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
-
- msg.msg_name = &sin;
- msg.msg_namelen = sizeof(sin);
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.seq = 0;
- whdr.serial = 0;
- whdr.type = RXRPC_PACKET_TYPE_VERSION;
- whdr.flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
- whdr.userStatus = 0;
- whdr.securityIndex = 0;
- whdr._rsvd = 0;
- whdr.serviceId = htons(sp->hdr.serviceId);
-
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
- iov[1].iov_base = (char *)rxrpc_version_string;
- iov[1].iov_len = sizeof(rxrpc_version_string);
-
- len = iov[0].iov_len + iov[1].iov_len;
-
- _proto("Tx VERSION (reply)");
-
- ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
- if (ret < 0)
- _debug("sendmsg failed: %d", ret);
-
- _leave("");
-}
-
-/*
- * Process event packets targetted at a local endpoint.
- */
-static void rxrpc_process_local_events(struct work_struct *work)
-{
- struct rxrpc_local *local = container_of(work, struct rxrpc_local, event_processor);
- struct sk_buff *skb;
- char v;
-
- _enter("");
-
- atomic_inc(&local->usage);
-
- while ((skb = skb_dequeue(&local->event_queue))) {
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- _debug("{%d},{%u}", local->debug_id, sp->hdr.type);
-
- switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_VERSION:
- if (skb_copy_bits(skb, 0, &v, 1) < 0)
- return;
- _proto("Rx VERSION { %02x }", v);
- if (v == 0)
- rxrpc_send_version_request(local, &sp->hdr, skb);
- break;
-
- default:
- /* Just ignore anything we don't understand */
- break;
- }
-
- rxrpc_put_local(local);
- rxrpc_free_skb(skb);
- }
-
- rxrpc_put_local(local);
- _leave("");
-}
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
deleted file mode 100644
index dc089b1976aa..000000000000
--- a/net/rxrpc/ar-peer.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/* RxRPC remote transport endpoint management
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include "ar-internal.h"
-
-static LIST_HEAD(rxrpc_peers);
-static DEFINE_RWLOCK(rxrpc_peer_lock);
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq);
-
-static void rxrpc_destroy_peer(struct work_struct *work);
-
-/*
- * assess the MTU size for the network interface through which this peer is
- * reached
- */
-static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
-{
- struct rtable *rt;
- struct flowi4 fl4;
-
- peer->if_mtu = 1500;
-
- rt = ip_route_output_ports(&init_net, &fl4, NULL,
- peer->srx.transport.sin.sin_addr.s_addr, 0,
- htons(7000), htons(7001),
- IPPROTO_UDP, 0, 0);
- if (IS_ERR(rt)) {
- _leave(" [route err %ld]", PTR_ERR(rt));
- return;
- }
-
- peer->if_mtu = dst_mtu(&rt->dst);
- dst_release(&rt->dst);
-
- _leave(" [if_mtu %u]", peer->if_mtu);
-}
-
-/*
- * allocate a new peer
- */
-static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
- gfp_t gfp)
-{
- struct rxrpc_peer *peer;
-
- _enter("");
-
- peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
- if (peer) {
- INIT_WORK(&peer->destroyer, &rxrpc_destroy_peer);
- INIT_LIST_HEAD(&peer->link);
- INIT_LIST_HEAD(&peer->error_targets);
- spin_lock_init(&peer->lock);
- atomic_set(&peer->usage, 1);
- peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
- memcpy(&peer->srx, srx, sizeof(*srx));
-
- rxrpc_assess_MTU_size(peer);
- peer->mtu = peer->if_mtu;
-
- if (srx->transport.family == AF_INET) {
- peer->hdrsize = sizeof(struct iphdr);
- switch (srx->transport_type) {
- case SOCK_DGRAM:
- peer->hdrsize += sizeof(struct udphdr);
- break;
- default:
- BUG();
- break;
- }
- } else {
- BUG();
- }
-
- peer->hdrsize += sizeof(struct rxrpc_wire_header);
- peer->maxdata = peer->mtu - peer->hdrsize;
- }
-
- _leave(" = %p", peer);
- return peer;
-}
-
-/*
- * obtain a remote transport endpoint for the specified address
- */
-struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp)
-{
- struct rxrpc_peer *peer, *candidate;
- const char *new = "old";
- int usage;
-
- _enter("{%d,%d,%pI4+%hu}",
- srx->transport_type,
- srx->transport_len,
- &srx->transport.sin.sin_addr,
- ntohs(srx->transport.sin.sin_port));
-
- /* search the peer list first */
- read_lock_bh(&rxrpc_peer_lock);
- list_for_each_entry(peer, &rxrpc_peers, link) {
- _debug("check PEER %d { u=%d t=%d l=%d }",
- peer->debug_id,
- atomic_read(&peer->usage),
- peer->srx.transport_type,
- peer->srx.transport_len);
-
- if (atomic_read(&peer->usage) > 0 &&
- peer->srx.transport_type == srx->transport_type &&
- peer->srx.transport_len == srx->transport_len &&
- memcmp(&peer->srx.transport,
- &srx->transport,
- srx->transport_len) == 0)
- goto found_extant_peer;
- }
- read_unlock_bh(&rxrpc_peer_lock);
-
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_peer(srx, gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
-
- write_lock_bh(&rxrpc_peer_lock);
-
- list_for_each_entry(peer, &rxrpc_peers, link) {
- if (atomic_read(&peer->usage) > 0 &&
- peer->srx.transport_type == srx->transport_type &&
- peer->srx.transport_len == srx->transport_len &&
- memcmp(&peer->srx.transport,
- &srx->transport,
- srx->transport_len) == 0)
- goto found_extant_second;
- }
-
- /* we can now add the new candidate to the list */
- peer = candidate;
- candidate = NULL;
- usage = atomic_read(&peer->usage);
-
- list_add_tail(&peer->link, &rxrpc_peers);
- write_unlock_bh(&rxrpc_peer_lock);
- new = "new";
-
-success:
- _net("PEER %s %d {%d,%u,%pI4+%hu}",
- new,
- peer->debug_id,
- peer->srx.transport_type,
- peer->srx.transport.family,
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
-
- _leave(" = %p {u=%d}", peer, usage);
- return peer;
-
- /* we found the peer in the list immediately */
-found_extant_peer:
- usage = atomic_inc_return(&peer->usage);
- read_unlock_bh(&rxrpc_peer_lock);
- goto success;
-
- /* we found the peer on the second time through the list */
-found_extant_second:
- usage = atomic_inc_return(&peer->usage);
- write_unlock_bh(&rxrpc_peer_lock);
- kfree(candidate);
- goto success;
-}
-
-/*
- * find the peer associated with a packet
- */
-struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *local,
- __be32 addr, __be16 port)
-{
- struct rxrpc_peer *peer;
-
- _enter("");
-
- /* search the peer list */
- read_lock_bh(&rxrpc_peer_lock);
-
- if (local->srx.transport.family == AF_INET &&
- local->srx.transport_type == SOCK_DGRAM
- ) {
- list_for_each_entry(peer, &rxrpc_peers, link) {
- if (atomic_read(&peer->usage) > 0 &&
- peer->srx.transport_type == SOCK_DGRAM &&
- peer->srx.transport.family == AF_INET &&
- peer->srx.transport.sin.sin_port == port &&
- peer->srx.transport.sin.sin_addr.s_addr == addr)
- goto found_UDP_peer;
- }
-
- goto new_UDP_peer;
- }
-
- read_unlock_bh(&rxrpc_peer_lock);
- _leave(" = -EAFNOSUPPORT");
- return ERR_PTR(-EAFNOSUPPORT);
-
-found_UDP_peer:
- _net("Rx UDP DGRAM from peer %d", peer->debug_id);
- atomic_inc(&peer->usage);
- read_unlock_bh(&rxrpc_peer_lock);
- _leave(" = %p", peer);
- return peer;
-
-new_UDP_peer:
- _net("Rx UDP DGRAM from NEW peer");
- read_unlock_bh(&rxrpc_peer_lock);
- _leave(" = -EBUSY [new]");
- return ERR_PTR(-EBUSY);
-}
-
-/*
- * release a remote transport endpoint
- */
-void rxrpc_put_peer(struct rxrpc_peer *peer)
-{
- _enter("%p{u=%d}", peer, atomic_read(&peer->usage));
-
- ASSERTCMP(atomic_read(&peer->usage), >, 0);
-
- if (likely(!atomic_dec_and_test(&peer->usage))) {
- _leave(" [in use]");
- return;
- }
-
- rxrpc_queue_work(&peer->destroyer);
- _leave("");
-}
-
-/*
- * destroy a remote transport endpoint
- */
-static void rxrpc_destroy_peer(struct work_struct *work)
-{
- struct rxrpc_peer *peer =
- container_of(work, struct rxrpc_peer, destroyer);
-
- _enter("%p{%d}", peer, atomic_read(&peer->usage));
-
- write_lock_bh(&rxrpc_peer_lock);
- list_del(&peer->link);
- write_unlock_bh(&rxrpc_peer_lock);
-
- _net("DESTROY PEER %d", peer->debug_id);
- kfree(peer);
-
- if (list_empty(&rxrpc_peers))
- wake_up_all(&rxrpc_peer_wq);
- _leave("");
-}
-
-/*
- * preemptively destroy all the peer records from a transport endpoint rather
- * than waiting for them to time out
- */
-void __exit rxrpc_destroy_all_peers(void)
-{
- DECLARE_WAITQUEUE(myself,current);
-
- _enter("");
-
- /* we simply have to wait for them to go away */
- if (!list_empty(&rxrpc_peers)) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&rxrpc_peer_wq, &myself);
-
- while (!list_empty(&rxrpc_peers)) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- remove_wait_queue(&rxrpc_peer_wq, &myself);
- set_current_state(TASK_RUNNING);
- }
-
- _leave("");
-}
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
deleted file mode 100644
index 66a1a5676446..000000000000
--- a/net/rxrpc/ar-transport.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/* RxRPC point-to-point transport session management
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
-#include "ar-internal.h"
-
-/*
- * Time after last use at which transport record is cleaned up.
- */
-unsigned int rxrpc_transport_expiry = 3600 * 24;
-
-static void rxrpc_transport_reaper(struct work_struct *work);
-
-static LIST_HEAD(rxrpc_transports);
-static DEFINE_RWLOCK(rxrpc_transport_lock);
-static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
-
-/*
- * allocate a new transport session manager
- */
-static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- gfp_t gfp)
-{
- struct rxrpc_transport *trans;
-
- _enter("");
-
- trans = kzalloc(sizeof(struct rxrpc_transport), gfp);
- if (trans) {
- trans->local = local;
- trans->peer = peer;
- INIT_LIST_HEAD(&trans->link);
- trans->bundles = RB_ROOT;
- trans->client_conns = RB_ROOT;
- trans->server_conns = RB_ROOT;
- skb_queue_head_init(&trans->error_queue);
- spin_lock_init(&trans->client_lock);
- rwlock_init(&trans->conn_lock);
- atomic_set(&trans->usage, 1);
- trans->conn_idcounter = peer->srx.srx_service << 16;
- trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
-
- if (peer->srx.transport.family == AF_INET) {
- switch (peer->srx.transport_type) {
- case SOCK_DGRAM:
- INIT_WORK(&trans->error_handler,
- rxrpc_UDP_error_handler);
- break;
- default:
- BUG();
- break;
- }
- } else {
- BUG();
- }
- }
-
- _leave(" = %p", trans);
- return trans;
-}
-
-/*
- * obtain a transport session for the nominated endpoints
- */
-struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- gfp_t gfp)
-{
- struct rxrpc_transport *trans, *candidate;
- const char *new = "old";
- int usage;
-
- _enter("{%pI4+%hu},{%pI4+%hu},",
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port),
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
-
- /* search the transport list first */
- read_lock_bh(&rxrpc_transport_lock);
- list_for_each_entry(trans, &rxrpc_transports, link) {
- if (trans->local == local && trans->peer == peer)
- goto found_extant_transport;
- }
- read_unlock_bh(&rxrpc_transport_lock);
-
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_transport(local, peer, gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
-
- write_lock_bh(&rxrpc_transport_lock);
-
- list_for_each_entry(trans, &rxrpc_transports, link) {
- if (trans->local == local && trans->peer == peer)
- goto found_extant_second;
- }
-
- /* we can now add the new candidate to the list */
- trans = candidate;
- candidate = NULL;
- usage = atomic_read(&trans->usage);
-
- rxrpc_get_local(trans->local);
- atomic_inc(&trans->peer->usage);
- list_add_tail(&trans->link, &rxrpc_transports);
- write_unlock_bh(&rxrpc_transport_lock);
- new = "new";
-
-success:
- _net("TRANSPORT %s %d local %d -> peer %d",
- new,
- trans->debug_id,
- trans->local->debug_id,
- trans->peer->debug_id);
-
- _leave(" = %p {u=%d}", trans, usage);
- return trans;
-
- /* we found the transport in the list immediately */
-found_extant_transport:
- usage = atomic_inc_return(&trans->usage);
- read_unlock_bh(&rxrpc_transport_lock);
- goto success;
-
- /* we found the transport on the second time through the list */
-found_extant_second:
- usage = atomic_inc_return(&trans->usage);
- write_unlock_bh(&rxrpc_transport_lock);
- kfree(candidate);
- goto success;
-}
-
-/*
- * find the transport connecting two endpoints
- */
-struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *local,
- struct rxrpc_peer *peer)
-{
- struct rxrpc_transport *trans;
-
- _enter("{%pI4+%hu},{%pI4+%hu},",
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port),
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
-
- /* search the transport list */
- read_lock_bh(&rxrpc_transport_lock);
-
- list_for_each_entry(trans, &rxrpc_transports, link) {
- if (trans->local == local && trans->peer == peer)
- goto found_extant_transport;
- }
-
- read_unlock_bh(&rxrpc_transport_lock);
- _leave(" = NULL");
- return NULL;
-
-found_extant_transport:
- atomic_inc(&trans->usage);
- read_unlock_bh(&rxrpc_transport_lock);
- _leave(" = %p", trans);
- return trans;
-}
-
-/*
- * release a transport session
- */
-void rxrpc_put_transport(struct rxrpc_transport *trans)
-{
- _enter("%p{u=%d}", trans, atomic_read(&trans->usage));
-
- ASSERTCMP(atomic_read(&trans->usage), >, 0);
-
- trans->put_time = ktime_get_seconds();
- if (unlikely(atomic_dec_and_test(&trans->usage))) {
- _debug("zombie");
- /* let the reaper determine the timeout to avoid a race with
- * overextending the timeout if the reaper is running at the
- * same time */
- rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
- }
- _leave("");
-}
-
-/*
- * clean up a transport session
- */
-static void rxrpc_cleanup_transport(struct rxrpc_transport *trans)
-{
- _net("DESTROY TRANS %d", trans->debug_id);
-
- rxrpc_purge_queue(&trans->error_queue);
-
- rxrpc_put_local(trans->local);
- rxrpc_put_peer(trans->peer);
- kfree(trans);
-}
-
-/*
- * reap dead transports that have passed their expiry date
- */
-static void rxrpc_transport_reaper(struct work_struct *work)
-{
- struct rxrpc_transport *trans, *_p;
- unsigned long now, earliest, reap_time;
-
- LIST_HEAD(graveyard);
-
- _enter("");
-
- now = ktime_get_seconds();
- earliest = ULONG_MAX;
-
- /* extract all the transports that have been dead too long */
- write_lock_bh(&rxrpc_transport_lock);
- list_for_each_entry_safe(trans, _p, &rxrpc_transports, link) {
- _debug("reap TRANS %d { u=%d t=%ld }",
- trans->debug_id, atomic_read(&trans->usage),
- (long) now - (long) trans->put_time);
-
- if (likely(atomic_read(&trans->usage) > 0))
- continue;
-
- reap_time = trans->put_time + rxrpc_transport_expiry;
- if (reap_time <= now)
- list_move_tail(&trans->link, &graveyard);
- else if (reap_time < earliest)
- earliest = reap_time;
- }
- write_unlock_bh(&rxrpc_transport_lock);
-
- if (earliest != ULONG_MAX) {
- _debug("reschedule reaper %ld", (long) earliest - now);
- ASSERTCMP(earliest, >, now);
- rxrpc_queue_delayed_work(&rxrpc_transport_reap,
- (earliest - now) * HZ);
- }
-
- /* then destroy all those pulled out */
- while (!list_empty(&graveyard)) {
- trans = list_entry(graveyard.next, struct rxrpc_transport,
- link);
- list_del_init(&trans->link);
-
- ASSERTCMP(atomic_read(&trans->usage), ==, 0);
- rxrpc_cleanup_transport(trans);
- }
-
- _leave("");
-}
-
-/*
- * preemptively destroy all the transport session records rather than waiting
- * for them to time out
- */
-void __exit rxrpc_destroy_all_transports(void)
-{
- _enter("");
-
- rxrpc_transport_expiry = 0;
- cancel_delayed_work(&rxrpc_transport_reap);
- rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
-
- _leave("");
-}
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/call_accept.c
index e7a7f05f13e2..202e053a3c6d 100644
--- a/net/rxrpc/ar-accept.c
+++ b/net/rxrpc/call_accept.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
@@ -72,7 +74,6 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx)
{
struct rxrpc_connection *conn;
- struct rxrpc_transport *trans;
struct rxrpc_skb_priv *sp, *nsp;
struct rxrpc_peer *peer;
struct rxrpc_call *call;
@@ -93,30 +94,22 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
rxrpc_new_skb(notification);
notification->mark = RXRPC_SKB_MARK_NEW_CALL;
- peer = rxrpc_get_peer(srx, GFP_NOIO);
- if (IS_ERR(peer)) {
+ peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
+ if (!peer) {
_debug("no peer");
ret = -EBUSY;
goto error;
}
- trans = rxrpc_get_transport(local, peer, GFP_NOIO);
+ conn = rxrpc_incoming_connection(local, peer, skb);
rxrpc_put_peer(peer);
- if (IS_ERR(trans)) {
- _debug("no trans");
- ret = -EBUSY;
- goto error;
- }
-
- conn = rxrpc_incoming_connection(trans, &sp->hdr);
- rxrpc_put_transport(trans);
if (IS_ERR(conn)) {
_debug("no conn");
ret = PTR_ERR(conn);
goto error;
}
- call = rxrpc_incoming_call(rx, conn, &sp->hdr);
+ call = rxrpc_incoming_call(rx, conn, skb);
rxrpc_put_connection(conn);
if (IS_ERR(call)) {
_debug("no call");
@@ -139,7 +132,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
_debug("await conn sec");
list_add_tail(&call->accept_link, &rx->secureq);
call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
- atomic_inc(&call->conn->usage);
+ rxrpc_get_connection(call->conn);
set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
rxrpc_queue_conn(call->conn);
} else {
@@ -200,10 +193,8 @@ error_nofree:
* accept incoming calls that need peer, transport and/or connection setting up
* - the packets we get are all incoming client DATA packets that have seq == 1
*/
-void rxrpc_accept_incoming_calls(struct work_struct *work)
+void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
{
- struct rxrpc_local *local =
- container_of(work, struct rxrpc_local, acceptor);
struct rxrpc_skb_priv *sp;
struct sockaddr_rxrpc srx;
struct rxrpc_sock *rx;
@@ -213,21 +204,8 @@ void rxrpc_accept_incoming_calls(struct work_struct *work)
_enter("%d", local->debug_id);
- read_lock_bh(&rxrpc_local_lock);
- if (atomic_read(&local->usage) > 0)
- rxrpc_get_local(local);
- else
- local = NULL;
- read_unlock_bh(&rxrpc_local_lock);
- if (!local) {
- _leave(" [local dead]");
- return;
- }
-
-process_next_packet:
skb = skb_dequeue(&local->accept_queue);
if (!skb) {
- rxrpc_put_local(local);
_leave("\n");
return;
}
@@ -290,7 +268,7 @@ found_service:
case -ECONNRESET: /* old calls are ignored */
case -ECONNABORTED: /* aborted calls are reaborted or ignored */
case 0:
- goto process_next_packet;
+ return;
case -ECONNREFUSED:
goto invalid_service;
case -EBUSY:
@@ -306,18 +284,18 @@ backlog_full:
busy:
rxrpc_busy(local, &srx, &whdr);
rxrpc_free_skb(skb);
- goto process_next_packet;
+ return;
invalid_service:
skb->priority = RX_INVALID_OPERATION;
rxrpc_reject_packet(local, skb);
- goto process_next_packet;
+ return;
/* can't change connection security type mid-flow */
security_mismatch:
skb->priority = RX_PROTOCOL_ERROR;
rxrpc_reject_packet(local, skb);
- goto process_next_packet;
+ return;
}
/*
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/call_event.c
index 374478e006e7..0ba84295f913 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/call_event.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/circ_buf.h>
#include <linux/net.h>
@@ -185,7 +187,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
_proto("Tx DATA %%%u { #%d }",
sp->hdr.serial, sp->hdr.seq);
- if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
+ if (rxrpc_send_data_packet(call->conn, txb) < 0) {
stop = true;
sp->resend_at = jiffies + 3;
} else {
@@ -543,7 +545,7 @@ static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));
- peer = call->conn->trans->peer;
+ peer = call->conn->params.peer;
if (mtu < peer->maxdata) {
spin_lock_bh(&peer->lock);
peer->maxdata = mtu;
@@ -834,13 +836,13 @@ void rxrpc_process_call(struct work_struct *work)
/* there's a good chance we're going to have to send a message, so set
* one up in advance */
- msg.msg_name = &call->conn->trans->peer->srx.transport;
- msg.msg_namelen = call->conn->trans->peer->srx.transport_len;
+ msg.msg_name = &call->conn->params.peer->srx.transport;
+ msg.msg_namelen = call->conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
- whdr.epoch = htonl(call->conn->epoch);
+ whdr.epoch = htonl(call->conn->proto.epoch);
whdr.cid = htonl(call->cid);
whdr.callNumber = htonl(call->call_id);
whdr.seq = 0;
@@ -862,17 +864,24 @@ void rxrpc_process_call(struct work_struct *work)
}
if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
+ enum rxrpc_skb_mark mark;
int error;
clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
- error = call->conn->trans->peer->net_error;
- _debug("post net error %d", error);
+ error = call->error_report;
+ if (error < RXRPC_LOCAL_ERROR_OFFSET) {
+ mark = RXRPC_SKB_MARK_NET_ERROR;
+ _debug("post net error %d", error);
+ } else {
+ mark = RXRPC_SKB_MARK_LOCAL_ERROR;
+ error -= RXRPC_LOCAL_ERROR_OFFSET;
+ _debug("post net local error %d", error);
+ }
- if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
- error, true) < 0)
+ if (rxrpc_post_message(call, mark, error, true) < 0)
goto no_mem;
clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
goto kill_ACKs;
@@ -1142,8 +1151,8 @@ send_ACK_with_skew:
ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
ntohl(ack.serial));
send_ACK:
- mtu = call->conn->trans->peer->if_mtu;
- mtu -= call->conn->trans->peer->hdrsize;
+ mtu = call->conn->params.peer->if_mtu;
+ mtu -= call->conn->params.peer->hdrsize;
ackinfo.maxMTU = htonl(mtu);
ackinfo.rwind = htonl(rxrpc_rx_window_size);
@@ -1197,7 +1206,7 @@ send_message_2:
len += iov[1].iov_len;
}
- ret = kernel_sendmsg(call->conn->trans->local->socket,
+ ret = kernel_sendmsg(call->conn->params.local->socket,
&msg, iov, ioc, len);
if (ret < 0) {
_debug("sendmsg failed: %d", ret);
@@ -1255,7 +1264,7 @@ maybe_reschedule:
if (call->state >= RXRPC_CALL_COMPLETE &&
!list_empty(&call->accept_link)) {
_debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
- call, call->events, call->flags, call->conn->cid);
+ call, call->events, call->flags, call->conn->proto.cid);
read_lock_bh(&call->state_lock);
if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
@@ -1273,7 +1282,7 @@ error:
* this means there's a race between clearing the flag and setting the
* work pending bit and the work item being processed again */
if (call->events && !work_pending(&call->processor)) {
- _debug("jumpstart %x", call->conn->cid);
+ _debug("jumpstart %x", call->conn->proto.cid);
rxrpc_queue_call(call);
}
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/call_object.c
index 571a41fd5a32..ad933daae13b 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/call_object.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
@@ -29,6 +31,8 @@ unsigned int rxrpc_max_call_lifetime = 60 * HZ;
unsigned int rxrpc_dead_call_expiry = 2 * HZ;
const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
+ [RXRPC_CALL_UNINITIALISED] = "Uninit",
+ [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn",
[RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
[RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
[RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
@@ -69,7 +73,7 @@ static unsigned long rxrpc_call_hashfunc(
u32 call_id,
u32 epoch,
u16 service_id,
- sa_family_t proto,
+ sa_family_t family,
void *localptr,
unsigned int addr_size,
const u8 *peer_addr)
@@ -90,7 +94,7 @@ static unsigned long rxrpc_call_hashfunc(
key += (cid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
key += cid & RXRPC_CHANNELMASK;
key += in_clientflag;
- key += proto;
+ key += family;
/* Step through the peer address in 16-bit portions for speed */
for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
key += *p;
@@ -107,7 +111,7 @@ static void rxrpc_call_hash_add(struct rxrpc_call *call)
unsigned int addr_size = 0;
_enter("");
- switch (call->proto) {
+ switch (call->family) {
case AF_INET:
addr_size = sizeof(call->peer_ip.ipv4_addr);
break;
@@ -119,8 +123,8 @@ static void rxrpc_call_hash_add(struct rxrpc_call *call)
}
key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
call->call_id, call->epoch,
- call->service_id, call->proto,
- call->conn->trans->local, addr_size,
+ call->service_id, call->family,
+ call->conn->params.local, addr_size,
call->peer_ip.ipv6_addr);
/* Store the full key in the call */
call->hash_key = key;
@@ -149,7 +153,7 @@ static void rxrpc_call_hash_del(struct rxrpc_call *call)
struct rxrpc_call *rxrpc_find_call_hash(
struct rxrpc_host_header *hdr,
void *localptr,
- sa_family_t proto,
+ sa_family_t family,
const void *peer_addr)
{
unsigned long key;
@@ -159,7 +163,7 @@ struct rxrpc_call *rxrpc_find_call_hash(
u8 in_clientflag = hdr->flags & RXRPC_CLIENT_INITIATED;
_enter("");
- switch (proto) {
+ switch (family) {
case AF_INET:
addr_size = sizeof(call->peer_ip.ipv4_addr);
break;
@@ -172,7 +176,7 @@ struct rxrpc_call *rxrpc_find_call_hash(
key = rxrpc_call_hashfunc(in_clientflag, hdr->cid, hdr->callNumber,
hdr->epoch, hdr->serviceId,
- proto, localptr, addr_size,
+ family, localptr, addr_size,
peer_addr);
hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
if (call->hash_key == key &&
@@ -180,7 +184,7 @@ struct rxrpc_call *rxrpc_find_call_hash(
call->cid == hdr->cid &&
call->in_clientflag == in_clientflag &&
call->service_id == hdr->serviceId &&
- call->proto == proto &&
+ call->family == family &&
call->local == localptr &&
memcmp(call->peer_ip.ipv6_addr, peer_addr,
addr_size) == 0 &&
@@ -194,6 +198,43 @@ struct rxrpc_call *rxrpc_find_call_hash(
}
/*
+ * find an extant server call
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_call *call;
+ struct rb_node *p;
+
+ _enter("%p,%lx", rx, user_call_ID);
+
+ read_lock(&rx->call_lock);
+
+ p = rx->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ p = p->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ p = p->rb_right;
+ else
+ goto found_extant_call;
+ }
+
+ read_unlock(&rx->call_lock);
+ _leave(" = NULL");
+ return NULL;
+
+found_extant_call:
+ rxrpc_get_call(call);
+ read_unlock(&rx->call_lock);
+ _leave(" = %p [%d]", call, atomic_read(&call->usage));
+ return call;
+}
+
+/*
* allocate a new call
*/
static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
@@ -222,6 +263,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
(unsigned long) call);
INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
INIT_WORK(&call->processor, &rxrpc_process_call);
+ INIT_LIST_HEAD(&call->link);
INIT_LIST_HEAD(&call->accept_link);
skb_queue_head_init(&call->rx_queue);
skb_queue_head_init(&call->rx_oos_queue);
@@ -230,7 +272,6 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
rwlock_init(&call->state_lock);
atomic_set(&call->usage, 1);
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
- call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -243,117 +284,104 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
}
/*
- * allocate a new client call and attempt to get a connection slot for it
+ * Allocate a new client call.
*/
-static struct rxrpc_call *rxrpc_alloc_client_call(
- struct rxrpc_sock *rx,
- struct rxrpc_transport *trans,
- struct rxrpc_conn_bundle *bundle,
- gfp_t gfp)
+static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
{
struct rxrpc_call *call;
- int ret;
_enter("");
- ASSERT(rx != NULL);
- ASSERT(trans != NULL);
- ASSERT(bundle != NULL);
+ ASSERT(rx->local != NULL);
call = rxrpc_alloc_call(gfp);
if (!call)
return ERR_PTR(-ENOMEM);
+ call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
sock_hold(&rx->sk);
call->socket = rx;
call->rx_data_post = 1;
- ret = rxrpc_connect_call(rx, trans, bundle, call, gfp);
- if (ret < 0) {
- kmem_cache_free(rxrpc_call_jar, call);
- return ERR_PTR(ret);
- }
-
/* Record copies of information for hashtable lookup */
- call->proto = rx->proto;
- call->local = trans->local;
- switch (call->proto) {
+ call->family = rx->family;
+ call->local = rx->local;
+ switch (call->family) {
case AF_INET:
- call->peer_ip.ipv4_addr =
- trans->peer->srx.transport.sin.sin_addr.s_addr;
+ call->peer_ip.ipv4_addr = srx->transport.sin.sin_addr.s_addr;
break;
case AF_INET6:
memcpy(call->peer_ip.ipv6_addr,
- trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ srx->transport.sin6.sin6_addr.in6_u.u6_addr8,
sizeof(call->peer_ip.ipv6_addr));
break;
}
- call->epoch = call->conn->epoch;
- call->service_id = call->conn->service_id;
- call->in_clientflag = call->conn->in_clientflag;
+
+ call->service_id = srx->srx_service;
+ call->in_clientflag = 0;
+
+ _leave(" = %p", call);
+ return call;
+}
+
+/*
+ * Begin client call.
+ */
+static int rxrpc_begin_client_call(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ int ret;
+
+ /* Set up or get a connection record and set the protocol parameters,
+ * including channel number and call ID.
+ */
+ ret = rxrpc_connect_call(call, cp, srx, gfp);
+ if (ret < 0)
+ return ret;
+
+ call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+
/* Add the new call to the hashtable */
rxrpc_call_hash_add(call);
- spin_lock(&call->conn->trans->peer->lock);
- list_add(&call->error_link, &call->conn->trans->peer->error_targets);
- spin_unlock(&call->conn->trans->peer->lock);
+ spin_lock(&call->conn->params.peer->lock);
+ hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
+ spin_unlock(&call->conn->params.peer->lock);
call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
add_timer(&call->lifetimer);
-
- _leave(" = %p", call);
- return call;
+ return 0;
}
/*
* set up a call for the given data
* - called in process context with IRQs enabled
*/
-struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
- struct rxrpc_transport *trans,
- struct rxrpc_conn_bundle *bundle,
+struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
unsigned long user_call_ID,
- int create,
gfp_t gfp)
{
- struct rxrpc_call *call, *candidate;
- struct rb_node *p, *parent, **pp;
-
- _enter("%p,%d,%d,%lx,%d",
- rx, trans ? trans->debug_id : -1, bundle ? bundle->debug_id : -1,
- user_call_ID, create);
-
- /* search the extant calls first for one that matches the specified
- * user ID */
- read_lock(&rx->call_lock);
-
- p = rx->calls.rb_node;
- while (p) {
- call = rb_entry(p, struct rxrpc_call, sock_node);
-
- if (user_call_ID < call->user_call_ID)
- p = p->rb_left;
- else if (user_call_ID > call->user_call_ID)
- p = p->rb_right;
- else
- goto found_extant_call;
- }
-
- read_unlock(&rx->call_lock);
+ struct rxrpc_call *call, *xcall;
+ struct rb_node *parent, **pp;
+ int ret;
- if (!create || !trans)
- return ERR_PTR(-EBADSLT);
+ _enter("%p,%lx", rx, user_call_ID);
- /* not yet present - create a candidate for a new record and then
- * redo the search */
- candidate = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
- if (IS_ERR(candidate)) {
- _leave(" = %ld", PTR_ERR(candidate));
- return candidate;
+ call = rxrpc_alloc_client_call(rx, srx, gfp);
+ if (IS_ERR(call)) {
+ _leave(" = %ld", PTR_ERR(call));
+ return call;
}
- candidate->user_call_ID = user_call_ID;
- __set_bit(RXRPC_CALL_HAS_USERID, &candidate->flags);
+ /* Publish the call, even though it is incompletely set up as yet */
+ call->user_call_ID = user_call_ID;
+ __set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
write_lock(&rx->call_lock);
@@ -361,19 +389,16 @@ struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
parent = NULL;
while (*pp) {
parent = *pp;
- call = rb_entry(parent, struct rxrpc_call, sock_node);
+ xcall = rb_entry(parent, struct rxrpc_call, sock_node);
- if (user_call_ID < call->user_call_ID)
+ if (user_call_ID < xcall->user_call_ID)
pp = &(*pp)->rb_left;
- else if (user_call_ID > call->user_call_ID)
+ else if (user_call_ID > xcall->user_call_ID)
pp = &(*pp)->rb_right;
else
- goto found_extant_second;
+ goto found_user_ID_now_present;
}
- /* second search also failed; add the new call */
- call = candidate;
- candidate = NULL;
rxrpc_get_call(call);
rb_link_node(&call->sock_node, parent, pp);
@@ -384,25 +409,39 @@ struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
+ ret = rxrpc_begin_client_call(call, cp, srx, gfp);
+ if (ret < 0)
+ goto error;
+
_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
_leave(" = %p [new]", call);
return call;
- /* we found the call in the list immediately */
-found_extant_call:
- rxrpc_get_call(call);
- read_unlock(&rx->call_lock);
- _leave(" = %p [extant %d]", call, atomic_read(&call->usage));
- return call;
+error:
+ write_lock(&rx->call_lock);
+ rb_erase(&call->sock_node, &rx->calls);
+ write_unlock(&rx->call_lock);
+ rxrpc_put_call(call);
- /* we found the call on the second time through the list */
-found_extant_second:
- rxrpc_get_call(call);
+ write_lock_bh(&rxrpc_call_lock);
+ list_del(&call->link);
+ write_unlock_bh(&rxrpc_call_lock);
+
+ rxrpc_put_call(call);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+
+ /* We unexpectedly found the user ID in the list after taking
+ * the call_lock. This shouldn't happen unless the user races
+ * with itself and tries to add the same user ID twice at the
+ * same time in different threads.
+ */
+found_user_ID_now_present:
write_unlock(&rx->call_lock);
- rxrpc_put_call(candidate);
- _leave(" = %p [second %d]", call, atomic_read(&call->usage));
- return call;
+ rxrpc_put_call(call);
+ _leave(" = -EEXIST [%p]", call);
+ return ERR_PTR(-EEXIST);
}
/*
@@ -411,8 +450,9 @@ found_extant_second:
*/
struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
struct rxrpc_connection *conn,
- struct rxrpc_host_header *hdr)
+ struct sk_buff *skb)
{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_call *call, *candidate;
struct rb_node **p, *parent;
u32 call_id;
@@ -425,13 +465,13 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
if (!candidate)
return ERR_PTR(-EBUSY);
- candidate->socket = rx;
- candidate->conn = conn;
- candidate->cid = hdr->cid;
- candidate->call_id = hdr->callNumber;
- candidate->channel = hdr->cid & RXRPC_CHANNELMASK;
- candidate->rx_data_post = 0;
- candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
+ candidate->socket = rx;
+ candidate->conn = conn;
+ candidate->cid = sp->hdr.cid;
+ candidate->call_id = sp->hdr.callNumber;
+ candidate->channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ candidate->rx_data_post = 0;
+ candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
if (conn->security_ix > 0)
candidate->state = RXRPC_CALL_SERVER_SECURING;
@@ -440,7 +480,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
/* set the channel for this call */
call = conn->channels[candidate->channel];
_debug("channel[%u] is %p", candidate->channel, call);
- if (call && call->call_id == hdr->callNumber) {
+ if (call && call->call_id == sp->hdr.callNumber) {
/* already set; must've been a duplicate packet */
_debug("extant call [%d]", call->state);
ASSERTCMP(call->conn, ==, conn);
@@ -478,7 +518,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
/* check the call number isn't duplicate */
_debug("check dup");
- call_id = hdr->callNumber;
+ call_id = sp->hdr.callNumber;
p = &conn->calls.rb_node;
parent = NULL;
while (*p) {
@@ -504,36 +544,36 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
rb_insert_color(&call->conn_node, &conn->calls);
conn->channels[call->channel] = call;
sock_hold(&rx->sk);
- atomic_inc(&conn->usage);
+ rxrpc_get_connection(conn);
write_unlock_bh(&conn->lock);
- spin_lock(&conn->trans->peer->lock);
- list_add(&call->error_link, &conn->trans->peer->error_targets);
- spin_unlock(&conn->trans->peer->lock);
+ spin_lock(&conn->params.peer->lock);
+ hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
+ spin_unlock(&conn->params.peer->lock);
write_lock_bh(&rxrpc_call_lock);
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
/* Record copies of information for hashtable lookup */
- call->proto = rx->proto;
- call->local = conn->trans->local;
- switch (call->proto) {
+ call->family = rx->family;
+ call->local = conn->params.local;
+ switch (call->family) {
case AF_INET:
call->peer_ip.ipv4_addr =
- conn->trans->peer->srx.transport.sin.sin_addr.s_addr;
+ conn->params.peer->srx.transport.sin.sin_addr.s_addr;
break;
case AF_INET6:
memcpy(call->peer_ip.ipv6_addr,
- conn->trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ conn->params.peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
sizeof(call->peer_ip.ipv6_addr));
break;
default:
break;
}
- call->epoch = conn->epoch;
- call->service_id = conn->service_id;
- call->in_clientflag = conn->in_clientflag;
+ call->epoch = conn->proto.epoch;
+ call->service_id = conn->params.service_id;
+ call->in_clientflag = conn->proto.in_clientflag;
/* Add the new call to the hashtable */
rxrpc_call_hash_add(call);
@@ -564,46 +604,6 @@ old_call:
}
/*
- * find an extant server call
- * - called in process context with IRQs enabled
- */
-struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *rx,
- unsigned long user_call_ID)
-{
- struct rxrpc_call *call;
- struct rb_node *p;
-
- _enter("%p,%lx", rx, user_call_ID);
-
- /* search the extant calls for one that matches the specified user
- * ID */
- read_lock(&rx->call_lock);
-
- p = rx->calls.rb_node;
- while (p) {
- call = rb_entry(p, struct rxrpc_call, sock_node);
-
- if (user_call_ID < call->user_call_ID)
- p = p->rb_left;
- else if (user_call_ID > call->user_call_ID)
- p = p->rb_right;
- else
- goto found_extant_call;
- }
-
- read_unlock(&rx->call_lock);
- _leave(" = NULL");
- return NULL;
-
- /* we found the call in the list immediately */
-found_extant_call:
- rxrpc_get_call(call);
- read_unlock(&rx->call_lock);
- _leave(" = %p [%d]", call, atomic_read(&call->usage));
- return call;
-}
-
-/*
* detach a call from a socket and set up for release
*/
void rxrpc_release_call(struct rxrpc_call *call)
@@ -641,41 +641,13 @@ void rxrpc_release_call(struct rxrpc_call *call)
write_unlock_bh(&rx->call_lock);
/* free up the channel for reuse */
- spin_lock(&conn->trans->client_lock);
+ spin_lock(&conn->channel_lock);
write_lock_bh(&conn->lock);
write_lock(&call->state_lock);
- if (conn->channels[call->channel] == call)
- conn->channels[call->channel] = NULL;
-
- if (conn->out_clientflag && conn->bundle) {
- conn->avail_calls++;
- switch (conn->avail_calls) {
- case 1:
- list_move_tail(&conn->bundle_link,
- &conn->bundle->avail_conns);
- case 2 ... RXRPC_MAXCALLS - 1:
- ASSERT(conn->channels[0] == NULL ||
- conn->channels[1] == NULL ||
- conn->channels[2] == NULL ||
- conn->channels[3] == NULL);
- break;
- case RXRPC_MAXCALLS:
- list_move_tail(&conn->bundle_link,
- &conn->bundle->unused_conns);
- ASSERT(conn->channels[0] == NULL &&
- conn->channels[1] == NULL &&
- conn->channels[2] == NULL &&
- conn->channels[3] == NULL);
- break;
- default:
- printk(KERN_ERR "RxRPC: conn->avail_calls=%d\n",
- conn->avail_calls);
- BUG();
- }
- }
+ rxrpc_disconnect_call(call);
- spin_unlock(&conn->trans->client_lock);
+ spin_unlock(&conn->channel_lock);
if (call->state < RXRPC_CALL_COMPLETE &&
call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
@@ -844,9 +816,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
}
if (call->conn) {
- spin_lock(&call->conn->trans->peer->lock);
- list_del(&call->error_link);
- spin_unlock(&call->conn->trans->peer->lock);
+ spin_lock(&call->conn->params.peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&call->conn->params.peer->lock);
write_lock_bh(&call->conn->lock);
rb_erase(&call->conn_node, &call->conn->calls);
@@ -935,16 +907,15 @@ void __exit rxrpc_destroy_all_calls(void)
if (call->state != RXRPC_CALL_DEAD)
break;
default:
- printk(KERN_ERR "RXRPC:"
- " Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
+ pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
call, atomic_read(&call->usage),
atomic_read(&call->ackr_not_idle),
rxrpc_call_states[call->state],
call->flags, call->events);
if (!skb_queue_empty(&call->rx_queue))
- printk(KERN_ERR"RXRPC: Rx queue occupied\n");
+ pr_err("Rx queue occupied\n");
if (!skb_queue_empty(&call->rx_oos_queue))
- printk(KERN_ERR"RXRPC: OOS queue occupied\n");
+ pr_err("OOS queue occupied\n");
break;
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
new file mode 100644
index 000000000000..82488d6adb83
--- /dev/null
+++ b/net/rxrpc/conn_client.c
@@ -0,0 +1,94 @@
+/* Client connection-specific management code.
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/timer.h>
+#include "ar-internal.h"
+
+/*
+ * We use machine-unique IDs for our client connections.
+ */
+DEFINE_IDR(rxrpc_client_conn_ids);
+static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
+
+/*
+ * Get a connection ID and epoch for a client connection from the global pool.
+ * The connection struct pointer is then recorded in the idr radix tree. The
+ * epoch is changed if this wraps.
+ *
+ * TODO: The IDR tree gets very expensive on memory if the connection IDs are
+ * widely scattered throughout the number space, so we shall need to retire
+ * connections that have, say, an ID more than four times the maximum number of
+ * client conns away from the current allocation point to try and keep the IDs
+ * concentrated. We will also need to retire connections from an old epoch.
+ */
+int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp)
+{
+ u32 epoch;
+ int id;
+
+ _enter("");
+
+ idr_preload(gfp);
+ spin_lock(&rxrpc_conn_id_lock);
+
+ epoch = rxrpc_epoch;
+
+ /* We could use idr_alloc_cyclic() here, but we really need to know
+ * when the thing wraps so that we can advance the epoch.
+ */
+ if (rxrpc_client_conn_ids.cur == 0)
+ rxrpc_client_conn_ids.cur = 1;
+ id = idr_alloc(&rxrpc_client_conn_ids, conn,
+ rxrpc_client_conn_ids.cur, 0x40000000, GFP_NOWAIT);
+ if (id < 0) {
+ if (id != -ENOSPC)
+ goto error;
+ id = idr_alloc(&rxrpc_client_conn_ids, conn,
+ 1, 0x40000000, GFP_NOWAIT);
+ if (id < 0)
+ goto error;
+ epoch++;
+ rxrpc_epoch = epoch;
+ }
+ rxrpc_client_conn_ids.cur = id + 1;
+
+ spin_unlock(&rxrpc_conn_id_lock);
+ idr_preload_end();
+
+ conn->proto.epoch = epoch;
+ conn->proto.cid = id << RXRPC_CIDSHIFT;
+ set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
+ _leave(" [CID %x:%x]", epoch, conn->proto.cid);
+ return 0;
+
+error:
+ spin_unlock(&rxrpc_conn_id_lock);
+ idr_preload_end();
+ _leave(" = %d", id);
+ return id;
+}
+
+/*
+ * Release a connection ID for a client connection from the global pool.
+ */
+void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+{
+ if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
+ spin_lock(&rxrpc_conn_id_lock);
+ idr_remove(&rxrpc_client_conn_ids,
+ conn->proto.cid >> RXRPC_CIDSHIFT);
+ spin_unlock(&rxrpc_conn_id_lock);
+ }
+}
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/conn_event.c
index 5f9563968a5b..bf6971555eac 100644
--- a/net/rxrpc/ar-connevent.c
+++ b/net/rxrpc/conn_event.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
@@ -86,14 +88,14 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
- msg.msg_name = &conn->trans->peer->srx.transport;
- msg.msg_namelen = conn->trans->peer->srx.transport_len;
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
- whdr.epoch = htonl(conn->epoch);
- whdr.cid = htonl(conn->cid);
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(conn->proto.cid);
whdr.callNumber = 0;
whdr.seq = 0;
whdr.type = RXRPC_PACKET_TYPE_ABORT;
@@ -101,7 +103,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
whdr.userStatus = 0;
whdr.securityIndex = conn->security_ix;
whdr._rsvd = 0;
- whdr.serviceId = htons(conn->service_id);
+ whdr.serviceId = htons(conn->params.service_id);
word = htonl(conn->local_abort);
@@ -116,7 +118,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
whdr.serial = htonl(serial);
_proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort);
- ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
if (ret < 0) {
_debug("sendmsg failed: %d", ret);
return -EAGAIN;
@@ -218,7 +220,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn)
ASSERT(conn->security_ix != 0);
- if (!conn->key) {
+ if (!conn->params.key) {
_debug("set up security");
ret = rxrpc_init_server_conn_security(conn);
switch (ret) {
@@ -261,7 +263,7 @@ void rxrpc_process_connection(struct work_struct *work)
_enter("{%d}", conn->debug_id);
- atomic_inc(&conn->usage);
+ rxrpc_get_connection(conn);
if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
rxrpc_secure_connection(conn);
@@ -312,19 +314,14 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
CHECK_SLAB_OKAY(&local->usage);
- if (!atomic_inc_not_zero(&local->usage)) {
- printk("resurrected on reject\n");
- BUG();
- }
-
skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_work(&local->rejecter);
+ rxrpc_queue_work(&local->processor);
}
/*
* reject packets through the local endpoint
*/
-void rxrpc_reject_packets(struct work_struct *work)
+void rxrpc_reject_packets(struct rxrpc_local *local)
{
union {
struct sockaddr sa;
@@ -332,16 +329,12 @@ void rxrpc_reject_packets(struct work_struct *work)
} sa;
struct rxrpc_skb_priv *sp;
struct rxrpc_wire_header whdr;
- struct rxrpc_local *local;
struct sk_buff *skb;
struct msghdr msg;
struct kvec iov[2];
size_t size;
__be32 code;
- local = container_of(work, struct rxrpc_local, rejecter);
- rxrpc_get_local(local);
-
_enter("%d", local->debug_id);
iov[0].iov_base = &whdr;
@@ -393,9 +386,7 @@ void rxrpc_reject_packets(struct work_struct *work)
}
rxrpc_free_skb(skb);
- rxrpc_put_local(local);
}
- rxrpc_put_local(local);
_leave("");
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
new file mode 100644
index 000000000000..4bfad7cf96cb
--- /dev/null
+++ b/net/rxrpc/conn_object.c
@@ -0,0 +1,686 @@
+/* RxRPC virtual connection handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * Time till a connection expires after last use (in seconds).
+ */
+unsigned int rxrpc_connection_expiry = 10 * 60;
+
+static void rxrpc_connection_reaper(struct work_struct *work);
+
+LIST_HEAD(rxrpc_connections);
+DEFINE_RWLOCK(rxrpc_connection_lock);
+static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
+
+/*
+ * allocate a new connection
+ */
+static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+{
+ struct rxrpc_connection *conn;
+
+ _enter("");
+
+ conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
+ if (conn) {
+ spin_lock_init(&conn->channel_lock);
+ init_waitqueue_head(&conn->channel_wq);
+ INIT_WORK(&conn->processor, &rxrpc_process_connection);
+ INIT_LIST_HEAD(&conn->link);
+ conn->calls = RB_ROOT;
+ skb_queue_head_init(&conn->rx_queue);
+ conn->security = &rxrpc_no_security;
+ rwlock_init(&conn->lock);
+ spin_lock_init(&conn->state_lock);
+ atomic_set(&conn->usage, 1);
+ conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
+ conn->size_align = 4;
+ conn->header_size = sizeof(struct rxrpc_wire_header);
+ }
+
+ _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
+ return conn;
+}
+
+/*
+ * add a call to a connection's call-by-ID tree
+ */
+static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
+ struct rxrpc_call *call)
+{
+ struct rxrpc_call *xcall;
+ struct rb_node *parent, **p;
+ __be32 call_id;
+
+ write_lock_bh(&conn->lock);
+
+ call_id = call->call_id;
+ p = &conn->calls.rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ xcall = rb_entry(parent, struct rxrpc_call, conn_node);
+
+ if (call_id < xcall->call_id)
+ p = &(*p)->rb_left;
+ else if (call_id > xcall->call_id)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&call->conn_node, parent, p);
+ rb_insert_color(&call->conn_node, &conn->calls);
+
+ write_unlock_bh(&conn->lock);
+}
+
+/*
+ * Allocate a client connection. The caller must take care to clear any
+ * padding bytes in *cp.
+ */
+static struct rxrpc_connection *
+rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
+{
+ struct rxrpc_connection *conn;
+ int ret;
+
+ _enter("");
+
+ conn = rxrpc_alloc_connection(gfp);
+ if (!conn) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ conn->params = *cp;
+ conn->proto.local = cp->local;
+ conn->proto.epoch = rxrpc_epoch;
+ conn->proto.cid = 0;
+ conn->proto.in_clientflag = 0;
+ conn->proto.family = cp->peer->srx.transport.family;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->state = RXRPC_CONN_CLIENT;
+
+ switch (conn->proto.family) {
+ case AF_INET:
+ conn->proto.addr_size = sizeof(conn->proto.ipv4_addr);
+ conn->proto.ipv4_addr = cp->peer->srx.transport.sin.sin_addr;
+ conn->proto.port = cp->peer->srx.transport.sin.sin_port;
+ break;
+ }
+
+ ret = rxrpc_get_client_connection_id(conn, gfp);
+ if (ret < 0)
+ goto error_0;
+
+ ret = rxrpc_init_client_conn_security(conn);
+ if (ret < 0)
+ goto error_1;
+
+ conn->security->prime_packet_security(conn);
+
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock(&rxrpc_connection_lock);
+
+ /* We steal the caller's peer ref. */
+ cp->peer = NULL;
+ rxrpc_get_local(conn->params.local);
+ key_get(conn->params.key);
+
+ _leave(" = %p", conn);
+ return conn;
+
+error_1:
+ rxrpc_put_client_connection_id(conn);
+error_0:
+ kfree(conn);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn, *candidate = NULL;
+ struct rxrpc_local *local = cp->local;
+ struct rb_node *p, **pp, *parent;
+ long diff;
+ int chan;
+
+ DECLARE_WAITQUEUE(myself, current);
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
+ if (!cp->peer)
+ return -ENOMEM;
+
+ if (!cp->exclusive) {
+ /* Search for a existing client connection unless this is going
+ * to be a connection that's used exclusively for a single call.
+ */
+ _debug("search 1");
+ spin_lock(&local->client_conns_lock);
+ p = local->client_conns.rb_node;
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, client_node);
+
+#define cmp(X) ((long)conn->params.X - (long)cp->X)
+ diff = (cmp(peer) ?:
+ cmp(key) ?:
+ cmp(security_level));
+ if (diff < 0)
+ p = p->rb_left;
+ else if (diff > 0)
+ p = p->rb_right;
+ else
+ goto found_extant_conn;
+ }
+ spin_unlock(&local->client_conns_lock);
+ }
+
+ /* We didn't find a connection or we want an exclusive one. */
+ _debug("get new conn");
+ candidate = rxrpc_alloc_client_connection(cp, gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ if (cp->exclusive) {
+ /* Assign the call on an exclusive connection to channel 0 and
+ * don't add the connection to the endpoint's shareable conn
+ * lookup tree.
+ */
+ _debug("exclusive chan 0");
+ conn = candidate;
+ atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+ spin_lock(&conn->channel_lock);
+ chan = 0;
+ goto found_channel;
+ }
+
+ /* We need to redo the search before attempting to add a new connection
+ * lest we race with someone else adding a conflicting instance.
+ */
+ _debug("search 2");
+ spin_lock(&local->client_conns_lock);
+
+ pp = &local->client_conns.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ conn = rb_entry(parent, struct rxrpc_connection, client_node);
+
+ diff = (cmp(peer) ?:
+ cmp(key) ?:
+ cmp(security_level));
+ if (diff < 0)
+ pp = &(*pp)->rb_left;
+ else if (diff > 0)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_conn;
+ }
+
+ /* The second search also failed; simply add the new connection with
+ * the new call in channel 0. Note that we need to take the channel
+ * lock before dropping the client conn lock.
+ */
+ _debug("new conn");
+ conn = candidate;
+ candidate = NULL;
+
+ rb_link_node(&conn->client_node, parent, pp);
+ rb_insert_color(&conn->client_node, &local->client_conns);
+
+ atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
+ spin_lock(&conn->channel_lock);
+ spin_unlock(&local->client_conns_lock);
+ chan = 0;
+
+found_channel:
+ _debug("found chan");
+ call->conn = conn;
+ call->channel = chan;
+ call->epoch = conn->proto.epoch;
+ call->cid = conn->proto.cid | chan;
+ call->call_id = ++conn->call_counter;
+ rcu_assign_pointer(conn->channels[chan], call);
+
+ _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+
+ rxrpc_add_call_ID_to_conn(conn, call);
+ spin_unlock(&conn->channel_lock);
+ rxrpc_put_peer(cp->peer);
+ cp->peer = NULL;
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return 0;
+
+ /* We found a suitable connection already in existence. Discard any
+ * candidate we may have allocated, and try to get a channel on this
+ * one.
+ */
+found_extant_conn:
+ _debug("found conn");
+ rxrpc_get_connection(conn);
+ spin_unlock(&local->client_conns_lock);
+
+ rxrpc_put_connection(candidate);
+
+ if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
+ if (!gfpflags_allow_blocking(gfp)) {
+ rxrpc_put_connection(conn);
+ _leave(" = -EAGAIN");
+ return -EAGAIN;
+ }
+
+ add_wait_queue(&conn->channel_wq, &myself);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (atomic_add_unless(&conn->avail_chans, -1, 0))
+ break;
+ if (signal_pending(current))
+ goto interrupted;
+ schedule();
+ }
+ remove_wait_queue(&conn->channel_wq, &myself);
+ __set_current_state(TASK_RUNNING);
+ }
+
+ /* The connection allegedly now has a free channel and we can now
+ * attach the call to it.
+ */
+ spin_lock(&conn->channel_lock);
+
+ for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+ if (!conn->channels[chan])
+ goto found_channel;
+ BUG();
+
+interrupted:
+ remove_wait_queue(&conn->channel_wq, &myself);
+ __set_current_state(TASK_RUNNING);
+ rxrpc_put_connection(conn);
+ rxrpc_put_peer(cp->peer);
+ cp->peer = NULL;
+ _leave(" = -ERESTARTSYS");
+ return -ERESTARTSYS;
+}
+
+/*
+ * get a record of an incoming connection
+ */
+struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn, *candidate = NULL;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rb_node *p, **pp;
+ const char *new = "old";
+ __be32 epoch;
+ u32 cid;
+
+ _enter("");
+
+ ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
+
+ epoch = sp->hdr.epoch;
+ cid = sp->hdr.cid & RXRPC_CIDMASK;
+
+ /* search the connection list first */
+ read_lock_bh(&peer->conn_lock);
+
+ p = peer->service_conns.rb_node;
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+ _debug("maybe %x", conn->proto.cid);
+
+ if (epoch < conn->proto.epoch)
+ p = p->rb_left;
+ else if (epoch > conn->proto.epoch)
+ p = p->rb_right;
+ else if (cid < conn->proto.cid)
+ p = p->rb_left;
+ else if (cid > conn->proto.cid)
+ p = p->rb_right;
+ else
+ goto found_extant_connection;
+ }
+ read_unlock_bh(&peer->conn_lock);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_connection(GFP_NOIO);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ candidate->proto.local = local;
+ candidate->proto.epoch = sp->hdr.epoch;
+ candidate->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
+ candidate->proto.in_clientflag = RXRPC_CLIENT_INITIATED;
+ candidate->params.local = local;
+ candidate->params.peer = peer;
+ candidate->params.service_id = sp->hdr.serviceId;
+ candidate->security_ix = sp->hdr.securityIndex;
+ candidate->out_clientflag = 0;
+ candidate->state = RXRPC_CONN_SERVER;
+ if (candidate->params.service_id)
+ candidate->state = RXRPC_CONN_SERVER_UNSECURED;
+
+ write_lock_bh(&peer->conn_lock);
+
+ pp = &peer->service_conns.rb_node;
+ p = NULL;
+ while (*pp) {
+ p = *pp;
+ conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+ if (epoch < conn->proto.epoch)
+ pp = &(*pp)->rb_left;
+ else if (epoch > conn->proto.epoch)
+ pp = &(*pp)->rb_right;
+ else if (cid < conn->proto.cid)
+ pp = &(*pp)->rb_left;
+ else if (cid > conn->proto.cid)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_second;
+ }
+
+ /* we can now add the new candidate to the list */
+ conn = candidate;
+ candidate = NULL;
+ rb_link_node(&conn->service_node, p, pp);
+ rb_insert_color(&conn->service_node, &peer->service_conns);
+ rxrpc_get_peer(peer);
+ rxrpc_get_local(local);
+
+ write_unlock_bh(&peer->conn_lock);
+
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock(&rxrpc_connection_lock);
+
+ new = "new";
+
+success:
+ _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
+
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return conn;
+
+ /* we found the connection in the list immediately */
+found_extant_connection:
+ if (sp->hdr.securityIndex != conn->security_ix) {
+ read_unlock_bh(&peer->conn_lock);
+ goto security_mismatch;
+ }
+ rxrpc_get_connection(conn);
+ read_unlock_bh(&peer->conn_lock);
+ goto success;
+
+ /* we found the connection on the second time through the list */
+found_extant_second:
+ if (sp->hdr.securityIndex != conn->security_ix) {
+ write_unlock_bh(&peer->conn_lock);
+ goto security_mismatch;
+ }
+ rxrpc_get_connection(conn);
+ write_unlock_bh(&peer->conn_lock);
+ kfree(candidate);
+ goto success;
+
+security_mismatch:
+ kfree(candidate);
+ _leave(" = -EKEYREJECTED");
+ return ERR_PTR(-EKEYREJECTED);
+}
+
+/*
+ * find a connection based on transport and RxRPC connection ID for an incoming
+ * packet
+ */
+struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rb_node *p;
+ u32 epoch, cid;
+
+ _enter(",{%x,%x}", sp->hdr.cid, sp->hdr.flags);
+
+ read_lock_bh(&peer->conn_lock);
+
+ cid = sp->hdr.cid & RXRPC_CIDMASK;
+ epoch = sp->hdr.epoch;
+
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) {
+ p = peer->service_conns.rb_node;
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+ _debug("maybe %x", conn->proto.cid);
+
+ if (epoch < conn->proto.epoch)
+ p = p->rb_left;
+ else if (epoch > conn->proto.epoch)
+ p = p->rb_right;
+ else if (cid < conn->proto.cid)
+ p = p->rb_left;
+ else if (cid > conn->proto.cid)
+ p = p->rb_right;
+ else
+ goto found;
+ }
+ } else {
+ conn = idr_find(&rxrpc_client_conn_ids, cid >> RXRPC_CIDSHIFT);
+ if (conn && conn->proto.epoch == epoch)
+ goto found;
+ }
+
+ read_unlock_bh(&peer->conn_lock);
+ _leave(" = NULL");
+ return NULL;
+
+found:
+ rxrpc_get_connection(conn);
+ read_unlock_bh(&peer->conn_lock);
+ _leave(" = %p", conn);
+ return conn;
+}
+
+/*
+ * Disconnect a call and clear any channel it occupies when that call
+ * terminates.
+ */
+void rxrpc_disconnect_call(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = call->conn;
+ unsigned chan = call->channel;
+
+ _enter("%d,%d", conn->debug_id, call->channel);
+
+ if (conn->channels[chan] == call) {
+ rcu_assign_pointer(conn->channels[chan], NULL);
+ atomic_inc(&conn->avail_chans);
+ wake_up(&conn->channel_wq);
+ }
+}
+
+/*
+ * release a virtual connection
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+ if (!conn)
+ return;
+
+ _enter("%p{u=%d,d=%d}",
+ conn, atomic_read(&conn->usage), conn->debug_id);
+
+ ASSERTCMP(atomic_read(&conn->usage), >, 0);
+
+ conn->put_time = ktime_get_seconds();
+ if (atomic_dec_and_test(&conn->usage)) {
+ _debug("zombie");
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ }
+
+ _leave("");
+}
+
+/*
+ * destroy a virtual connection
+ */
+static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+{
+ _enter("%p{%d}", conn, atomic_read(&conn->usage));
+
+ ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+
+ _net("DESTROY CONN %d", conn->debug_id);
+
+ ASSERT(RB_EMPTY_ROOT(&conn->calls));
+ rxrpc_purge_queue(&conn->rx_queue);
+
+ conn->security->clear(conn);
+ key_put(conn->params.key);
+ key_put(conn->server_key);
+ rxrpc_put_peer(conn->params.peer);
+ rxrpc_put_local(conn->params.local);
+
+ kfree(conn);
+ _leave("");
+}
+
+/*
+ * reap dead connections
+ */
+static void rxrpc_connection_reaper(struct work_struct *work)
+{
+ struct rxrpc_connection *conn, *_p;
+ struct rxrpc_peer *peer;
+ unsigned long now, earliest, reap_time;
+
+ LIST_HEAD(graveyard);
+
+ _enter("");
+
+ now = ktime_get_seconds();
+ earliest = ULONG_MAX;
+
+ write_lock(&rxrpc_connection_lock);
+ list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ _debug("reap CONN %d { u=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->usage),
+ (long) now - (long) conn->put_time);
+
+ if (likely(atomic_read(&conn->usage) > 0))
+ continue;
+
+ if (rxrpc_conn_is_client(conn)) {
+ struct rxrpc_local *local = conn->params.local;
+ spin_lock(&local->client_conns_lock);
+ reap_time = conn->put_time + rxrpc_connection_expiry;
+
+ if (atomic_read(&conn->usage) > 0) {
+ ;
+ } else if (reap_time <= now) {
+ list_move_tail(&conn->link, &graveyard);
+ rxrpc_put_client_connection_id(conn);
+ rb_erase(&conn->client_node,
+ &local->client_conns);
+ } else if (reap_time < earliest) {
+ earliest = reap_time;
+ }
+
+ spin_unlock(&local->client_conns_lock);
+ } else {
+ peer = conn->params.peer;
+ write_lock_bh(&peer->conn_lock);
+ reap_time = conn->put_time + rxrpc_connection_expiry;
+
+ if (atomic_read(&conn->usage) > 0) {
+ ;
+ } else if (reap_time <= now) {
+ list_move_tail(&conn->link, &graveyard);
+ rb_erase(&conn->service_node,
+ &peer->service_conns);
+ } else if (reap_time < earliest) {
+ earliest = reap_time;
+ }
+
+ write_unlock_bh(&peer->conn_lock);
+ }
+ }
+ write_unlock(&rxrpc_connection_lock);
+
+ if (earliest != ULONG_MAX) {
+ _debug("reschedule reaper %ld", (long) earliest - now);
+ ASSERTCMP(earliest, >, now);
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap,
+ (earliest - now) * HZ);
+ }
+
+ /* then destroy all those pulled out */
+ while (!list_empty(&graveyard)) {
+ conn = list_entry(graveyard.next, struct rxrpc_connection,
+ link);
+ list_del_init(&conn->link);
+
+ ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ rxrpc_destroy_connection(conn);
+ }
+
+ _leave("");
+}
+
+/*
+ * preemptively destroy all the connection records rather than waiting for them
+ * to time out
+ */
+void __exit rxrpc_destroy_all_connections(void)
+{
+ _enter("");
+
+ rxrpc_connection_expiry = 0;
+ cancel_delayed_work(&rxrpc_connection_reap);
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+
+ _leave("");
+}
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/input.c
index 6ff97412a0bb..f4bd57b77b93 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/input.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
@@ -358,7 +360,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_PACKET_TYPE_BUSY:
_proto("Rx BUSY %%%u", sp->hdr.serial);
- if (call->conn->out_clientflag)
+ if (rxrpc_conn_is_service(call->conn))
goto protocol_error;
write_lock_bh(&call->state_lock);
@@ -531,7 +533,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
case RXRPC_CALL_COMPLETE:
case RXRPC_CALL_CLIENT_FINAL_ACK:
/* complete server call */
- if (call->conn->in_clientflag)
+ if (rxrpc_conn_is_service(call->conn))
goto dead_call;
/* resend last packet of a completed call */
_debug("final ack again");
@@ -558,7 +560,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
dead_call:
if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
skb->priority = RX_CALL_DEAD;
- rxrpc_reject_packet(call->conn->trans->local, skb);
+ rxrpc_reject_packet(call->conn->params.local, skb);
goto unlock;
}
free_unlock:
@@ -578,7 +580,7 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
{
_enter("%p,%p", conn, skb);
- atomic_inc(&conn->usage);
+ rxrpc_get_connection(conn);
skb_queue_tail(&conn->rx_queue, skb);
rxrpc_queue_conn(conn);
}
@@ -592,9 +594,8 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
{
_enter("%p,%p", local, skb);
- atomic_inc(&local->usage);
skb_queue_tail(&local->event_queue, skb);
- rxrpc_queue_work(&local->event_processor);
+ rxrpc_queue_work(&local->processor);
}
/*
@@ -627,29 +628,27 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
}
static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
- struct sk_buff *skb,
- struct rxrpc_skb_priv *sp)
+ struct sk_buff *skb)
{
struct rxrpc_peer *peer;
- struct rxrpc_transport *trans;
struct rxrpc_connection *conn;
+ struct sockaddr_rxrpc srx;
- peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr,
- udp_hdr(skb)->source);
- if (IS_ERR(peer))
- goto cant_find_conn;
-
- trans = rxrpc_find_transport(local, peer);
- rxrpc_put_peer(peer);
- if (!trans)
- goto cant_find_conn;
+ rxrpc_get_addr_from_skb(local, skb, &srx);
+ rcu_read_lock();
+ peer = rxrpc_lookup_peer_rcu(local, &srx);
+ if (!peer)
+ goto cant_find_peer;
- conn = rxrpc_find_connection(trans, &sp->hdr);
- rxrpc_put_transport(trans);
+ conn = rxrpc_find_connection(local, peer, skb);
+ rcu_read_unlock();
if (!conn)
goto cant_find_conn;
return conn;
+
+cant_find_peer:
+ rcu_read_unlock();
cant_find_conn:
return NULL;
}
@@ -657,11 +656,15 @@ cant_find_conn:
/*
* handle data received on the local endpoint
* - may be called in interrupt context
+ *
+ * The socket is locked by the caller and this prevents the socket from being
+ * shut down and the local endpoint from going away, thus sk_user_data will not
+ * be cleared until this function returns.
*/
void rxrpc_data_ready(struct sock *sk)
{
struct rxrpc_skb_priv *sp;
- struct rxrpc_local *local;
+ struct rxrpc_local *local = sk->sk_user_data;
struct sk_buff *skb;
int ret;
@@ -669,21 +672,8 @@ void rxrpc_data_ready(struct sock *sk)
ASSERT(!irqs_disabled());
- read_lock_bh(&rxrpc_local_lock);
- local = sk->sk_user_data;
- if (local && atomic_read(&local->usage) > 0)
- rxrpc_get_local(local);
- else
- local = NULL;
- read_unlock_bh(&rxrpc_local_lock);
- if (!local) {
- _leave(" [local dead]");
- return;
- }
-
skb = skb_recv_datagram(sk, 0, 1, &ret);
if (!skb) {
- rxrpc_put_local(local);
if (ret == -EAGAIN)
return;
_debug("UDP socket error %d", ret);
@@ -697,7 +687,6 @@ void rxrpc_data_ready(struct sock *sk)
/* we'll probably need to checksum it (didn't call sock_recvmsg) */
if (skb_checksum_complete(skb)) {
rxrpc_free_skb(skb);
- rxrpc_put_local(local);
__UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
_leave(" [CSUM failed]");
return;
@@ -732,7 +721,7 @@ void rxrpc_data_ready(struct sock *sk)
rxrpc_post_packet_to_local(local, skb);
goto out;
}
-
+
if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
@@ -743,7 +732,7 @@ void rxrpc_data_ready(struct sock *sk)
* old-fashioned way doesn't really hurt */
struct rxrpc_connection *conn;
- conn = rxrpc_conn_from_local(local, skb, sp);
+ conn = rxrpc_conn_from_local(local, skb);
if (!conn)
goto cant_route_call;
@@ -762,7 +751,6 @@ void rxrpc_data_ready(struct sock *sk)
}
out:
- rxrpc_put_local(local);
return;
cant_route_call:
@@ -772,8 +760,7 @@ cant_route_call:
if (sp->hdr.seq == 1) {
_debug("first packet");
skb_queue_tail(&local->accept_queue, skb);
- rxrpc_queue_work(&local->acceptor);
- rxrpc_put_local(local);
+ rxrpc_queue_work(&local->processor);
_leave(" [incoming]");
return;
}
@@ -786,13 +773,11 @@ cant_route_call:
_debug("reject type %d",sp->hdr.type);
rxrpc_reject_packet(local, skb);
}
- rxrpc_put_local(local);
_leave(" [no call]");
return;
bad_message:
skb->priority = RX_PROTOCOL_ERROR;
rxrpc_reject_packet(local, skb);
- rxrpc_put_local(local);
_leave(" [badmsg]");
}
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/key.c
index 1021b4c0bdd2..18c737a61d80 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/key.c
@@ -12,6 +12,8 @@
* "afs@CAMBRIDGE.REDHAT.COM>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/net.h>
@@ -800,7 +802,7 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token)
rxrpc_rxk5_free(token->k5);
break;
default:
- printk(KERN_ERR "Unknown token type %x on rxrpc key\n",
+ pr_err("Unknown token type %x on rxrpc key\n",
token->security_index);
BUG();
}
@@ -985,7 +987,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
if (ret < 0)
goto error;
- conn->key = key;
+ conn->params.key = key;
_leave(" = 0 [%d]", key_serial(key));
return 0;
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
new file mode 100644
index 000000000000..31a3f86ef2f6
--- /dev/null
+++ b/net/rxrpc/local_event.c
@@ -0,0 +1,116 @@
+/* AF_RXRPC local endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <generated/utsrelease.h>
+#include "ar-internal.h"
+
+static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
+
+/*
+ * Reply to a version request
+ */
+static void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_host_header *hdr,
+ struct sk_buff *skb)
+{
+ struct rxrpc_wire_header whdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct sockaddr_in sin;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ int ret;
+
+ _enter("");
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = udp_hdr(skb)->source;
+ sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+
+ msg.msg_name = &sin;
+ msg.msg_namelen = sizeof(sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(sp->hdr.epoch);
+ whdr.cid = htonl(sp->hdr.cid);
+ whdr.callNumber = htonl(sp->hdr.callNumber);
+ whdr.seq = 0;
+ whdr.serial = 0;
+ whdr.type = RXRPC_PACKET_TYPE_VERSION;
+ whdr.flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
+ whdr.userStatus = 0;
+ whdr.securityIndex = 0;
+ whdr._rsvd = 0;
+ whdr.serviceId = htons(sp->hdr.serviceId);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = (char *)rxrpc_version_string;
+ iov[1].iov_len = sizeof(rxrpc_version_string);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ _proto("Tx VERSION (reply)");
+
+ ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
+ if (ret < 0)
+ _debug("sendmsg failed: %d", ret);
+
+ _leave("");
+}
+
+/*
+ * Process event packets targetted at a local endpoint.
+ */
+void rxrpc_process_local_events(struct rxrpc_local *local)
+{
+ struct sk_buff *skb;
+ char v;
+
+ _enter("");
+
+ skb = skb_dequeue(&local->event_queue);
+ if (skb) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ _debug("{%d},{%u}", local->debug_id, sp->hdr.type);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_VERSION:
+ if (skb_copy_bits(skb, 0, &v, 1) < 0)
+ return;
+ _proto("Rx VERSION { %02x }", v);
+ if (v == 0)
+ rxrpc_send_version_request(local, &sp->hdr, skb);
+ break;
+
+ default:
+ /* Just ignore anything we don't understand */
+ break;
+ }
+
+ rxrpc_free_skb(skb);
+ }
+
+ _leave("");
+}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
new file mode 100644
index 000000000000..3ab7764f7cd8
--- /dev/null
+++ b/net/rxrpc/local_object.c
@@ -0,0 +1,387 @@
+/* Local endpoint object management
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <linux/hashtable.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_local_processor(struct work_struct *);
+static void rxrpc_local_rcu(struct rcu_head *);
+
+static DEFINE_MUTEX(rxrpc_local_mutex);
+static LIST_HEAD(rxrpc_local_endpoints);
+
+/*
+ * Compare a local to an address. Return -ve, 0 or +ve to indicate less than,
+ * same or greater than.
+ *
+ * We explicitly don't compare the RxRPC service ID as we want to reject
+ * conflicting uses by differing services. Further, we don't want to share
+ * addresses with different options (IPv6), so we don't compare those bits
+ * either.
+ */
+static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx)
+{
+ long diff;
+
+ diff = ((local->srx.transport_type - srx->transport_type) ?:
+ (local->srx.transport_len - srx->transport_len) ?:
+ (local->srx.transport.family - srx->transport.family));
+ if (diff != 0)
+ return diff;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ /* If the choice of UDP port is left up to the transport, then
+ * the endpoint record doesn't match.
+ */
+ return ((u16 __force)local->srx.transport.sin.sin_port -
+ (u16 __force)srx->transport.sin.sin_port) ?:
+ memcmp(&local->srx.transport.sin.sin_addr,
+ &srx->transport.sin.sin_addr,
+ sizeof(struct in_addr));
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Allocate a new local endpoint.
+ */
+static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_local *local;
+
+ local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
+ if (local) {
+ atomic_set(&local->usage, 1);
+ INIT_LIST_HEAD(&local->link);
+ INIT_WORK(&local->processor, rxrpc_local_processor);
+ INIT_LIST_HEAD(&local->services);
+ init_rwsem(&local->defrag_sem);
+ skb_queue_head_init(&local->accept_queue);
+ skb_queue_head_init(&local->reject_queue);
+ skb_queue_head_init(&local->event_queue);
+ local->client_conns = RB_ROOT;
+ spin_lock_init(&local->client_conns_lock);
+ spin_lock_init(&local->lock);
+ rwlock_init(&local->services_lock);
+ local->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ memcpy(&local->srx, srx, sizeof(*srx));
+ }
+
+ _leave(" = %p", local);
+ return local;
+}
+
+/*
+ * create the local socket
+ * - must be called with rxrpc_local_mutex locked
+ */
+static int rxrpc_open_socket(struct rxrpc_local *local)
+{
+ struct sock *sock;
+ int ret, opt;
+
+ _enter("%p{%d}", local, local->srx.transport_type);
+
+ /* create a socket to represent the local endpoint */
+ ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
+ IPPROTO_UDP, &local->socket);
+ if (ret < 0) {
+ _leave(" = %d [socket]", ret);
+ return ret;
+ }
+
+ /* if a local address was supplied then bind it */
+ if (local->srx.transport_len > sizeof(sa_family_t)) {
+ _debug("bind");
+ ret = kernel_bind(local->socket,
+ (struct sockaddr *)&local->srx.transport,
+ local->srx.transport_len);
+ if (ret < 0) {
+ _debug("bind failed %d", ret);
+ goto error;
+ }
+ }
+
+ /* we want to receive ICMP errors */
+ opt = 1;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+
+ /* we want to set the don't fragment bit */
+ opt = IP_PMTUDISC_DO;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+
+ /* set the socket up */
+ sock = local->socket->sk;
+ sock->sk_user_data = local;
+ sock->sk_data_ready = rxrpc_data_ready;
+ sock->sk_error_report = rxrpc_error_report;
+ _leave(" = 0");
+ return 0;
+
+error:
+ kernel_sock_shutdown(local->socket, SHUT_RDWR);
+ local->socket->sk->sk_user_data = NULL;
+ sock_release(local->socket);
+ local->socket = NULL;
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Look up or create a new local endpoint using the specified local address.
+ */
+struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_local *local;
+ struct list_head *cursor;
+ const char *age;
+ long diff;
+ int ret;
+
+ if (srx->transport.family == AF_INET) {
+ _enter("{%d,%u,%pI4+%hu}",
+ srx->transport_type,
+ srx->transport.family,
+ &srx->transport.sin.sin_addr,
+ ntohs(srx->transport.sin.sin_port));
+ } else {
+ _enter("{%d,%u}",
+ srx->transport_type,
+ srx->transport.family);
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
+ mutex_lock(&rxrpc_local_mutex);
+
+ for (cursor = rxrpc_local_endpoints.next;
+ cursor != &rxrpc_local_endpoints;
+ cursor = cursor->next) {
+ local = list_entry(cursor, struct rxrpc_local, link);
+
+ diff = rxrpc_local_cmp_key(local, srx);
+ if (diff < 0)
+ continue;
+ if (diff > 0)
+ break;
+
+ /* Services aren't allowed to share transport sockets, so
+ * reject that here. It is possible that the object is dying -
+ * but it may also still have the local transport address that
+ * we want bound.
+ */
+ if (srx->srx_service) {
+ local = NULL;
+ goto addr_in_use;
+ }
+
+ /* Found a match. We replace a dying object. Attempting to
+ * bind the transport socket may still fail if we're attempting
+ * to use a local address that the dying object is still using.
+ */
+ if (!rxrpc_get_local_maybe(local)) {
+ cursor = cursor->next;
+ list_del_init(&local->link);
+ break;
+ }
+
+ age = "old";
+ goto found;
+ }
+
+ local = rxrpc_alloc_local(srx);
+ if (!local)
+ goto nomem;
+
+ ret = rxrpc_open_socket(local);
+ if (ret < 0)
+ goto sock_error;
+
+ list_add_tail(&local->link, cursor);
+ age = "new";
+
+found:
+ mutex_unlock(&rxrpc_local_mutex);
+
+ _net("LOCAL %s %d {%d,%u,%pI4+%hu}",
+ age,
+ local->debug_id,
+ local->srx.transport_type,
+ local->srx.transport.family,
+ &local->srx.transport.sin.sin_addr,
+ ntohs(local->srx.transport.sin.sin_port));
+
+ _leave(" = %p", local);
+ return local;
+
+nomem:
+ ret = -ENOMEM;
+sock_error:
+ mutex_unlock(&rxrpc_local_mutex);
+ kfree(local);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+
+addr_in_use:
+ mutex_unlock(&rxrpc_local_mutex);
+ _leave(" = -EADDRINUSE");
+ return ERR_PTR(-EADDRINUSE);
+}
+
+/*
+ * A local endpoint reached its end of life.
+ */
+void __rxrpc_put_local(struct rxrpc_local *local)
+{
+ _enter("%d", local->debug_id);
+ rxrpc_queue_work(&local->processor);
+}
+
+/*
+ * Destroy a local endpoint's socket and then hand the record to RCU to dispose
+ * of.
+ *
+ * Closing the socket cannot be done from bottom half context or RCU callback
+ * context because it might sleep.
+ */
+static void rxrpc_local_destroyer(struct rxrpc_local *local)
+{
+ struct socket *socket = local->socket;
+
+ _enter("%d", local->debug_id);
+
+ /* We can get a race between an incoming call packet queueing the
+ * processor again and the work processor starting the destruction
+ * process which will shut down the UDP socket.
+ */
+ if (local->dead) {
+ _leave(" [already dead]");
+ return;
+ }
+ local->dead = true;
+
+ mutex_lock(&rxrpc_local_mutex);
+ list_del_init(&local->link);
+ mutex_unlock(&rxrpc_local_mutex);
+
+ ASSERT(RB_EMPTY_ROOT(&local->client_conns));
+ ASSERT(list_empty(&local->services));
+
+ if (socket) {
+ local->socket = NULL;
+ kernel_sock_shutdown(socket, SHUT_RDWR);
+ socket->sk->sk_user_data = NULL;
+ sock_release(socket);
+ }
+
+ /* At this point, there should be no more packets coming in to the
+ * local endpoint.
+ */
+ rxrpc_purge_queue(&local->accept_queue);
+ rxrpc_purge_queue(&local->reject_queue);
+ rxrpc_purge_queue(&local->event_queue);
+
+ _debug("rcu local %d", local->debug_id);
+ call_rcu(&local->rcu, rxrpc_local_rcu);
+}
+
+/*
+ * Process events on an endpoint
+ */
+static void rxrpc_local_processor(struct work_struct *work)
+{
+ struct rxrpc_local *local =
+ container_of(work, struct rxrpc_local, processor);
+ bool again;
+
+ _enter("%d", local->debug_id);
+
+ do {
+ again = false;
+ if (atomic_read(&local->usage) == 0)
+ return rxrpc_local_destroyer(local);
+
+ if (!skb_queue_empty(&local->accept_queue)) {
+ rxrpc_accept_incoming_calls(local);
+ again = true;
+ }
+
+ if (!skb_queue_empty(&local->reject_queue)) {
+ rxrpc_reject_packets(local);
+ again = true;
+ }
+
+ if (!skb_queue_empty(&local->event_queue)) {
+ rxrpc_process_local_events(local);
+ again = true;
+ }
+ } while (again);
+}
+
+/*
+ * Destroy a local endpoint after the RCU grace period expires.
+ */
+static void rxrpc_local_rcu(struct rcu_head *rcu)
+{
+ struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu);
+
+ _enter("%d", local->debug_id);
+
+ ASSERT(!work_pending(&local->processor));
+
+ _net("DESTROY LOCAL %d", local->debug_id);
+ kfree(local);
+ _leave("");
+}
+
+/*
+ * Verify the local endpoint list is empty by this point.
+ */
+void __exit rxrpc_destroy_all_locals(void)
+{
+ struct rxrpc_local *local;
+
+ _enter("");
+
+ if (list_empty(&rxrpc_local_endpoints))
+ return;
+
+ mutex_lock(&rxrpc_local_mutex);
+ list_for_each_entry(local, &rxrpc_local_endpoints, link) {
+ pr_err("AF_RXRPC: Leaked local %p {%d}\n",
+ local, atomic_read(&local->usage));
+ }
+ mutex_unlock(&rxrpc_local_mutex);
+ BUG();
+}
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 1afe9876e79f..bdc5e42fe600 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -15,6 +15,12 @@
#include "ar-internal.h"
/*
+ * The maximum listening backlog queue size that may be set on a socket by
+ * listen().
+ */
+unsigned int rxrpc_max_backlog __read_mostly = 10;
+
+/*
* How long to wait before scheduling ACK generation after seeing a
* packet with RXRPC_REQUEST_ACK set (in jiffies).
*/
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/output.c
index 51cb10062a8d..f4bda06b7d2d 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/output.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/net.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
@@ -30,13 +32,14 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
/*
* extract control messages from the sendmsg() control buffer
*/
-static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
unsigned long *user_call_ID,
enum rxrpc_command *command,
u32 *abort_code,
- bool server)
+ bool *_exclusive)
{
struct cmsghdr *cmsg;
+ bool got_user_ID = false;
int len;
*command = RXRPC_CMD_SEND_DATA;
@@ -68,6 +71,7 @@ static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
CMSG_DATA(cmsg);
}
_debug("User Call ID %lx", *user_call_ID);
+ got_user_ID = true;
break;
case RXRPC_ABORT:
@@ -88,15 +92,20 @@ static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
*command = RXRPC_CMD_ACCEPT;
if (len != 0)
return -EINVAL;
- if (!server)
- return -EISCONN;
break;
+ case RXRPC_EXCLUSIVE_CALL:
+ *_exclusive = true;
+ if (len != 0)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
}
+ if (!got_user_ID)
+ return -EINVAL;
_leave(" = 0");
return 0;
}
@@ -124,55 +133,78 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
}
/*
+ * Create a new client call for sendmsg().
+ */
+static struct rxrpc_call *
+rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+ unsigned long user_call_ID, bool exclusive)
+{
+ struct rxrpc_conn_parameters cp;
+ struct rxrpc_call *call;
+ struct key *key;
+
+ DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
+
+ _enter("");
+
+ if (!msg->msg_name)
+ return ERR_PTR(-EDESTADDRREQ);
+
+ key = rx->key;
+ if (key && !rx->key->payload.data[0])
+ key = NULL;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.local = rx->local;
+ cp.key = rx->key;
+ cp.security_level = rx->min_sec_level;
+ cp.exclusive = rx->exclusive | exclusive;
+ cp.service_id = srx->srx_service;
+ call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+
+ _leave(" = %p\n", call);
+ return call;
+}
+
+/*
* send a message forming part of a client call through an RxRPC socket
* - caller holds the socket locked
* - the socket may be either a client socket or a server socket
*/
-int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
- struct msghdr *msg, size_t len)
+int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
{
- struct rxrpc_conn_bundle *bundle;
enum rxrpc_command cmd;
struct rxrpc_call *call;
unsigned long user_call_ID = 0;
- struct key *key;
- u16 service_id;
+ bool exclusive = false;
u32 abort_code = 0;
int ret;
_enter("");
- ASSERT(trans != NULL);
-
- ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
- false);
+ ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
+ &exclusive);
if (ret < 0)
return ret;
- bundle = NULL;
- if (trans) {
- service_id = rx->srx.srx_service;
- if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx,
- msg->msg_name);
- service_id = srx->srx_service;
- }
- key = rx->key;
- if (key && !rx->key->payload.data[0])
- key = NULL;
- bundle = rxrpc_get_bundle(rx, trans, key, service_id,
- GFP_KERNEL);
- if (IS_ERR(bundle))
- return PTR_ERR(bundle);
+ if (cmd == RXRPC_CMD_ACCEPT) {
+ if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
+ return -EINVAL;
+ call = rxrpc_accept_call(rx, user_call_ID);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
+ rxrpc_put_call(call);
+ return 0;
}
- call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
- abort_code == 0, GFP_KERNEL);
- if (trans)
- rxrpc_put_bundle(trans, bundle);
- if (IS_ERR(call)) {
- _leave(" = %ld", PTR_ERR(call));
- return PTR_ERR(call);
+ call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
+ if (!call) {
+ if (cmd != RXRPC_CMD_SEND_DATA)
+ return -EBADSLT;
+ call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
+ exclusive);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
}
_debug("CALL %d USR %lx ST %d on CONN %p",
@@ -180,14 +212,21 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
if (call->state >= RXRPC_CALL_COMPLETE) {
/* it's too late for this call */
- ret = -ESHUTDOWN;
+ ret = -ECONNRESET;
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
rxrpc_send_abort(call, abort_code);
+ ret = 0;
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+ } else if (!call->in_clientflag &&
+ call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
/* request phase complete for this client call */
ret = -EPROTO;
+ } else if (call->in_clientflag &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Reply phase not begun or not complete for service call. */
+ ret = -EPROTO;
} else {
ret = rxrpc_send_data(rx, call, msg, len);
}
@@ -266,70 +305,9 @@ void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
EXPORT_SYMBOL(rxrpc_kernel_abort_call);
/*
- * send a message through a server socket
- * - caller holds the socket locked
- */
-int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
-{
- enum rxrpc_command cmd;
- struct rxrpc_call *call;
- unsigned long user_call_ID = 0;
- u32 abort_code = 0;
- int ret;
-
- _enter("");
-
- ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
- true);
- if (ret < 0)
- return ret;
-
- if (cmd == RXRPC_CMD_ACCEPT) {
- call = rxrpc_accept_call(rx, user_call_ID);
- if (IS_ERR(call))
- return PTR_ERR(call);
- rxrpc_put_call(call);
- return 0;
- }
-
- call = rxrpc_find_server_call(rx, user_call_ID);
- if (!call)
- return -EBADSLT;
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = -ESHUTDOWN;
- goto out;
- }
-
- switch (cmd) {
- case RXRPC_CMD_SEND_DATA:
- if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Tx phase not yet begun for this call */
- ret = -EPROTO;
- break;
- }
-
- ret = rxrpc_send_data(rx, call, msg, len);
- break;
-
- case RXRPC_CMD_SEND_ABORT:
- rxrpc_send_abort(call, abort_code);
- break;
- default:
- BUG();
- }
-
- out:
- rxrpc_put_call(call);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* send a packet through the transport endpoint
*/
-int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
+int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
{
struct kvec iov[1];
struct msghdr msg;
@@ -340,30 +318,30 @@ int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
iov[0].iov_base = skb->head;
iov[0].iov_len = skb->len;
- msg.msg_name = &trans->peer->srx.transport.sin;
- msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
- if (skb->len - sizeof(struct rxrpc_wire_header) < trans->peer->maxdata) {
- down_read(&trans->local->defrag_sem);
+ if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) {
+ down_read(&conn->params.local->defrag_sem);
/* send the packet by UDP
* - returns -EMSGSIZE if UDP would have to fragment the packet
* to go out of the interface
* - in which case, we'll have processed the ICMP error
* message and update the peer record
*/
- ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
iov[0].iov_len);
- up_read(&trans->local->defrag_sem);
+ up_read(&conn->params.local->defrag_sem);
if (ret == -EMSGSIZE)
goto send_fragmentable;
- _leave(" = %d [%u]", ret, trans->peer->maxdata);
+ _leave(" = %d [%u]", ret, conn->params.peer->maxdata);
return ret;
}
@@ -371,21 +349,28 @@ send_fragmentable:
/* attempt to send this message with fragmentation enabled */
_debug("send fragment");
- down_write(&trans->local->defrag_sem);
- opt = IP_PMTUDISC_DONT;
- ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
- (char *) &opt, sizeof(opt));
- if (ret == 0) {
- ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
- iov[0].iov_len);
-
- opt = IP_PMTUDISC_DO;
- kernel_setsockopt(trans->local->socket, SOL_IP,
- IP_MTU_DISCOVER, (char *) &opt, sizeof(opt));
+ down_write(&conn->params.local->defrag_sem);
+
+ switch (conn->params.local->srx.transport.family) {
+ case AF_INET:
+ opt = IP_PMTUDISC_DONT;
+ ret = kernel_setsockopt(conn->params.local->socket,
+ SOL_IP, IP_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
+ if (ret == 0) {
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
+ iov[0].iov_len);
+
+ opt = IP_PMTUDISC_DO;
+ kernel_setsockopt(conn->params.local->socket, SOL_IP,
+ IP_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
+ }
+ break;
}
- up_write(&trans->local->defrag_sem);
- _leave(" = %d [frag %u]", ret, trans->peer->maxdata);
+ up_write(&conn->params.local->defrag_sem);
+ _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata);
return ret;
}
@@ -497,7 +482,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
/* the packet may be freed by rxrpc_process_call() before this
* returns */
- ret = rxrpc_send_packet(call->conn->trans, skb);
+ ret = rxrpc_send_data_packet(call->conn, skb);
_net("sent skb %p", skb);
} else {
_debug("failed to delete ACK timer");
@@ -583,7 +568,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
goto maybe_error;
}
- max = call->conn->trans->peer->maxdata;
+ max = call->conn->params.peer->maxdata;
max -= call->conn->security_size;
max &= ~(call->conn->size_align - 1UL);
@@ -674,7 +659,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
seq = atomic_inc_return(&call->sequence);
- sp->hdr.epoch = conn->epoch;
+ sp->hdr.epoch = conn->proto.epoch;
sp->hdr.cid = call->cid;
sp->hdr.callNumber = call->call_id;
sp->hdr.seq = seq;
@@ -717,7 +702,9 @@ out:
call_aborted:
rxrpc_free_skb(skb);
if (call->state == RXRPC_CALL_NETWORK_ERROR)
- ret = call->conn->trans->peer->net_error;
+ ret = call->error_report < RXRPC_LOCAL_ERROR_OFFSET ?
+ call->error_report :
+ call->error_report - RXRPC_LOCAL_ERROR_OFFSET;
else
ret = -ECONNABORTED;
_leave(" = %d", ret);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
new file mode 100644
index 000000000000..8940674b5e08
--- /dev/null
+++ b/net/rxrpc/peer_event.c
@@ -0,0 +1,281 @@
+/* Peer event handling, typically ICMP messages.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
+
+/*
+ * Find the peer associated with an ICMP packet.
+ */
+static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
+ const struct sk_buff *skb)
+{
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ struct sockaddr_rxrpc srx;
+
+ _enter("");
+
+ memset(&srx, 0, sizeof(srx));
+ srx.transport_type = local->srx.transport_type;
+ srx.transport.family = local->srx.transport.family;
+
+ /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
+ * versa?
+ */
+ switch (srx.transport.family) {
+ case AF_INET:
+ srx.transport.sin.sin_port = serr->port;
+ srx.transport_len = sizeof(struct sockaddr_in);
+ switch (serr->ee.ee_origin) {
+ case SO_EE_ORIGIN_ICMP:
+ _net("Rx ICMP");
+ memcpy(&srx.transport.sin.sin_addr,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in_addr));
+ break;
+ case SO_EE_ORIGIN_ICMP6:
+ _net("Rx ICMP6 on v4 sock");
+ memcpy(&srx.transport.sin.sin_addr,
+ skb_network_header(skb) + serr->addr_offset + 12,
+ sizeof(struct in_addr));
+ break;
+ default:
+ memcpy(&srx.transport.sin.sin_addr, &ip_hdr(skb)->saddr,
+ sizeof(struct in_addr));
+ break;
+ }
+ break;
+
+ default:
+ BUG();
+ }
+
+ return rxrpc_lookup_peer_rcu(local, &srx);
+}
+
+/*
+ * Handle an MTU/fragmentation problem.
+ */
+static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr)
+{
+ u32 mtu = serr->ee.ee_info;
+
+ _net("Rx ICMP Fragmentation Needed (%d)", mtu);
+
+ /* wind down the local interface MTU */
+ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
+ peer->if_mtu = mtu;
+ _net("I/F MTU %u", mtu);
+ }
+
+ if (mtu == 0) {
+ /* they didn't give us a size, estimate one */
+ mtu = peer->if_mtu;
+ if (mtu > 1500) {
+ mtu >>= 1;
+ if (mtu < 1500)
+ mtu = 1500;
+ } else {
+ mtu -= 100;
+ if (mtu < peer->hdrsize)
+ mtu = peer->hdrsize + 4;
+ }
+ }
+
+ if (mtu < peer->mtu) {
+ spin_lock_bh(&peer->lock);
+ peer->mtu = mtu;
+ peer->maxdata = peer->mtu - peer->hdrsize;
+ spin_unlock_bh(&peer->lock);
+ _net("Net MTU %u (maxdata %u)",
+ peer->mtu, peer->maxdata);
+ }
+}
+
+/*
+ * Handle an error received on the local endpoint.
+ */
+void rxrpc_error_report(struct sock *sk)
+{
+ struct sock_exterr_skb *serr;
+ struct rxrpc_local *local = sk->sk_user_data;
+ struct rxrpc_peer *peer;
+ struct sk_buff *skb;
+
+ _enter("%p{%d}", sk, local->debug_id);
+
+ skb = sock_dequeue_err_skb(sk);
+ if (!skb) {
+ _leave("UDP socket errqueue empty");
+ return;
+ }
+ serr = SKB_EXT_ERR(skb);
+ if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
+ _leave("UDP empty message");
+ kfree_skb(skb);
+ return;
+ }
+
+ rxrpc_new_skb(skb);
+
+ rcu_read_lock();
+ peer = rxrpc_lookup_peer_icmp_rcu(local, skb);
+ if (peer && !rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ if (!peer) {
+ rcu_read_unlock();
+ rxrpc_free_skb(skb);
+ _leave(" [no peer]");
+ return;
+ }
+
+ if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+ serr->ee.ee_type == ICMP_DEST_UNREACH &&
+ serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
+ rxrpc_adjust_mtu(peer, serr);
+ rcu_read_unlock();
+ rxrpc_free_skb(skb);
+ rxrpc_put_peer(peer);
+ _leave(" [MTU update]");
+ return;
+ }
+
+ rxrpc_store_error(peer, serr);
+ rcu_read_unlock();
+ rxrpc_free_skb(skb);
+
+ /* The ref we obtained is passed off to the work item */
+ rxrpc_queue_work(&peer->error_distributor);
+ _leave("");
+}
+
+/*
+ * Map an error report to error codes on the peer record.
+ */
+static void rxrpc_store_error(struct rxrpc_peer *peer,
+ struct sock_exterr_skb *serr)
+{
+ struct sock_extended_err *ee;
+ int err;
+
+ _enter("");
+
+ ee = &serr->ee;
+
+ _net("Rx Error o=%d t=%d c=%d e=%d",
+ ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
+
+ err = ee->ee_errno;
+
+ switch (ee->ee_origin) {
+ case SO_EE_ORIGIN_ICMP:
+ switch (ee->ee_type) {
+ case ICMP_DEST_UNREACH:
+ switch (ee->ee_code) {
+ case ICMP_NET_UNREACH:
+ _net("Rx Received ICMP Network Unreachable");
+ break;
+ case ICMP_HOST_UNREACH:
+ _net("Rx Received ICMP Host Unreachable");
+ break;
+ case ICMP_PORT_UNREACH:
+ _net("Rx Received ICMP Port Unreachable");
+ break;
+ case ICMP_NET_UNKNOWN:
+ _net("Rx Received ICMP Unknown Network");
+ break;
+ case ICMP_HOST_UNKNOWN:
+ _net("Rx Received ICMP Unknown Host");
+ break;
+ default:
+ _net("Rx Received ICMP DestUnreach code=%u",
+ ee->ee_code);
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ _net("Rx Received ICMP TTL Exceeded");
+ break;
+
+ default:
+ _proto("Rx Received ICMP error { type=%u code=%u }",
+ ee->ee_type, ee->ee_code);
+ break;
+ }
+ break;
+
+ case SO_EE_ORIGIN_NONE:
+ case SO_EE_ORIGIN_LOCAL:
+ _proto("Rx Received local error { error=%d }", err);
+ err += RXRPC_LOCAL_ERROR_OFFSET;
+ break;
+
+ case SO_EE_ORIGIN_ICMP6:
+ default:
+ _proto("Rx Received error report { orig=%u }", ee->ee_origin);
+ break;
+ }
+
+ peer->error_report = err;
+}
+
+/*
+ * Distribute an error that occurred on a peer
+ */
+void rxrpc_peer_error_distributor(struct work_struct *work)
+{
+ struct rxrpc_peer *peer =
+ container_of(work, struct rxrpc_peer, error_distributor);
+ struct rxrpc_call *call;
+ int error_report;
+
+ _enter("");
+
+ error_report = READ_ONCE(peer->error_report);
+
+ _debug("ISSUE ERROR %d", error_report);
+
+ spin_lock_bh(&peer->lock);
+
+ while (!hlist_empty(&peer->error_targets)) {
+ call = hlist_entry(peer->error_targets.first,
+ struct rxrpc_call, error_link);
+ hlist_del_init(&call->error_link);
+
+ write_lock(&call->state_lock);
+ if (call->state != RXRPC_CALL_COMPLETE &&
+ call->state < RXRPC_CALL_NETWORK_ERROR) {
+ call->error_report = error_report;
+ call->state = RXRPC_CALL_NETWORK_ERROR;
+ set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock(&call->state_lock);
+ }
+
+ spin_unlock_bh(&peer->lock);
+
+ rxrpc_put_peer(peer);
+ _leave("");
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
new file mode 100644
index 000000000000..01d4930a11f7
--- /dev/null
+++ b/net/rxrpc/peer_object.c
@@ -0,0 +1,315 @@
+/* RxRPC remote transport endpoint record management
+ *
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/slab.h>
+#include <linux/hashtable.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include "ar-internal.h"
+
+static DEFINE_HASHTABLE(rxrpc_peer_hash, 10);
+static DEFINE_SPINLOCK(rxrpc_peer_hash_lock);
+
+/*
+ * Hash a peer key.
+ */
+static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx)
+{
+ const u16 *p;
+ unsigned int i, size;
+ unsigned long hash_key;
+
+ _enter("");
+
+ hash_key = (unsigned long)local / __alignof__(*local);
+ hash_key += srx->transport_type;
+ hash_key += srx->transport_len;
+ hash_key += srx->transport.family;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ hash_key += (u16 __force)srx->transport.sin.sin_port;
+ size = sizeof(srx->transport.sin.sin_addr);
+ p = (u16 *)&srx->transport.sin.sin_addr;
+ break;
+ default:
+ WARN(1, "AF_RXRPC: Unsupported transport address family\n");
+ return 0;
+ }
+
+ /* Step through the peer address in 16-bit portions for speed */
+ for (i = 0; i < size; i += sizeof(*p), p++)
+ hash_key += *p;
+
+ _leave(" 0x%lx", hash_key);
+ return hash_key;
+}
+
+/*
+ * Compare a peer to a key. Return -ve, 0 or +ve to indicate less than, same
+ * or greater than.
+ *
+ * Unfortunately, the primitives in linux/hashtable.h don't allow for sorted
+ * buckets and mid-bucket insertion, so we don't make full use of this
+ * information at this point.
+ */
+static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer,
+ struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx,
+ unsigned long hash_key)
+{
+ long diff;
+
+ diff = ((peer->hash_key - hash_key) ?:
+ ((unsigned long)peer->local - (unsigned long)local) ?:
+ (peer->srx.transport_type - srx->transport_type) ?:
+ (peer->srx.transport_len - srx->transport_len) ?:
+ (peer->srx.transport.family - srx->transport.family));
+ if (diff != 0)
+ return diff;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ return ((u16 __force)peer->srx.transport.sin.sin_port -
+ (u16 __force)srx->transport.sin.sin_port) ?:
+ memcmp(&peer->srx.transport.sin.sin_addr,
+ &srx->transport.sin.sin_addr,
+ sizeof(struct in_addr));
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Look up a remote transport endpoint for the specified address using RCU.
+ */
+static struct rxrpc_peer *__rxrpc_lookup_peer_rcu(
+ struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx,
+ unsigned long hash_key)
+{
+ struct rxrpc_peer *peer;
+
+ hash_for_each_possible_rcu(rxrpc_peer_hash, peer, hash_link, hash_key) {
+ if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) {
+ if (atomic_read(&peer->usage) == 0)
+ return NULL;
+ return peer;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Look up a remote transport endpoint for the specified address using RCU.
+ */
+struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_peer *peer;
+ unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
+
+ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
+ if (peer) {
+ switch (srx->transport.family) {
+ case AF_INET:
+ _net("PEER %d {%d,%u,%pI4+%hu}",
+ peer->debug_id,
+ peer->srx.transport_type,
+ peer->srx.transport.family,
+ &peer->srx.transport.sin.sin_addr,
+ ntohs(peer->srx.transport.sin.sin_port));
+ break;
+ }
+
+ _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ }
+ return peer;
+}
+
+/*
+ * assess the MTU size for the network interface through which this peer is
+ * reached
+ */
+static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
+{
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ peer->if_mtu = 1500;
+
+ rt = ip_route_output_ports(&init_net, &fl4, NULL,
+ peer->srx.transport.sin.sin_addr.s_addr, 0,
+ htons(7000), htons(7001),
+ IPPROTO_UDP, 0, 0);
+ if (IS_ERR(rt)) {
+ _leave(" [route err %ld]", PTR_ERR(rt));
+ return;
+ }
+
+ peer->if_mtu = dst_mtu(&rt->dst);
+ dst_release(&rt->dst);
+
+ _leave(" [if_mtu %u]", peer->if_mtu);
+}
+
+/*
+ * Allocate a peer.
+ */
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
+ if (peer) {
+ atomic_set(&peer->usage, 1);
+ peer->local = local;
+ INIT_HLIST_HEAD(&peer->error_targets);
+ INIT_WORK(&peer->error_distributor,
+ &rxrpc_peer_error_distributor);
+ peer->service_conns = RB_ROOT;
+ rwlock_init(&peer->conn_lock);
+ spin_lock_init(&peer->lock);
+ peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ }
+
+ _leave(" = %p", peer);
+ return peer;
+}
+
+/*
+ * Set up a new peer.
+ */
+static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ unsigned long hash_key,
+ gfp_t gfp)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ peer = rxrpc_alloc_peer(local, gfp);
+ if (peer) {
+ peer->hash_key = hash_key;
+ memcpy(&peer->srx, srx, sizeof(*srx));
+
+ rxrpc_assess_MTU_size(peer);
+ peer->mtu = peer->if_mtu;
+
+ if (srx->transport.family == AF_INET) {
+ peer->hdrsize = sizeof(struct iphdr);
+ switch (srx->transport_type) {
+ case SOCK_DGRAM:
+ peer->hdrsize += sizeof(struct udphdr);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ } else {
+ BUG();
+ }
+
+ peer->hdrsize += sizeof(struct rxrpc_wire_header);
+ peer->maxdata = peer->mtu - peer->hdrsize;
+ }
+
+ _leave(" = %p", peer);
+ return peer;
+}
+
+/*
+ * obtain a remote transport endpoint for the specified address
+ */
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+ struct rxrpc_peer *peer, *candidate;
+ unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
+
+ _enter("{%d,%d,%pI4+%hu}",
+ srx->transport_type,
+ srx->transport_len,
+ &srx->transport.sin.sin_addr,
+ ntohs(srx->transport.sin.sin_port));
+
+ /* search the peer list first */
+ rcu_read_lock();
+ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
+ if (peer && !rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ rcu_read_unlock();
+
+ if (!peer) {
+ /* The peer is not yet present in hash - create a candidate
+ * for a new record and then redo the search.
+ */
+ candidate = rxrpc_create_peer(local, srx, hash_key, gfp);
+ if (!candidate) {
+ _leave(" = NULL [nomem]");
+ return NULL;
+ }
+
+ spin_lock(&rxrpc_peer_hash_lock);
+
+ /* Need to check that we aren't racing with someone else */
+ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
+ if (peer && !rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ if (!peer)
+ hash_add_rcu(rxrpc_peer_hash,
+ &candidate->hash_link, hash_key);
+
+ spin_unlock(&rxrpc_peer_hash_lock);
+
+ if (peer)
+ kfree(candidate);
+ else
+ peer = candidate;
+ }
+
+ _net("PEER %d {%d,%pI4+%hu}",
+ peer->debug_id,
+ peer->srx.transport_type,
+ &peer->srx.transport.sin.sin_addr,
+ ntohs(peer->srx.transport.sin.sin_port));
+
+ _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ return peer;
+}
+
+/*
+ * Discard a ref on a remote peer record.
+ */
+void __rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+ ASSERT(hlist_empty(&peer->error_targets));
+
+ spin_lock(&rxrpc_peer_hash_lock);
+ hash_del_rcu(&peer->hash_link);
+ spin_unlock(&rxrpc_peer_hash_lock);
+
+ kfree_rcu(peer, rcu);
+}
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/proc.c
index 225163bc658d..500cdcdc843c 100644
--- a/net/rxrpc/ar-proc.c
+++ b/net/rxrpc/proc.c
@@ -46,7 +46,7 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
{
- struct rxrpc_transport *trans;
+ struct rxrpc_connection *conn;
struct rxrpc_call *call;
char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
@@ -59,25 +59,28 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
}
call = list_entry(v, struct rxrpc_call, link);
- trans = call->conn->trans;
sprintf(lbuff, "%pI4:%u",
- &trans->local->srx.transport.sin.sin_addr,
- ntohs(trans->local->srx.transport.sin.sin_port));
+ &call->local->srx.transport.sin.sin_addr,
+ ntohs(call->local->srx.transport.sin.sin_port));
- sprintf(rbuff, "%pI4:%u",
- &trans->peer->srx.transport.sin.sin_addr,
- ntohs(trans->peer->srx.transport.sin.sin_port));
+ conn = call->conn;
+ if (conn)
+ sprintf(rbuff, "%pI4:%u",
+ &conn->params.peer->srx.transport.sin.sin_addr,
+ ntohs(conn->params.peer->srx.transport.sin.sin_port));
+ else
+ strcpy(rbuff, "no_connection");
seq_printf(seq,
"UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
" %-8.8s %08x %lx\n",
lbuff,
rbuff,
- call->conn->service_id,
+ call->service_id,
call->cid,
call->call_id,
- call->conn->in_clientflag ? "Svc" : "Clt",
+ call->in_clientflag ? "Svc" : "Clt",
atomic_read(&call->usage),
rxrpc_call_states[call->state],
call->remote_abort ?: call->local_abort,
@@ -129,7 +132,6 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
- struct rxrpc_transport *trans;
char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
if (v == &rxrpc_connections) {
@@ -142,28 +144,27 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
}
conn = list_entry(v, struct rxrpc_connection, link);
- trans = conn->trans;
sprintf(lbuff, "%pI4:%u",
- &trans->local->srx.transport.sin.sin_addr,
- ntohs(trans->local->srx.transport.sin.sin_port));
+ &conn->params.local->srx.transport.sin.sin_addr,
+ ntohs(conn->params.local->srx.transport.sin.sin_port));
sprintf(rbuff, "%pI4:%u",
- &trans->peer->srx.transport.sin.sin_addr,
- ntohs(trans->peer->srx.transport.sin.sin_port));
+ &conn->params.peer->srx.transport.sin.sin_addr,
+ ntohs(conn->params.peer->srx.transport.sin.sin_port));
seq_printf(seq,
"UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
" %s %08x %08x %08x\n",
lbuff,
rbuff,
- conn->service_id,
- conn->cid,
+ conn->params.service_id,
+ conn->proto.cid,
conn->call_counter,
- conn->in_clientflag ? "Svc" : "Clt",
+ rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
atomic_read(&conn->usage),
rxrpc_conn_states[conn->state],
- key_serial(conn->key),
+ key_serial(conn->params.key),
atomic_read(&conn->serial),
atomic_read(&conn->hi_serial));
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/recvmsg.c
index 160f0927aa3e..a3fa2ed85d63 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/export.h>
@@ -145,9 +147,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (!continue_call) {
if (msg->msg_name) {
size_t len =
- sizeof(call->conn->trans->peer->srx);
+ sizeof(call->conn->params.peer->srx);
memcpy(msg->msg_name,
- &call->conn->trans->peer->srx, len);
+ &call->conn->params.peer->srx, len);
msg->msg_namelen = len;
}
sock_recv_timestamp(msg, &rx->sk, skb);
@@ -203,7 +205,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
/* we transferred the whole data packet */
if (sp->hdr.flags & RXRPC_LAST_PACKET) {
_debug("last");
- if (call->conn->out_clientflag) {
+ if (rxrpc_conn_is_client(call->conn)) {
/* last byte of reply received */
ret = copied;
goto terminal_message;
@@ -307,7 +309,7 @@ receive_non_data_message:
&abort_code);
break;
default:
- pr_err("RxRPC: Unknown packet mark %u\n", skb->mark);
+ pr_err("Unknown packet mark %u\n", skb->mark);
BUG();
break;
}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index bab56ed649ba..23c05ec6fa28 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/net.h>
@@ -56,9 +58,9 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
struct rxrpc_key_token *token;
int ret;
- _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key));
- token = conn->key->payload.data[0];
+ token = conn->params.key->payload.data[0];
conn->security_ix = token->security_index;
ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
@@ -72,7 +74,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
sizeof(token->kad->session_key)) < 0)
BUG();
- switch (conn->security_level) {
+ switch (conn->params.security_level) {
case RXRPC_SECURITY_PLAIN:
break;
case RXRPC_SECURITY_AUTH:
@@ -113,14 +115,14 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
_enter("");
- if (!conn->key)
+ if (!conn->params.key)
return;
- token = conn->key->payload.data[0];
+ token = conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- tmpbuf.x[0] = htonl(conn->epoch);
- tmpbuf.x[1] = htonl(conn->cid);
+ tmpbuf.x[0] = htonl(conn->proto.epoch);
+ tmpbuf.x[1] = htonl(conn->proto.cid);
tmpbuf.x[2] = 0;
tmpbuf.x[3] = htonl(conn->security_ix);
@@ -218,7 +220,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
rxkhdr.checksum = 0;
/* encrypt from the session key */
- token = call->conn->key->payload.data[0];
+ token = call->conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
@@ -275,13 +277,13 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
sp = rxrpc_skb(skb);
_enter("{%d{%x}},{#%u},%zu,",
- call->debug_id, key_serial(call->conn->key), sp->hdr.seq,
- data_size);
+ call->debug_id, key_serial(call->conn->params.key),
+ sp->hdr.seq, data_size);
if (!call->conn->cipher)
return 0;
- ret = key_validate(call->conn->key);
+ ret = key_validate(call->conn->params.key);
if (ret < 0)
return ret;
@@ -310,7 +312,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
y = 1; /* zero checksums are not permitted */
sp->hdr.cksum = y;
- switch (call->conn->security_level) {
+ switch (call->conn->params.security_level) {
case RXRPC_SECURITY_PLAIN:
ret = 0;
break;
@@ -444,7 +446,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
skb_to_sgvec(skb, sg, 0, skb->len);
/* decrypt from the session key */
- token = call->conn->key->payload.data[0];
+ token = call->conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
skcipher_request_set_tfm(req, call->conn->cipher);
@@ -514,7 +516,7 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
sp = rxrpc_skb(skb);
_enter("{%d{%x}},{#%u}",
- call->debug_id, key_serial(call->conn->key), sp->hdr.seq);
+ call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq);
if (!call->conn->cipher)
return 0;
@@ -555,7 +557,7 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
return -EPROTO;
}
- switch (call->conn->security_level) {
+ switch (call->conn->params.security_level) {
case RXRPC_SECURITY_PLAIN:
ret = 0;
break;
@@ -587,9 +589,9 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
u32 serial;
int ret;
- _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
- ret = key_validate(conn->key);
+ ret = key_validate(conn->params.key);
if (ret < 0)
return ret;
@@ -600,14 +602,14 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
challenge.min_level = htonl(0);
challenge.__padding = 0;
- msg.msg_name = &conn->trans->peer->srx.transport.sin;
- msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+ msg.msg_name = &conn->params.peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
- whdr.epoch = htonl(conn->epoch);
- whdr.cid = htonl(conn->cid);
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(conn->proto.cid);
whdr.callNumber = 0;
whdr.seq = 0;
whdr.type = RXRPC_PACKET_TYPE_CHALLENGE;
@@ -615,7 +617,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
whdr.userStatus = 0;
whdr.securityIndex = conn->security_ix;
whdr._rsvd = 0;
- whdr.serviceId = htons(conn->service_id);
+ whdr.serviceId = htons(conn->params.service_id);
iov[0].iov_base = &whdr;
iov[0].iov_len = sizeof(whdr);
@@ -628,7 +630,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
whdr.serial = htonl(serial);
_proto("Tx CHALLENGE %%%u", serial);
- ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
if (ret < 0) {
_debug("sendmsg failed: %d", ret);
return -EAGAIN;
@@ -655,8 +657,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
_enter("");
- msg.msg_name = &conn->trans->peer->srx.transport.sin;
- msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+ msg.msg_name = &conn->params.peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -682,7 +684,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
whdr.serial = htonl(serial);
_proto("Tx RESPONSE %%%u", serial);
- ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len);
if (ret < 0) {
_debug("sendmsg failed: %d", ret);
return -EAGAIN;
@@ -769,14 +771,14 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
u32 version, nonce, min_level, abort_code;
int ret;
- _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
- if (!conn->key) {
+ if (!conn->params.key) {
_leave(" = -EPROTO [no key]");
return -EPROTO;
}
- ret = key_validate(conn->key);
+ ret = key_validate(conn->params.key);
if (ret < 0) {
*_abort_code = RXKADEXPIRED;
return ret;
@@ -799,20 +801,20 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
goto protocol_error;
abort_code = RXKADLEVELFAIL;
- if (conn->security_level < min_level)
+ if (conn->params.security_level < min_level)
goto protocol_error;
- token = conn->key->payload.data[0];
+ token = conn->params.key->payload.data[0];
/* build the response packet */
memset(&resp, 0, sizeof(resp));
resp.version = htonl(RXKAD_VERSION);
- resp.encrypted.epoch = htonl(conn->epoch);
- resp.encrypted.cid = htonl(conn->cid);
+ resp.encrypted.epoch = htonl(conn->proto.epoch);
+ resp.encrypted.cid = htonl(conn->proto.cid);
resp.encrypted.securityIndex = htonl(conn->security_ix);
resp.encrypted.inc_nonce = htonl(nonce + 1);
- resp.encrypted.level = htonl(conn->security_level);
+ resp.encrypted.level = htonl(conn->params.security_level);
resp.kvno = htonl(token->kad->kvno);
resp.ticket_len = htonl(token->kad->ticket_len);
@@ -1094,9 +1096,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
rxkad_decrypt_response(conn, &response, &session_key);
abort_code = RXKADSEALEDINCON;
- if (ntohl(response.encrypted.epoch) != conn->epoch)
+ if (ntohl(response.encrypted.epoch) != conn->proto.epoch)
goto protocol_error_free;
- if (ntohl(response.encrypted.cid) != conn->cid)
+ if (ntohl(response.encrypted.cid) != conn->proto.cid)
goto protocol_error_free;
if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
goto protocol_error_free;
@@ -1120,7 +1122,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
level = ntohl(response.encrypted.level);
if (level > RXRPC_SECURITY_ENCRYPT)
goto protocol_error_free;
- conn->security_level = level;
+ conn->params.security_level = level;
/* create a key to hold the security data and expiration time - after
* this the connection security can be handled in exactly the same way
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/security.c
index d223253b22fa..814d285ff802 100644
--- a/net/rxrpc/ar-security.c
+++ b/net/rxrpc/security.c
@@ -76,7 +76,7 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
{
const struct rxrpc_security *sec;
struct rxrpc_key_token *token;
- struct key *key = conn->key;
+ struct key *key = conn->params.key;
int ret;
_enter("{%d},{%x}", conn->debug_id, key_serial(key));
@@ -113,7 +113,7 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
{
const struct rxrpc_security *sec;
- struct rxrpc_local *local = conn->trans->local;
+ struct rxrpc_local *local = conn->params.local;
struct rxrpc_sock *rx;
struct key *key;
key_ref_t kref;
@@ -121,7 +121,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
_enter("");
- sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix);
+ sprintf(kdesc, "%u:%u", conn->params.service_id, conn->security_ix);
sec = rxrpc_security_lookup(conn->security_ix);
if (!sec) {
@@ -132,7 +132,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
/* find the service */
read_lock_bh(&local->services_lock);
list_for_each_entry(rx, &local->services, listen_link) {
- if (rx->srx.srx_service == conn->service_id)
+ if (rx->srx.srx_service == conn->params.service_id)
goto found_service;
}
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/skbuff.c
index 62a267472fce..eee0cfd9ac8c 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index d20ed575acf4..03ad08774d4e 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -18,6 +18,7 @@ static struct ctl_table_header *rxrpc_sysctl_reg_table;
static const unsigned int zero = 0;
static const unsigned int one = 1;
static const unsigned int four = 4;
+static const unsigned int thirtytwo = 32;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = RXRPC_MAXACKS;
@@ -89,16 +90,17 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&one,
},
+
+ /* Non-time values */
{
- .procname = "transport_expiry",
- .data = &rxrpc_transport_expiry,
+ .procname = "max_backlog",
+ .data = &rxrpc_max_backlog,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)&four,
+ .extra2 = (void *)&thirtytwo,
},
-
- /* Non-time values */
{
.procname = "rx_window_size",
.data = &rxrpc_rx_window_size,
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
new file mode 100644
index 000000000000..f28122a15a24
--- /dev/null
+++ b/net/rxrpc/utils.c
@@ -0,0 +1,41 @@
+/* Utility routines
+ *
+ * Copyright (C) 2015 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include "ar-internal.h"
+
+/*
+ * Set up an RxRPC address from a socket buffer.
+ */
+void rxrpc_get_addr_from_skb(struct rxrpc_local *local,
+ const struct sk_buff *skb,
+ struct sockaddr_rxrpc *srx)
+{
+ memset(srx, 0, sizeof(*srx));
+ srx->transport_type = local->srx.transport_type;
+ srx->transport.family = local->srx.transport.family;
+
+ /* Can we see an ipv4 UDP packet on an ipv6 UDP socket? and vice
+ * versa?
+ */
+ switch (srx->transport.family) {
+ case AF_INET:
+ srx->transport.sin.sin_port = udp_hdr(skb)->source;
+ srx->transport_len = sizeof(struct sockaddr_in);
+ memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
+ sizeof(struct in_addr));
+ break;
+
+ default:
+ BUG();
+ }
+}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index c7a0b0d481c0..47ec2305f920 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -224,8 +224,8 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index)
}
EXPORT_SYMBOL(tcf_hash_search);
-int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
- int bind)
+bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
+ int bind)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
struct tcf_common *p = NULL;
@@ -235,9 +235,9 @@ int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
p->tcfc_refcnt++;
a->priv = p;
a->hinfo = hinfo;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
EXPORT_SYMBOL(tcf_hash_check);
@@ -283,10 +283,11 @@ err2:
p->tcfc_index = index ? index : tcf_hash_new_index(tn);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
+ p->tcfc_tm.firstuse = 0;
if (est) {
err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
&p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ &p->tcfc_lock, NULL, est);
if (err) {
free_percpu(p->cpu_qstats);
goto err2;
@@ -503,8 +504,8 @@ nla_put_failure:
}
EXPORT_SYMBOL(tcf_action_dump_1);
-int
-tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref)
+int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
+ int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
@@ -670,7 +671,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
@@ -687,9 +688,9 @@ errout:
return -1;
}
-static int
-tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq,
- u16 flags, int event, int bind, int ref)
+static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
+ u32 portid, u32 seq, u16 flags, int event, int bind,
+ int ref)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
@@ -730,7 +731,8 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+ if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
+ 0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -838,7 +840,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (a.ops == NULL) /*some idjot trying to flush unknown action */
goto err_out;
- nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
+ nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
+ sizeof(*t), 0);
if (!nlh)
goto out_module_put;
t = nlmsg_data(nlh);
@@ -1001,7 +1004,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
- if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETACTION) &&
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c7123e01c2ca..f7b6cf49ea6f 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -154,10 +154,7 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
if (ret)
goto nla_put_failure;
- tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
- tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
- tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
-
+ tcf_tm_dump(&tm, &prog->tcf_tm);
if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm,
TCA_ACT_BPF_PAD))
goto nla_put_failure;
@@ -172,7 +169,8 @@ nla_put_failure:
static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
[TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) },
[TCA_ACT_BPF_FD] = { .type = NLA_U32 },
- [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
+ [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING,
+ .len = ACT_BPF_NAME_LEN },
[TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 },
[TCA_ACT_BPF_OPS] = { .type = NLA_BINARY,
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2ba700c765e0..35a5270f289d 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -44,7 +44,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
int proto;
spin_lock(&ca->tcf_lock);
- ca->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ca->tcf_tm);
bstats_update(&ca->tcf_bstats, skb);
if (skb->protocol == htons(ETH_P_IP)) {
@@ -160,9 +160,7 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(ci->tcf_tm.expires);
+ tcf_tm_dump(&t, &ci->tcf_tm);
if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t,
TCA_CONNMARK_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 28e934ed038a..dcd9ababd351 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -501,7 +501,7 @@ static int tcf_csum(struct sk_buff *skb,
u32 update_flags;
spin_lock(&p->tcf_lock);
- p->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&p->tcf_tm);
bstats_update(&p->tcf_bstats, skb);
action = p->tcf_action;
update_flags = p->update_flags;
@@ -546,9 +546,8 @@ static int tcf_csum_dump(struct sk_buff *skb,
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ec5cc8435238..19058a7f3e5c 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -162,7 +162,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
tm->lastuse = lastuse;
}
-static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_gact *gact = a->priv;
@@ -188,9 +189,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
goto nla_put_failure;
}
#endif
- t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
+ tcf_tm_dump(&t, &gact->tcf_tm);
if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index ea4a2fef1b71..845ab5119c05 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -428,7 +428,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
u16 ife_type = 0;
u8 *daddr = NULL;
u8 *saddr = NULL;
- int ret = 0, exists = 0;
+ bool exists = false;
+ int ret = 0;
int err;
err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
@@ -562,9 +563,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(ife->tcf_tm.expires);
+ tcf_tm_dump(&t, &ife->tcf_tm);
if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
goto nla_put_failure;
@@ -632,7 +631,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
- ife->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ife->tcf_tm);
spin_unlock(&ife->tcf_lock);
ifehdrln = ntohs(ifehdrln);
@@ -720,7 +719,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
- ife->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ife->tcf_tm);
if (!metalen) { /* no metadata to send */
/* abuse overlimits to count when we allow packet
@@ -811,7 +810,7 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
pr_info_ratelimited("unknown failure(policy neither de/encode\n");
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
- ife->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ife->tcf_tm);
ife->tcf_qstats.drops++;
spin_unlock(&ife->tcf_lock);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d4bd19ee5822..b8c50600697a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -34,7 +34,8 @@ static int ipt_net_id;
static int xt_net_id;
-static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
+static int ipt_init_target(struct xt_entry_target *t, char *table,
+ unsigned int hook)
{
struct xt_tgchk_param par;
struct xt_target *target;
@@ -96,7 +97,8 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
struct tcf_ipt *ipt;
struct xt_entry_target *td, *t;
char *tname;
- int ret = 0, err, exists = 0;
+ bool exists = false;
+ int ret = 0, err;
u32 hook = 0;
u32 index = 0;
@@ -215,7 +217,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&ipt->tcf_lock);
- ipt->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&ipt->tcf_tm);
bstats_update(&ipt->tcf_bstats, skb);
/* yes, we have to worry about both in and out dev
@@ -245,7 +247,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
default:
net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n",
ret);
- result = TC_POLICE_OK;
+ result = TC_ACT_OK;
break;
}
spin_unlock(&ipt->tcf_lock);
@@ -253,7 +255,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
}
-static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+ int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_ipt *ipt = a->priv;
@@ -280,11 +283,11 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) ||
nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname))
goto nla_put_failure;
- tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
- tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
- tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
+
+ tcf_tm_dump(&tm, &ipt->tcf_tm);
if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
goto nla_put_failure;
+
kfree(t);
return skb->len;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 128942bc9e42..5b135d357e1e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -62,7 +62,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct tc_mirred *parm;
struct tcf_mirred *m;
struct net_device *dev;
- int ret, ok_push = 0, exists = 0;
+ int ret, ok_push = 0;
+ bool exists = false;
if (nla == NULL)
return -EINVAL;
@@ -157,7 +158,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
u32 at;
tcf_lastuse_update(&m->tcf_tm);
-
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
rcu_read_lock();
@@ -219,9 +219,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &m->tcf_tm);
if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c0a879f940de..06ccb03f25da 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -103,7 +103,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&p->tcf_lock);
- p->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&p->tcf_tm);
old_addr = p->old_addr;
new_addr = p->new_addr;
mask = p->mask;
@@ -264,9 +264,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c6e18f230af6..82d3c1479029 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -121,7 +121,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&p->tcf_lock);
- p->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
struct tc_pedit_key *tkey = p->tcfp_keys;
@@ -200,11 +200,11 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_PEDIT_PARMS, s, opt))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD))
goto nla_put_failure;
+
kfree(opt);
return skb->len;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c557789765dc..1e8ede3955f4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -115,9 +115,9 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
[TCA_POLICE_RESULT] = { .type = NLA_U32 },
};
-static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action *a,
- int ovr, int bind)
+static int tcf_act_police_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action *a,
+ int ovr, int bind)
{
int ret = 0, err;
struct nlattr *tb[TCA_POLICE_MAX + 1];
@@ -182,7 +182,8 @@ override:
if (est) {
err = gen_replace_estimator(&police->tcf_bstats, NULL,
&police->tcf_rate_est,
- &police->tcf_lock, est);
+ &police->tcf_lock,
+ NULL, est);
if (err)
goto failure_unlock;
} else if (tb[TCA_POLICE_AVRATE] &&
@@ -336,6 +337,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse);
+ t.firstuse = jiffies_to_clock_t(jiffies - police->tcf_tm.firstuse);
t.expires = jiffies_to_clock_t(police->tcf_tm.expires);
if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
goto nla_put_failure;
@@ -364,7 +366,7 @@ static struct tc_action_ops act_police_ops = {
.owner = THIS_MODULE,
.act = tcf_act_police,
.dump = tcf_act_police_dump,
- .init = tcf_act_police_locate,
+ .init = tcf_act_police_init,
.walk = tcf_act_police_walker,
.lookup = tcf_police_search,
};
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e42f8daca147..318328d34d12 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -35,7 +35,7 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
struct tcf_defact *d = a->priv;
spin_lock(&d->tcf_lock);
- d->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&d->tcf_tm);
bstats_update(&d->tcf_bstats, skb);
/* print policy string followed by _ then packet count
@@ -86,8 +86,9 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tc_defact *parm;
struct tcf_defact *d;
+ bool exists = false;
+ int ret = 0, err;
char *defdata;
- int ret = 0, err, exists = 0;
if (nla == NULL)
return -EINVAL;
@@ -158,9 +159,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt) ||
nla_put_string(skb, TCA_DEF_DATA, d->tcfd_defdata))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index e928802966bc..53d1486cddf7 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -37,7 +37,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_skbedit *d = a->priv;
spin_lock(&d->tcf_lock);
- d->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&d->tcf_tm);
bstats_update(&d->tcf_bstats, skb);
if (d->flags & SKBEDIT_F_PRIORITY)
@@ -69,7 +69,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL;
u16 *queue_mapping = NULL;
- int ret = 0, err, exists = 0;
+ bool exists = false;
+ int ret = 0, err;
if (nla == NULL)
return -EINVAL;
@@ -168,9 +169,8 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
&d->mark))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+
+ tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ac4adc812c12..db9b7ed570ba 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -31,7 +31,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
int err;
spin_lock(&v->tcf_lock);
- v->tcf_tm.lastuse = jiffies;
+ tcf_lastuse_update(&v->tcf_tm);
bstats_update(&v->tcf_bstats, skb);
action = v->tcf_action;
@@ -77,8 +77,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
int action;
__be16 push_vid = 0;
__be16 push_proto = 0;
- int ret = 0, exists = 0;
- int err;
+ bool exists = false;
+ int ret = 0, err;
if (!nla)
return -EINVAL;
@@ -179,12 +179,11 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
(nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
- nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto)))
+ nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
+ v->tcfv_push_proto)))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(v->tcf_tm.expires);
+ tcf_tm_dump(&t, &v->tcf_tm);
if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD))
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a75864d93142..843a716a4303 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -103,6 +103,17 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
unsigned long fh, int event);
+static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n,
+ struct tcf_proto __rcu **chain, int event)
+{
+ struct tcf_proto __rcu **it_chain;
+ struct tcf_proto *tp;
+
+ for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL;
+ it_chain = &tp->next)
+ tfilter_notify(net, oskb, n, tp, 0, event);
+}
/* Select new prio value from the range, managed by kernel. */
@@ -156,11 +167,23 @@ replay:
cl = 0;
if (prio == 0) {
- /* If no priority is given, user wants we allocated it. */
- if (n->nlmsg_type != RTM_NEWTFILTER ||
- !(n->nlmsg_flags & NLM_F_CREATE))
+ switch (n->nlmsg_type) {
+ case RTM_DELTFILTER:
+ if (protocol || t->tcm_handle || tca[TCA_KIND])
+ return -ENOENT;
+ break;
+ case RTM_NEWTFILTER:
+ /* If no priority is provided by the user,
+ * we allocate one.
+ */
+ if (n->nlmsg_flags & NLM_F_CREATE) {
+ prio = TC_H_MAKE(0x80000000U, 0U);
+ break;
+ }
+ /* fall-through */
+ default:
return -ENOENT;
- prio = TC_H_MAKE(0x80000000U, 0U);
+ }
}
/* Find head of filter chain. */
@@ -200,6 +223,12 @@ replay:
err = -EINVAL;
if (chain == NULL)
goto errout;
+ if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
+ tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
+ tcf_destroy_chain(chain);
+ err = 0;
+ goto errout;
+ }
/* Check the chain for existence of proto-tcf with this priority */
for (back = chain;
@@ -351,8 +380,9 @@ errout:
return err;
}
-static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp,
- unsigned long fh, u32 portid, u32 seq, u16 flags, int event)
+static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+ struct tcf_proto *tp, unsigned long fh, u32 portid,
+ u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -474,9 +504,11 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
TC_H_MIN(tcm->tcm_info) != tp->protocol)
continue;
if (t > s_t)
- memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+ memset(&cb->args[1], 0,
+ sizeof(cb->args)-sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, 0, NETLINK_CB(cb->skb).portid,
+ if (tcf_fill_node(net, skb, tp, 0,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
break;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b3b7978f4182..5060801a2f6d 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -66,6 +66,7 @@ struct cls_fl_filter {
struct fl_flow_key key;
struct list_head list;
u32 handle;
+ u32 flags;
struct rcu_head rcu;
};
@@ -123,6 +124,9 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
+ if (!atomic_read(&head->ht.nelems))
+ return -1;
+
fl_clear_masked_range(&skb_key, &head->mask);
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
@@ -136,7 +140,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
f = rhashtable_lookup_fast(&head->ht,
fl_key_get_start(&skb_mkey, &head->mask),
head->ht_params);
- if (f) {
+ if (f && !tc_skip_sw(f->flags)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
}
@@ -183,19 +187,20 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
}
-static void fl_hw_replace_filter(struct tcf_proto *tp,
- struct flow_dissector *dissector,
- struct fl_flow_key *mask,
- struct fl_flow_key *key,
- struct tcf_exts *actions,
- unsigned long cookie, u32 flags)
+static int fl_hw_replace_filter(struct tcf_proto *tp,
+ struct flow_dissector *dissector,
+ struct fl_flow_key *mask,
+ struct fl_flow_key *key,
+ struct tcf_exts *actions,
+ unsigned long cookie, u32 flags)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
struct tc_to_netdev tc;
+ int err;
if (!tc_should_offload(dev, tp, flags))
- return;
+ return tc_skip_sw(flags) ? -EINVAL : 0;
offload.command = TC_CLSFLOWER_REPLACE;
offload.cookie = cookie;
@@ -207,7 +212,12 @@ static void fl_hw_replace_filter(struct tcf_proto *tp,
tc.type = TC_SETUP_CLSFLOWER;
tc.cls_flower = &offload;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+
+ if (tc_skip_sw(flags))
+ return err;
+
+ return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
@@ -524,7 +534,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
- u32 flags = 0;
int err;
if (!tca[TCA_OPTIONS])
@@ -552,8 +561,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
fnew->handle = handle;
- if (tb[TCA_FLOWER_FLAGS])
- flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+ if (tb[TCA_FLOWER_FLAGS]) {
+ fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+
+ if (!tc_flags_valid(fnew->flags)) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
if (err)
@@ -563,19 +578,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout;
- err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
- head->ht_params);
+ if (!tc_skip_sw(fnew->flags)) {
+ err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+ head->ht_params);
+ if (err)
+ goto errout;
+ }
+
+ err = fl_hw_replace_filter(tp,
+ &head->dissector,
+ &mask.key,
+ &fnew->key,
+ &fnew->exts,
+ (unsigned long)fnew,
+ fnew->flags);
if (err)
goto errout;
- fl_hw_replace_filter(tp,
- &head->dissector,
- &mask.key,
- &fnew->key,
- &fnew->exts,
- (unsigned long)fnew,
- flags);
-
if (fold) {
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
@@ -734,6 +753,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
sizeof(key->tp.dst))))
goto nla_put_failure;
+ nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
+
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ddf047df5361..12ebde845523 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -95,8 +95,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
Expected action: do not backoff, but wait until queue will clear.
NET_XMIT_CN - probably this packet enqueued, but another one dropped.
Expected action: backoff or ignore
- NET_XMIT_POLICED - dropped by police.
- Expected action: backoff or error to real-time apps.
Auxiliary routines:
@@ -583,7 +581,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
timer);
rcu_read_lock();
- qdisc_unthrottled(wd->qdisc);
__netif_schedule(qdisc_root(wd->qdisc));
rcu_read_unlock();
@@ -598,15 +595,12 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
}
EXPORT_SYMBOL(qdisc_watchdog_init);
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle)
+void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
if (test_bit(__QDISC_STATE_DEACTIVATED,
&qdisc_root_sleeping(wd->qdisc)->state))
return;
- if (throttle)
- qdisc_throttled(wd->qdisc);
-
if (wd->last_expires == expires)
return;
@@ -620,7 +614,6 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
hrtimer_cancel(&wd->timer);
- qdisc_unthrottled(wd->qdisc);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
@@ -982,7 +975,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
- spinlock_t *root_lock;
+ seqcount_t *running;
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT)
@@ -991,14 +984,15 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
if ((sch->parent != TC_H_ROOT) &&
!(sch->flags & TCQ_F_INGRESS) &&
(!p || !(p->flags & TCQ_F_MQROOT)))
- root_lock = qdisc_root_sleeping_lock(sch);
+ running = qdisc_root_sleeping_running(sch);
else
- root_lock = qdisc_lock(sch);
+ running = &sch->running;
err = gen_new_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
- root_lock,
+ NULL,
+ running,
tca[TCA_RATE]);
if (err)
goto err_out4;
@@ -1061,7 +1055,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
gen_replace_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
}
out:
@@ -1369,8 +1364,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d,
- TCA_PAD) < 0)
+ NULL, &d, TCA_PAD) < 0)
goto nla_put_failure;
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1381,7 +1375,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
cpu_qstats = q->cpu_qstats;
}
- if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+ &d, cpu_bstats, &q->bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
goto nla_put_failure;
@@ -1684,8 +1679,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
goto nla_put_failure;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- qdisc_root_sleeping_lock(q), &d,
- TCA_PAD) < 0)
+ NULL, &d, TCA_PAD) < 0)
goto nla_put_failure;
if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1911af3ca7c0..481e4f12aeb4 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -357,16 +357,17 @@ static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
/* --------------------------- Qdisc operations ---------------------------- */
-static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow;
struct tcf_result res;
int result;
- int ret = NET_XMIT_POLICED;
+ int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
- result = TC_POLICE_OK; /* be nice to gcc */
+ result = TC_ACT_OK; /* be nice to gcc */
flow = NULL;
if (TC_H_MAJ(skb->priority) != sch->handle ||
!(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
@@ -398,12 +399,12 @@ done:
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
goto drop;
- case TC_POLICE_RECLASSIFY:
+ case TC_ACT_RECLASSIFY:
if (flow->excess)
flow = flow->excess;
else
@@ -413,7 +414,7 @@ done:
#endif
}
- ret = qdisc_enqueue(skb, flow->q);
+ ret = qdisc_enqueue(skb, flow->q, to_free);
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {
@@ -519,20 +520,6 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
return p->link.q->ops->peek(p->link.q);
}
-static unsigned int atm_tc_drop(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
- unsigned int len;
-
- pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
- return len;
- }
- return 0;
-}
-
static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
@@ -637,7 +624,8 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
- if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &flow->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
return -1;
@@ -671,7 +659,6 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
.enqueue = atm_tc_enqueue,
.dequeue = atm_tc_dequeue,
.peek = atm_tc_peek,
- .drop = atm_tc_drop,
.init = atm_tc_init,
.reset = atm_tc_reset,
.destroy = atm_tc_destroy,
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 3fee70d9814f..c98a61e980ba 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -17,9 +17,10 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
-static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
- qdisc_drop(skb, sch);
+ qdisc_drop(skb, sch, to_free);
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index baafddf229ce..beb554aa8cfb 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -80,10 +80,6 @@ struct cbq_class {
unsigned char priority; /* class priority */
unsigned char priority2; /* priority to be used after overlimit */
unsigned char ewma_log; /* time constant for idle time calculation */
- unsigned char ovl_strategy;
-#ifdef CONFIG_NET_CLS_ACT
- unsigned char police;
-#endif
u32 defmap;
@@ -94,10 +90,6 @@ struct cbq_class {
u32 avpkt;
struct qdisc_rate_table *R_tab;
- /* Overlimit strategy parameters */
- void (*overlimit)(struct cbq_class *cl);
- psched_tdiff_t penalty;
-
/* General scheduler (WRR) parameters */
long allot;
long quantum; /* Allotment per WRR round */
@@ -353,7 +345,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
int toplevel = q->toplevel;
- if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
+ if (toplevel > cl->level) {
psched_time_t now = psched_get_time();
do {
@@ -366,7 +358,8 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
}
static int
-cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
int uninitialized_var(ret);
@@ -378,14 +371,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl == NULL) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
}
-#ifdef CONFIG_NET_CLS_ACT
- cl->q->__parent = sch;
-#endif
- ret = qdisc_enqueue(skb, cl->q);
+ ret = qdisc_enqueue(skb, cl->q, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
cbq_mark_toplevel(q, cl);
@@ -402,11 +392,8 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
-/* Overlimit actions */
-
-/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
-
-static void cbq_ovl_classic(struct cbq_class *cl)
+/* Overlimit action: penalize leaf class by adding offtime */
+static void cbq_overlimit(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
psched_tdiff_t delay = cl->undertime - q->now;
@@ -456,99 +443,6 @@ static void cbq_ovl_classic(struct cbq_class *cl)
}
}
-/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
- * they go overlimit
- */
-
-static void cbq_ovl_rclassic(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *this = cl;
-
- do {
- if (cl->level > q->toplevel) {
- cl = NULL;
- break;
- }
- } while ((cl = cl->borrow) != NULL);
-
- if (cl == NULL)
- cl = this;
- cbq_ovl_classic(cl);
-}
-
-/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
-
-static void cbq_ovl_delay(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = cl->undertime - q->now;
-
- if (test_bit(__QDISC_STATE_DEACTIVATED,
- &qdisc_root_sleeping(cl->qdisc)->state))
- return;
-
- if (!cl->delayed) {
- psched_time_t sched = q->now;
- ktime_t expires;
-
- delay += cl->offtime;
- if (cl->avgidle < 0)
- delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
- if (cl->avgidle < cl->minidle)
- cl->avgidle = cl->minidle;
- cl->undertime = q->now + delay;
-
- if (delay > 0) {
- sched += delay + cl->penalty;
- cl->penalized = sched;
- cl->cpriority = TC_CBQ_MAXPRIO;
- q->pmask |= (1<<TC_CBQ_MAXPRIO);
-
- expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
- if (hrtimer_try_to_cancel(&q->delay_timer) &&
- ktime_to_ns(ktime_sub(
- hrtimer_get_expires(&q->delay_timer),
- expires)) > 0)
- hrtimer_set_expires(&q->delay_timer, expires);
- hrtimer_restart(&q->delay_timer);
- cl->delayed = 1;
- cl->xstats.overactions++;
- return;
- }
- delay = 1;
- }
- if (q->wd_expires == 0 || q->wd_expires > delay)
- q->wd_expires = delay;
-}
-
-/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
-
-static void cbq_ovl_lowprio(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-
- cl->penalized = q->now + cl->penalty;
-
- if (cl->cpriority != cl->priority2) {
- cl->cpriority = cl->priority2;
- q->pmask |= (1<<cl->cpriority);
- cl->xstats.overactions++;
- }
- cbq_ovl_classic(cl);
-}
-
-/* TC_CBQ_OVL_DROP: penalize class by dropping */
-
-static void cbq_ovl_drop(struct cbq_class *cl)
-{
- if (cl->q->ops->drop)
- if (cl->q->ops->drop(cl->q))
- cl->qdisc->q.qlen--;
- cl->xstats.overactions++;
- cbq_ovl_classic(cl);
-}
-
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
psched_time_t now)
{
@@ -620,45 +514,10 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
}
- qdisc_unthrottled(sch);
__netif_schedule(qdisc_root(sch));
return HRTIMER_NORESTART;
}
-#ifdef CONFIG_NET_CLS_ACT
-static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
-{
- struct Qdisc *sch = child->__parent;
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = q->rx_class;
-
- q->rx_class = NULL;
-
- if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
- int ret;
-
- cbq_mark_toplevel(q, cl);
-
- q->rx_class = cl;
- cl->q->__parent = sch;
-
- ret = qdisc_enqueue(skb, cl->q);
- if (ret == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return 0;
- }
- if (net_xmit_drop_count(ret))
- qdisc_qstats_drop(sch);
- return 0;
- }
-
- qdisc_qstats_drop(sch);
- return -1;
-}
-#endif
-
/*
* It is mission critical procedure.
*
@@ -807,7 +666,7 @@ cbq_under_limit(struct cbq_class *cl)
cl = cl->borrow;
if (!cl) {
this_cl->qstats.overlimits++;
- this_cl->overlimit(this_cl);
+ cbq_overlimit(this_cl);
return NULL;
}
if (cl->level > q->toplevel)
@@ -960,7 +819,6 @@ cbq_dequeue(struct Qdisc *sch)
if (skb) {
qdisc_bstats_update(sch, skb);
sch->q.qlen--;
- qdisc_unthrottled(sch);
return skb;
}
@@ -1166,31 +1024,6 @@ static void cbq_link_class(struct cbq_class *this)
}
}
-static unsigned int cbq_drop(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl, *cl_head;
- int prio;
- unsigned int len;
-
- for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
- cl_head = q->active[prio];
- if (!cl_head)
- continue;
-
- cl = cl_head;
- do {
- if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
- sch->q.qlen--;
- if (!cl->q->q.qlen)
- cbq_deactivate_class(cl);
- return len;
- }
- } while ((cl = cl->next_alive) != cl_head);
- }
- return 0;
-}
-
static void
cbq_reset(struct Qdisc *sch)
{
@@ -1280,50 +1113,6 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
return 0;
}
-static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
-{
- switch (ovl->strategy) {
- case TC_CBQ_OVL_CLASSIC:
- cl->overlimit = cbq_ovl_classic;
- break;
- case TC_CBQ_OVL_DELAY:
- cl->overlimit = cbq_ovl_delay;
- break;
- case TC_CBQ_OVL_LOWPRIO:
- if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
- ovl->priority2 - 1 <= cl->priority)
- return -EINVAL;
- cl->priority2 = ovl->priority2 - 1;
- cl->overlimit = cbq_ovl_lowprio;
- break;
- case TC_CBQ_OVL_DROP:
- cl->overlimit = cbq_ovl_drop;
- break;
- case TC_CBQ_OVL_RCLASSIC:
- cl->overlimit = cbq_ovl_rclassic;
- break;
- default:
- return -EINVAL;
- }
- cl->penalty = ovl->penalty;
- return 0;
-}
-
-#ifdef CONFIG_NET_CLS_ACT
-static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
-{
- cl->police = p->police;
-
- if (cl->q->handle) {
- if (p->police == TC_POLICE_RECLASSIFY)
- cl->q->reshape_fail = cbq_reshape_fail;
- else
- cl->q->reshape_fail = NULL;
- }
- return 0;
-}
-#endif
-
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
@@ -1375,8 +1164,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
q->link.priority = TC_CBQ_MAXPRIO - 1;
q->link.priority2 = TC_CBQ_MAXPRIO - 1;
q->link.cpriority = TC_CBQ_MAXPRIO - 1;
- q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
- q->link.overlimit = cbq_ovl_classic;
q->link.allot = psched_mtu(qdisc_dev(sch));
q->link.quantum = q->link.allot;
q->link.weight = q->link.R_tab->rate.rate;
@@ -1463,24 +1250,6 @@ nla_put_failure:
return -1;
}
-static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_ovl opt;
-
- opt.strategy = cl->ovl_strategy;
- opt.priority2 = cl->priority2 + 1;
- opt.pad = 0;
- opt.penalty = cl->penalty;
- if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
@@ -1500,36 +1269,11 @@ nla_put_failure:
return -1;
}
-#ifdef CONFIG_NET_CLS_ACT
-static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_police opt;
-
- if (cl->police) {
- opt.police = cl->police;
- opt.__res1 = 0;
- opt.__res2 = 0;
- if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
- goto nla_put_failure;
- }
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-#endif
-
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
if (cbq_dump_lss(skb, cl) < 0 ||
cbq_dump_rate(skb, cl) < 0 ||
cbq_dump_wrr(skb, cl) < 0 ||
- cbq_dump_ovl(skb, cl) < 0 ||
-#ifdef CONFIG_NET_CLS_ACT
- cbq_dump_police(skb, cl) < 0 ||
-#endif
cbq_dump_fopt(skb, cl) < 0)
return -1;
return 0;
@@ -1600,7 +1344,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (cl->undertime != PSCHED_PASTPERFECT)
cl->xstats.undertime = cl->undertime - q->now;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
return -1;
@@ -1618,11 +1363,6 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
&pfifo_qdisc_ops, cl->common.classid);
if (new == NULL)
return -ENOBUFS;
- } else {
-#ifdef CONFIG_NET_CLS_ACT
- if (cl->police == TC_POLICE_RECLASSIFY)
- new->reshape_fail = cbq_reshape_fail;
-#endif
}
*old = qdisc_replace(sch, new, &cl->q);
@@ -1735,6 +1475,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (err < 0)
return err;
+ if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE])
+ return -EOPNOTSUPP;
+
if (cl) {
/* Check parent */
if (parentid) {
@@ -1755,7 +1498,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
qdisc_put_rtab(rtab);
@@ -1782,14 +1526,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
}
- if (tb[TCA_CBQ_OVL_STRATEGY])
- cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
-
-#ifdef CONFIG_NET_CLS_ACT
- if (tb[TCA_CBQ_POLICE])
- cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
-#endif
-
if (tb[TCA_CBQ_FOPT])
cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
@@ -1848,7 +1584,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
kfree(cl);
@@ -1884,13 +1621,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->maxidle = q->link.maxidle;
if (cl->avpkt == 0)
cl->avpkt = q->link.avpkt;
- cl->overlimit = cbq_ovl_classic;
- if (tb[TCA_CBQ_OVL_STRATEGY])
- cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
-#ifdef CONFIG_NET_CLS_ACT
- if (tb[TCA_CBQ_POLICE])
- cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
-#endif
if (tb[TCA_CBQ_FOPT])
cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
sch_tree_unlock(sch);
@@ -2035,7 +1765,6 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
.enqueue = cbq_enqueue,
.dequeue = cbq_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = cbq_drop,
.init = cbq_init,
.reset = cbq_reset,
.destroy = cbq_destroy,
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 0a08c860eee4..3b6d5bd69101 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -115,7 +115,8 @@ static void choke_zap_tail_holes(struct choke_sched_data *q)
}
/* Drop packet from queue array by creating a "hole" */
-static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
+static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
+ struct sk_buff **to_free)
{
struct choke_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb = q->tab[idx];
@@ -129,7 +130,7 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
qdisc_qstats_backlog_dec(sch, skb);
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
- qdisc_drop(skb, sch);
+ qdisc_drop(skb, sch, to_free);
--sch->q.qlen;
}
@@ -261,7 +262,8 @@ static bool choke_match_random(const struct choke_sched_data *q,
return choke_match_flow(oskb, nskb);
}
-static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
@@ -288,7 +290,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* Draw a packet at random from queue and compare flow */
if (choke_match_random(q, skb, &idx)) {
q->stats.matched++;
- choke_drop_by_idx(sch, idx);
+ choke_drop_by_idx(sch, idx, to_free);
goto congestion_drop;
}
@@ -331,16 +333,16 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
q->stats.pdrop++;
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
congestion_drop:
- qdisc_drop(skb, sch);
+ qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
other_drop:
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
}
@@ -365,22 +367,6 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch)
return skb;
}
-static unsigned int choke_drop(struct Qdisc *sch)
-{
- struct choke_sched_data *q = qdisc_priv(sch);
- unsigned int len;
-
- len = qdisc_queue_drop(sch);
- if (len > 0)
- q->stats.other++;
- else {
- if (!red_is_idling(&q->vars))
- red_start_of_idle_period(&q->vars);
- }
-
- return len;
-}
-
static void choke_reset(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
@@ -391,11 +377,11 @@ static void choke_reset(struct Qdisc *sch)
q->head = (q->head + 1) & q->tab_mask;
if (!skb)
continue;
- qdisc_qstats_backlog_dec(sch, skb);
- --sch->q.qlen;
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
@@ -471,7 +457,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0;
@@ -569,7 +555,6 @@ static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
.enqueue = choke_enqueue,
.dequeue = choke_dequeue,
.peek = choke_peek_head,
- .drop = choke_drop,
.init = choke_init,
.destroy = choke_destroy,
.reset = choke_reset,
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index dddf3bb65a32..4002df3c7d9f 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -82,7 +82,8 @@ static void drop_func(struct sk_buff *skb, void *ctx)
{
struct Qdisc *sch = ctx;
- qdisc_drop(skb, sch);
+ kfree_skb(skb);
+ qdisc_qstats_drop(sch);
}
static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
@@ -107,7 +108,8 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
return skb;
}
-static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct codel_sched_data *q;
@@ -117,7 +119,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
q = qdisc_priv(sch);
q->drop_overlimit++;
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
@@ -174,7 +176,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index bf8af2c43c2c..8af5c59eef84 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -91,7 +91,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -119,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
qdisc_destroy(cl->qdisc);
@@ -279,7 +281,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (qlen)
xstats.deficit = cl->deficit;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
return -1;
@@ -347,7 +350,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
return NULL;
}
-static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
@@ -357,11 +361,11 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return err;
}
- err = qdisc_enqueue(skb, cl->qdisc);
+ err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
@@ -420,27 +424,6 @@ out:
return NULL;
}
-static unsigned int drr_drop(struct Qdisc *sch)
-{
- struct drr_sched *q = qdisc_priv(sch);
- struct drr_class *cl;
- unsigned int len;
-
- list_for_each_entry(cl, &q->active, alist) {
- if (cl->qdisc->ops->drop) {
- len = cl->qdisc->ops->drop(cl->qdisc);
- if (len > 0) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
- return len;
- }
- }
- }
- return 0;
-}
-
static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
struct drr_sched *q = qdisc_priv(sch);
@@ -510,7 +493,6 @@ static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
.enqueue = drr_enqueue,
.dequeue = drr_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = drr_drop,
.init = drr_init_qdisc,
.reset = drr_reset_qdisc,
.destroy = drr_destroy_qdisc,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 34b4ddaca27c..1308bbf460f7 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -191,7 +191,8 @@ static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
/* --------------------------- Qdisc operations ---------------------------- */
-static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
int err;
@@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#ifdef CONFIG_NET_CLS_ACT
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
@@ -251,7 +252,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
- err = qdisc_enqueue(skb, p->q);
+ err = qdisc_enqueue(skb, p->q, to_free);
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
qdisc_qstats_drop(sch);
@@ -264,7 +265,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
drop:
- qdisc_drop(skb, sch);
+ qdisc_drop(skb, sch, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
@@ -320,23 +321,6 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
return p->q->ops->peek(p->q);
}
-static unsigned int dsmark_drop(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- unsigned int len;
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- if (p->q->ops->drop == NULL)
- return 0;
-
- len = p->q->ops->drop(p->q);
- if (len)
- sch->q.qlen--;
-
- return len;
-}
-
static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
@@ -489,7 +473,6 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
.enqueue = dsmark_enqueue,
.dequeue = dsmark_dequeue,
.peek = dsmark_peek,
- .drop = dsmark_drop,
.init = dsmark_init,
.reset = dsmark_reset,
.destroy = dsmark_destroy,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2e4bd2c0a50c..baeed6a78d28 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,23 +19,26 @@
/* 1 band FIFO pseudo-"scheduler" */
-static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
return qdisc_enqueue_tail(skb, sch);
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
-static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
if (likely(skb_queue_len(&sch->q) < sch->limit))
return qdisc_enqueue_tail(skb, sch);
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
-static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
unsigned int prev_backlog;
@@ -44,7 +47,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
prev_backlog = sch->qstats.backlog;
/* queue full, remove one skb to fulfill the limit */
- __qdisc_queue_drop_head(sch, &sch->q);
+ __qdisc_queue_drop_head(sch, &sch->q, to_free);
qdisc_qstats_drop(sch);
qdisc_enqueue_tail(skb, sch);
@@ -103,7 +106,6 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
.enqueue = pfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
- .drop = qdisc_queue_drop,
.init = fifo_init,
.reset = qdisc_reset_queue,
.change = fifo_init,
@@ -118,7 +120,6 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
.enqueue = bfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
- .drop = qdisc_queue_drop,
.init = fifo_init,
.reset = qdisc_reset_queue,
.change = fifo_init,
@@ -133,7 +134,6 @@ struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
.enqueue = pfifo_tail_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
- .drop = qdisc_queue_drop_head,
.init = fifo_init,
.reset = qdisc_reset_queue,
.change = fifo_init,
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 3c6a47d66a04..e5458b99e09c 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -368,18 +368,19 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
}
}
-static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct fq_flow *f;
if (unlikely(sch->q.qlen >= sch->limit))
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
f = fq_classify(skb, q);
if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
q->stat_flows_plimit++;
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
f->qlen++;
@@ -445,8 +446,7 @@ begin:
if (!head->first) {
if (q->time_next_delayed_flow != ~0ULL)
qdisc_watchdog_schedule_ns(&q->watchdog,
- q->time_next_delayed_flow,
- false);
+ q->time_next_delayed_flow);
return NULL;
}
}
@@ -515,17 +515,25 @@ out:
return skb;
}
+static void fq_flow_purge(struct fq_flow *flow)
+{
+ rtnl_kfree_skbs(flow->head, flow->tail);
+ flow->head = NULL;
+ flow->qlen = 0;
+}
+
static void fq_reset(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct rb_root *root;
- struct sk_buff *skb;
struct rb_node *p;
struct fq_flow *f;
unsigned int idx;
- while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
- kfree_skb(skb);
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ fq_flow_purge(&q->internal);
if (!q->fq_root)
return;
@@ -536,8 +544,7 @@ static void fq_reset(struct Qdisc *sch)
f = container_of(p, struct fq_flow, fq_node);
rb_erase(p, root);
- while ((skb = fq_dequeue_head(sch, f)) != NULL)
- kfree_skb(skb);
+ fq_flow_purge(f);
kmem_cache_free(fq_flow_cachep, f);
}
@@ -738,7 +745,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!skb)
break;
drop_len += qdisc_pkt_len(skb);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
drop_count++;
}
qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index da250b2e06ae..a5ea0e9b6be4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -139,7 +139,8 @@ static inline void flow_queue_add(struct fq_codel_flow *flow,
skb->next = NULL;
}
-static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets)
+static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
+ struct sk_buff **to_free)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
@@ -171,8 +172,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets)
do {
skb = dequeue_head(flow);
len += qdisc_pkt_len(skb);
- mem += skb->truesize;
- kfree_skb(skb);
+ mem += get_codel_cb(skb)->mem_usage;
+ __qdisc_drop(skb, to_free);
} while (++i < max_packets && len < threshold);
flow->dropped += i;
@@ -184,16 +185,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets)
return idx;
}
-static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
-{
- unsigned int prev_backlog;
-
- prev_backlog = sch->qstats.backlog;
- fq_codel_drop(sch, 1U);
- return prev_backlog - sch->qstats.backlog;
-}
-
-static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
unsigned int idx, prev_backlog, prev_qlen;
@@ -206,7 +199,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (idx == 0) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
}
idx--;
@@ -223,7 +216,8 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
flow->deficit = q->quantum;
flow->dropped = 0;
}
- q->memory_usage += skb->truesize;
+ get_codel_cb(skb)->mem_usage = skb->truesize;
+ q->memory_usage += get_codel_cb(skb)->mem_usage;
memory_limited = q->memory_usage > q->memory_limit;
if (++sch->q.qlen <= sch->limit && !memory_limited)
return NET_XMIT_SUCCESS;
@@ -238,7 +232,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* So instead of dropping a single packet, drop half of its backlog
* with a 64 packets limit to not add a too big cpu spike here.
*/
- ret = fq_codel_drop(sch, q->drop_batch_size);
+ ret = fq_codel_drop(sch, q->drop_batch_size, to_free);
prev_qlen -= sch->q.qlen;
prev_backlog -= sch->qstats.backlog;
@@ -274,7 +268,7 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
if (flow->head) {
skb = dequeue_head(flow);
q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb);
- q->memory_usage -= skb->truesize;
+ q->memory_usage -= get_codel_cb(skb)->mem_usage;
sch->q.qlen--;
sch->qstats.backlog -= qdisc_pkt_len(skb);
}
@@ -285,7 +279,8 @@ static void drop_func(struct sk_buff *skb, void *ctx)
{
struct Qdisc *sch = ctx;
- qdisc_drop(skb, sch);
+ kfree_skb(skb);
+ qdisc_qstats_drop(sch);
}
static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
@@ -345,6 +340,12 @@ begin:
return skb;
}
+static void fq_codel_flow_purge(struct fq_codel_flow *flow)
+{
+ rtnl_kfree_skbs(flow->head, flow->tail);
+ flow->head = NULL;
+}
+
static void fq_codel_reset(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -355,18 +356,13 @@ static void fq_codel_reset(struct Qdisc *sch)
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
- while (flow->head) {
- struct sk_buff *skb = dequeue_head(flow);
-
- qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
- }
-
+ fq_codel_flow_purge(flow);
INIT_LIST_HEAD(&flow->flowchain);
codel_vars_init(&flow->cvars);
}
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
q->memory_usage = 0;
}
@@ -442,7 +438,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
struct sk_buff *skb = fq_codel_dequeue(sch);
q->cstats.drop_len += qdisc_pkt_len(skb);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
q->cstats.drop_count++;
}
qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
@@ -578,11 +574,13 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.qdisc_stats.memory_usage = q->memory_usage;
st.qdisc_stats.drop_overmemory = q->drop_overmemory;
+ sch_tree_lock(sch);
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
list_for_each(pos, &q->old_flows)
st.qdisc_stats.old_flows_len++;
+ sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
}
@@ -636,7 +634,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx < q->flows_cnt) {
const struct fq_codel_flow *flow = &q->flows[idx];
- const struct sk_buff *skb = flow->head;
+ const struct sk_buff *skb;
memset(&xstats, 0, sizeof(xstats));
xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
@@ -654,9 +652,14 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
codel_time_to_us(delta) :
-codel_time_to_us(-delta);
}
- while (skb) {
- qs.qlen++;
- skb = skb->next;
+ if (flow->head) {
+ sch_tree_lock(sch);
+ skb = flow->head;
+ while (skb) {
+ qs.qlen++;
+ skb = skb->next;
+ }
+ sch_tree_unlock(sch);
}
qs.backlog = q->backlogs[idx];
qs.drops = flow->dropped;
@@ -709,7 +712,6 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.enqueue = fq_codel_enqueue,
.dequeue = fq_codel_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = fq_codel_qdisc_drop,
.init = fq_codel_init,
.reset = fq_codel_reset,
.destroy = fq_codel_destroy,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f9e0e9c03d0a..e95b67cd5718 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -77,6 +77,34 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
skb->next = NULL;
}
+/* This variant of try_bulk_dequeue_skb() makes sure
+ * all skbs in the chain are for the same txq
+ */
+static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
+ struct sk_buff *skb,
+ int *packets)
+{
+ int mapping = skb_get_queue_mapping(skb);
+ struct sk_buff *nskb;
+ int cnt = 0;
+
+ do {
+ nskb = q->dequeue(q);
+ if (!nskb)
+ break;
+ if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
+ q->skb_bad_txq = nskb;
+ qdisc_qstats_backlog_inc(q, nskb);
+ q->q.qlen++;
+ break;
+ }
+ skb->next = nskb;
+ skb = nskb;
+ } while (++cnt < 8);
+ (*packets) += cnt;
+ skb->next = NULL;
+}
+
/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
* A requeued skb (via q->gso_skb) can also be a SKB list.
*/
@@ -87,8 +115,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
const struct netdev_queue *txq = q->dev_queue;
*packets = 1;
- *validate = true;
if (unlikely(skb)) {
+ /* skb in gso_skb were already validated */
+ *validate = false;
/* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
@@ -97,22 +126,37 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
q->q.qlen--;
} else
skb = NULL;
- /* skb in gso_skb were already validated */
- *validate = false;
- } else {
- if (!(q->flags & TCQ_F_ONETXQUEUE) ||
- !netif_xmit_frozen_or_stopped(txq)) {
- skb = q->dequeue(q);
- if (skb && qdisc_may_bulk(q))
- try_bulk_dequeue_skb(q, skb, txq, packets);
+ return skb;
+ }
+ *validate = true;
+ skb = q->skb_bad_txq;
+ if (unlikely(skb)) {
+ /* check the reason of requeuing without tx lock first */
+ txq = skb_get_tx_queue(txq->dev, skb);
+ if (!netif_xmit_frozen_or_stopped(txq)) {
+ q->skb_bad_txq = NULL;
+ qdisc_qstats_backlog_dec(q, skb);
+ q->q.qlen--;
+ goto bulk;
}
+ return NULL;
+ }
+ if (!(q->flags & TCQ_F_ONETXQUEUE) ||
+ !netif_xmit_frozen_or_stopped(txq))
+ skb = q->dequeue(q);
+ if (skb) {
+bulk:
+ if (qdisc_may_bulk(q))
+ try_bulk_dequeue_skb(q, skb, txq, packets);
+ else
+ try_bulk_dequeue_skb_slow(q, skb, packets);
}
return skb;
}
/*
* Transmit possibly several skbs, and handle the return status as
- * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
+ * required. Owning running seqcount bit guarantees that
* only one CPU can execute this function.
*
* Returns to the caller:
@@ -165,7 +209,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
/*
* NOTE: Called under qdisc_lock(q) with locally disabled BH.
*
- * __QDISC___STATE_RUNNING guarantees only one CPU can process
+ * running seqcount guarantees only one CPU can process
* this qdisc at a time. qdisc_lock(q) serializes queue accesses for
* this queue.
*
@@ -348,9 +392,10 @@ EXPORT_SYMBOL(netif_carrier_off);
cheaper.
*/
-static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
+static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
+ struct sk_buff **to_free)
{
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return NET_XMIT_CN;
}
@@ -381,6 +426,7 @@ struct Qdisc noop_qdisc = {
.list = LIST_HEAD_INIT(noop_qdisc.list),
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
+ .running = SEQCNT_ZERO(noop_qdisc.running),
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);
@@ -438,7 +484,8 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
return priv->q + band;
}
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
+ struct sk_buff **to_free)
{
if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -450,7 +497,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
return __qdisc_enqueue_tail(skb, qdisc, list);
}
- return qdisc_drop(skb, qdisc);
+ return qdisc_drop(skb, qdisc, to_free);
}
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
@@ -492,7 +539,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __qdisc_reset_queue(qdisc, band2list(priv, prio));
+ __qdisc_reset_queue(band2list(priv, prio));
priv->bitmap = 0;
qdisc->qstats.backlog = 0;
@@ -539,6 +586,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
EXPORT_SYMBOL(pfifo_fast_ops);
static struct lock_class_key qdisc_tx_busylock;
+static struct lock_class_key qdisc_running_key;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops)
@@ -572,6 +620,10 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+ seqcount_init(&sch->running);
+ lockdep_set_class(&sch->running,
+ dev->qdisc_running_key ?: &qdisc_running_key);
+
sch->ops = ops;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
@@ -616,11 +668,14 @@ void qdisc_reset(struct Qdisc *qdisc)
if (ops->reset)
ops->reset(qdisc);
+ kfree_skb(qdisc->skb_bad_txq);
+ qdisc->skb_bad_txq = NULL;
+
if (qdisc->gso_skb) {
kfree_skb_list(qdisc->gso_skb);
qdisc->gso_skb = NULL;
- qdisc->q.qlen = 0;
}
+ qdisc->q.qlen = 0;
}
EXPORT_SYMBOL(qdisc_reset);
@@ -659,6 +714,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
dev_put(qdisc_dev(qdisc));
kfree_skb_list(qdisc->gso_skb);
+ kfree_skb(qdisc->skb_bad_txq);
/*
* gen_estimator est_timer() might access qdisc->q.lock,
* wait a RCU grace period before freeing qdisc.
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 80105109f756..c78a093c551a 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -149,7 +149,8 @@ static inline int gred_use_harddrop(struct gred_sched *t)
return t->red_flags & TC_RED_HARDDROP;
}
-static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct gred_sched_data *q = NULL;
struct gred_sched *t = qdisc_priv(sch);
@@ -237,10 +238,10 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
q->stats.pdrop++;
drop:
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
congestion_drop:
- qdisc_drop(skb, sch);
+ qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
}
@@ -276,40 +277,6 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch)
return NULL;
}
-static unsigned int gred_drop(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- struct gred_sched *t = qdisc_priv(sch);
-
- skb = qdisc_dequeue_tail(sch);
- if (skb) {
- unsigned int len = qdisc_pkt_len(skb);
- struct gred_sched_data *q;
- u16 dp = tc_index_to_dp(skb);
-
- if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
- net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x while dropping, screwing up backlog\n",
- tc_index_to_dp(skb));
- } else {
- q->backlog -= len;
- q->stats.other++;
-
- if (gred_wred_mode(t)) {
- if (!sch->qstats.backlog)
- red_start_of_idle_period(&t->wred_set);
- } else {
- if (!q->backlog)
- red_start_of_idle_period(&q->vars);
- }
- }
-
- qdisc_drop(skb, sch);
- return len;
- }
-
- return 0;
-}
-
static void gred_reset(struct Qdisc *sch)
{
int i;
@@ -623,7 +590,6 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
.enqueue = gred_enqueue,
.dequeue = gred_dequeue,
.peek = qdisc_peek_head,
- .drop = gred_drop,
.init = gred_init,
.reset = gred_reset,
.destroy = gred_destroy,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 1ac9f9f03fe3..8cb5eff7b79c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1015,11 +1015,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cur_time = psched_get_time();
if (tca[TCA_RATE]) {
- spinlock_t *lock = qdisc_root_sleeping_lock(sch);
-
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- lock,
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -1068,7 +1067,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
kfree(cl);
@@ -1373,7 +1373,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.work = cl->cl_total;
xstats.rtwork = cl->cl_cumul;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
return -1;
@@ -1572,7 +1572,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
}
static int
-hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
struct hfsc_class *cl;
int uninitialized_var(err);
@@ -1581,11 +1581,11 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return err;
}
- err = qdisc_enqueue(skb, cl->qdisc);
+ err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
@@ -1664,7 +1664,6 @@ hfsc_dequeue(struct Qdisc *sch)
set_passive(cl);
}
- qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
@@ -1672,32 +1671,6 @@ hfsc_dequeue(struct Qdisc *sch)
return skb;
}
-static unsigned int
-hfsc_drop(struct Qdisc *sch)
-{
- struct hfsc_sched *q = qdisc_priv(sch);
- struct hfsc_class *cl;
- unsigned int len;
-
- list_for_each_entry(cl, &q->droplist, dlist) {
- if (cl->qdisc->ops->drop != NULL &&
- (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) {
- if (cl->qdisc->q.qlen == 0) {
- update_vf(cl, 0, 0);
- set_passive(cl);
- } else {
- list_move_tail(&cl->dlist, &q->droplist);
- }
- cl->qstats.drops++;
- qdisc_qstats_drop(sch);
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- return len;
- }
- }
- return 0;
-}
-
static const struct Qdisc_class_ops hfsc_class_ops = {
.change = hfsc_change_class,
.delete = hfsc_delete_class,
@@ -1724,7 +1697,6 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
.enqueue = hfsc_enqueue,
.dequeue = hfsc_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = hfsc_drop,
.cl_ops = &hfsc_class_ops,
.priv_size = sizeof(struct hfsc_sched),
.owner = THIS_MODULE
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 13d6f83ec491..e3d0458af17b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -345,7 +345,7 @@ static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
skb->next = NULL;
}
-static unsigned int hhf_drop(struct Qdisc *sch)
+static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
struct hhf_sched_data *q = qdisc_priv(sch);
struct wdrr_bucket *bucket;
@@ -359,25 +359,16 @@ static unsigned int hhf_drop(struct Qdisc *sch)
struct sk_buff *skb = dequeue_head(bucket);
sch->q.qlen--;
- qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
+ qdisc_drop(skb, sch, to_free);
}
/* Return id of the bucket from which the packet was dropped. */
return bucket - q->buckets;
}
-static unsigned int hhf_qdisc_drop(struct Qdisc *sch)
-{
- unsigned int prev_backlog;
-
- prev_backlog = sch->qstats.backlog;
- hhf_drop(sch);
- return prev_backlog - sch->qstats.backlog;
-}
-
-static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct hhf_sched_data *q = qdisc_priv(sch);
enum wdrr_bucket_idx idx;
@@ -415,7 +406,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* Return Congestion Notification only if we dropped a packet from this
* bucket.
*/
- if (hhf_drop(sch) == idx)
+ if (hhf_drop(sch, to_free) == idx)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this. */
@@ -473,7 +464,7 @@ static void hhf_reset(struct Qdisc *sch)
struct sk_buff *skb;
while ((skb = hhf_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
static void *hhf_zalloc(size_t sz)
@@ -583,7 +574,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch);
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
prev_backlog - sch->qstats.backlog);
@@ -709,7 +700,6 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
.enqueue = hhf_enqueue,
.dequeue = hhf_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = hhf_qdisc_drop,
.init = hhf_init,
.reset = hhf_reset,
.destroy = hhf_destroy,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 62f9d8100c6e..91982d9784b3 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -117,7 +117,6 @@ struct htb_class {
* Written often fields
*/
struct gnet_stats_basic_packed bstats;
- struct gnet_stats_queue qstats;
struct tc_htb_xstats xstats; /* our special stats */
/* token bucket parameters */
@@ -140,6 +139,8 @@ struct htb_class {
enum htb_cmode cmode; /* current mode of the class */
struct rb_node pq_node; /* node for event queue */
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
+
+ unsigned int drops ____cacheline_aligned_in_smp;
};
struct htb_level {
@@ -569,7 +570,8 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
list_del_init(&cl->un.leaf.drop_list);
}
-static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
int uninitialized_var(ret);
struct htb_sched *q = qdisc_priv(sch);
@@ -581,19 +583,20 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
__skb_queue_tail(&q->direct_queue, skb);
q->direct_pkts++;
} else {
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
#endif
- } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+ } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q,
+ to_free)) != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret)) {
qdisc_qstats_drop(sch);
- cl->qstats.drops++;
+ cl->drops++;
}
return ret;
} else {
@@ -889,7 +892,6 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
if (skb != NULL) {
ok:
qdisc_bstats_update(sch, skb);
- qdisc_unthrottled(sch);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
@@ -929,38 +931,13 @@ ok:
}
qdisc_qstats_overlimit(sch);
if (likely(next_event > q->now))
- qdisc_watchdog_schedule_ns(&q->watchdog, next_event, true);
+ qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
else
schedule_work(&q->work);
fin:
return skb;
}
-/* try to drop from each class (by prio) until one succeed */
-static unsigned int htb_drop(struct Qdisc *sch)
-{
- struct htb_sched *q = qdisc_priv(sch);
- int prio;
-
- for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
- struct list_head *p;
- list_for_each(p, q->drops + prio) {
- struct htb_class *cl = list_entry(p, struct htb_class,
- un.leaf.drop_list);
- unsigned int len;
- if (cl->un.leaf.q->ops->drop &&
- (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- if (!cl->un.leaf.q->q.qlen)
- htb_deactivate(q, cl);
- return len;
- }
- }
- }
- return 0;
-}
-
/* reset all classes */
/* always caled under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
@@ -983,7 +960,7 @@ static void htb_reset(struct Qdisc *sch)
}
}
qdisc_watchdog_cancel(&q->watchdog);
- __skb_queue_purge(&q->direct_queue);
+ __qdisc_reset_queue(&q->direct_queue);
sch->q.qlen = 0;
sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
@@ -1136,16 +1113,22 @@ static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
struct htb_class *cl = (struct htb_class *)arg;
+ struct gnet_stats_queue qs = {
+ .drops = cl->drops,
+ };
__u32 qlen = 0;
- if (!cl->level && cl->un.leaf.q)
+ if (!cl->level && cl->un.leaf.q) {
qlen = cl->un.leaf.q->q.qlen;
+ qs.backlog = cl->un.leaf.q->qstats.backlog;
+ }
cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
+ gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
@@ -1258,7 +1241,7 @@ static void htb_destroy(struct Qdisc *sch)
htb_destroy_class(sch, cl);
}
qdisc_class_hash_destroy(&q->clhash);
- __skb_queue_purge(&q->direct_queue);
+ __qdisc_reset_queue(&q->direct_queue);
}
static int htb_delete(struct Qdisc *sch, unsigned long arg)
@@ -1397,7 +1380,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (htb_rate_est || tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE] ? : &est.nla);
if (err) {
kfree(cl);
@@ -1459,11 +1443,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
parent->children++;
} else {
if (tca[TCA_RATE]) {
- spinlock_t *lock = qdisc_root_sleeping_lock(sch);
-
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- lock,
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -1601,7 +1584,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
.enqueue = htb_enqueue,
.dequeue = htb_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = htb_drop,
.init = htb_init,
.reset = htb_reset,
.destroy = htb_destroy,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 56a77b878eb3..b9439827c172 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -199,7 +199,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b8002ce3d010..549c66359924 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -342,7 +342,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
* hold here is the look on dev_queue->qdisc_sleeping
* also acquired below.
*/
- spin_unlock_bh(d->lock);
+ if (d->lock)
+ spin_unlock_bh(d->lock);
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
@@ -359,15 +360,17 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
spin_unlock_bh(qdisc_lock(qdisc));
}
/* Reclaim root sleeping lock before completing stats */
- spin_lock_bh(d->lock);
- if (gnet_stats_copy_basic(d, NULL, &bstats) < 0 ||
+ if (d->lock)
+ spin_lock_bh(d->lock);
+ if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
return -1;
} else {
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL,
&sch->qstats, sch->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index bcdd54bb101c..9ffbb025b37e 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -65,7 +65,8 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
}
static int
-multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct Qdisc *qdisc;
int ret;
@@ -76,12 +77,12 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
}
#endif
- ret = qdisc_enqueue(skb, qdisc);
+ ret = qdisc_enqueue(skb, qdisc, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -151,27 +152,6 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
}
-static unsigned int multiq_drop(struct Qdisc *sch)
-{
- struct multiq_sched_data *q = qdisc_priv(sch);
- int band;
- unsigned int len;
- struct Qdisc *qdisc;
-
- for (band = q->bands - 1; band >= 0; band--) {
- qdisc = q->queues[band];
- if (qdisc->ops->drop) {
- len = qdisc->ops->drop(qdisc);
- if (len != 0) {
- sch->q.qlen--;
- return len;
- }
- }
- }
- return 0;
-}
-
-
static void
multiq_reset(struct Qdisc *sch)
{
@@ -356,7 +336,8 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
@@ -415,7 +396,6 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
.enqueue = multiq_enqueue,
.dequeue = multiq_dequeue,
.peek = multiq_peek,
- .drop = multiq_drop,
.init = multiq_init,
.reset = multiq_reset,
.destroy = multiq_destroy,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 178f1630a036..aaaf02175338 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -368,9 +368,7 @@ static void tfifo_reset(struct Qdisc *sch)
struct sk_buff *skb = netem_rb_to_skb(p);
rb_erase(p, &q->t_root);
- skb->next = NULL;
- skb->prev = NULL;
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
}
@@ -399,7 +397,8 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
* when we statistically choose to corrupt one, we instead segment it, returning
* the first packet to be corrupted, and re-enqueue the remaining frames
*/
-static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
+static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct sk_buff *segs;
netdev_features_t features = netif_skb_features(skb);
@@ -407,7 +406,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs)) {
- qdisc_reshape_fail(skb, sch);
+ qdisc_drop(skb, sch, to_free);
return NULL;
}
consume_skb(skb);
@@ -420,7 +419,8 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
* NET_XMIT_DROP: queue length didn't change.
* NET_XMIT_SUCCESS: one skb was queued.
*/
-static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct netem_sched_data *q = qdisc_priv(sch);
/* We don't fill cb now as skb_unshare() may invalidate it */
@@ -445,7 +445,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
if (count == 0) {
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
@@ -465,7 +465,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
- rootq->enqueue(skb2, rootq);
+ rootq->enqueue(skb2, rootq, to_free);
q->duplicate = dupsave;
}
@@ -477,7 +477,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
if (skb_is_gso(skb)) {
- segs = netem_segment(skb, sch);
+ segs = netem_segment(skb, sch, to_free);
if (!segs)
return NET_XMIT_DROP;
} else {
@@ -487,10 +487,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb = segs;
segs = segs->next;
- if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
- (skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb))) {
- rc = qdisc_drop(skb, sch);
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (unlikely(!skb)) {
+ qdisc_qstats_drop(sch);
+ goto finish_segs;
+ }
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb)) {
+ qdisc_drop(skb, sch, to_free);
goto finish_segs;
}
@@ -499,7 +503,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
qdisc_qstats_backlog_inc(sch, skb);
@@ -559,7 +563,7 @@ finish_segs:
segs->next = NULL;
qdisc_skb_cb(segs)->pkt_len = segs->len;
last_len = segs->len;
- rc = qdisc_enqueue(segs, sch);
+ rc = qdisc_enqueue(segs, sch, to_free);
if (rc != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(rc))
qdisc_qstats_drop(sch);
@@ -576,50 +580,17 @@ finish_segs:
return NET_XMIT_SUCCESS;
}
-static unsigned int netem_drop(struct Qdisc *sch)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- unsigned int len;
-
- len = qdisc_queue_drop(sch);
-
- if (!len) {
- struct rb_node *p = rb_first(&q->t_root);
-
- if (p) {
- struct sk_buff *skb = netem_rb_to_skb(p);
-
- rb_erase(p, &q->t_root);
- sch->q.qlen--;
- skb->next = NULL;
- skb->prev = NULL;
- qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
- }
- }
- if (!len && q->qdisc && q->qdisc->ops->drop)
- len = q->qdisc->ops->drop(q->qdisc);
- if (len)
- qdisc_qstats_drop(sch);
-
- return len;
-}
-
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
struct rb_node *p;
- if (qdisc_is_throttled(sch))
- return NULL;
-
tfifo_dequeue:
skb = __skb_dequeue(&sch->q);
if (skb) {
qdisc_qstats_backlog_dec(sch, skb);
deliver:
- qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
}
@@ -651,8 +622,11 @@ deliver:
if (q->qdisc) {
unsigned int pkt_len = qdisc_pkt_len(skb);
- int err = qdisc_enqueue(skb, q->qdisc);
+ struct sk_buff *to_free = NULL;
+ int err;
+ err = qdisc_enqueue(skb, q->qdisc, &to_free);
+ kfree_skb_list(to_free);
if (err != NET_XMIT_SUCCESS &&
net_xmit_drop_count(err)) {
qdisc_qstats_drop(sch);
@@ -1143,7 +1117,6 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
.enqueue = netem_enqueue,
.dequeue = netem_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = netem_drop,
.init = netem_init,
.reset = netem_reset,
.destroy = netem_destroy,
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 71ae3b9629f9..a570b0bb254c 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -134,7 +134,8 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
return false;
}
-static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct pie_sched_data *q = qdisc_priv(sch);
bool enqueue = false;
@@ -166,7 +167,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
out:
q->stats.dropped++;
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
@@ -234,7 +235,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
- qdisc_drop(skb, sch);
+ rtnl_qdisc_drop(skb, sch);
}
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 5abfe44678d4..1c6cbab3e7b9 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -64,6 +64,8 @@ struct plug_sched_data {
*/
bool unplug_indefinite;
+ bool throttled;
+
/* Queue Limit in bytes */
u32 limit;
@@ -86,7 +88,8 @@ struct plug_sched_data {
u32 pkts_to_release;
};
-static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct plug_sched_data *q = qdisc_priv(sch);
@@ -96,14 +99,14 @@ static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return qdisc_enqueue_tail(skb, sch);
}
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
static struct sk_buff *plug_dequeue(struct Qdisc *sch)
{
struct plug_sched_data *q = qdisc_priv(sch);
- if (qdisc_is_throttled(sch))
+ if (q->throttled)
return NULL;
if (!q->unplug_indefinite) {
@@ -111,7 +114,7 @@ static struct sk_buff *plug_dequeue(struct Qdisc *sch)
/* No more packets to dequeue. Block the queue
* and wait for the next release command.
*/
- qdisc_throttled(sch);
+ q->throttled = true;
return NULL;
}
q->pkts_to_release--;
@@ -141,7 +144,7 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt)
q->limit = ctl->limit;
}
- qdisc_throttled(sch);
+ q->throttled = true;
return 0;
}
@@ -173,7 +176,7 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt)
q->pkts_last_epoch = q->pkts_current_epoch;
q->pkts_current_epoch = 0;
if (q->unplug_indefinite)
- qdisc_throttled(sch);
+ q->throttled = true;
q->unplug_indefinite = false;
break;
case TCQ_PLUG_RELEASE_ONE:
@@ -182,7 +185,7 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt)
*/
q->pkts_to_release += q->pkts_last_epoch;
q->pkts_last_epoch = 0;
- qdisc_unthrottled(sch);
+ q->throttled = false;
netif_schedule_queue(sch->dev_queue);
break;
case TCQ_PLUG_RELEASE_INDEFINITE:
@@ -190,7 +193,7 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt)
q->pkts_to_release = 0;
q->pkts_last_epoch = 0;
q->pkts_current_epoch = 0;
- qdisc_unthrottled(sch);
+ q->throttled = false;
netif_schedule_queue(sch->dev_queue);
break;
case TCQ_PLUG_LIMIT:
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index a356450b747b..8f575899adfa 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -67,7 +67,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
}
static int
-prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
struct Qdisc *qdisc;
int ret;
@@ -83,7 +83,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
#endif
- ret = qdisc_enqueue(skb, qdisc);
+ ret = qdisc_enqueue(skb, qdisc, to_free);
if (ret == NET_XMIT_SUCCESS) {
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
@@ -127,25 +127,6 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch)
}
-static unsigned int prio_drop(struct Qdisc *sch)
-{
- struct prio_sched_data *q = qdisc_priv(sch);
- int prio;
- unsigned int len;
- struct Qdisc *qdisc;
-
- for (prio = q->bands-1; prio >= 0; prio--) {
- qdisc = q->queues[prio];
- if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- return len;
- }
- }
- return 0;
-}
-
-
static void
prio_reset(struct Qdisc *sch)
{
@@ -304,7 +285,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
@@ -363,7 +345,6 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
.enqueue = prio_enqueue,
.dequeue = prio_dequeue,
.peek = prio_peek,
- .drop = prio_drop,
.init = prio_init,
.reset = prio_reset,
.destroy = prio_destroy,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f18857febdad..f27ffee106f6 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -460,7 +460,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
return err;
@@ -486,7 +487,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ NULL,
+ qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err)
goto destroy_class;
@@ -663,7 +665,8 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL,
&cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
@@ -1214,7 +1217,8 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
return agg;
}
-static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl;
@@ -1237,11 +1241,11 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
qdisc_pkt_len(skb));
if (err) {
cl->qstats.drops++;
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
}
- err = qdisc_enqueue(skb, cl->qdisc);
+ err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
pr_debug("qfq_enqueue: enqueue failed %d\n", err);
if (net_xmit_drop_count(err)) {
@@ -1422,52 +1426,6 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
qfq_deactivate_class(q, cl);
}
-static unsigned int qfq_drop_from_slot(struct qfq_sched *q,
- struct hlist_head *slot)
-{
- struct qfq_aggregate *agg;
- struct qfq_class *cl;
- unsigned int len;
-
- hlist_for_each_entry(agg, slot, next) {
- list_for_each_entry(cl, &agg->active, alist) {
-
- if (!cl->qdisc->ops->drop)
- continue;
-
- len = cl->qdisc->ops->drop(cl->qdisc);
- if (len > 0) {
- if (cl->qdisc->q.qlen == 0)
- qfq_deactivate_class(q, cl);
-
- return len;
- }
- }
- }
- return 0;
-}
-
-static unsigned int qfq_drop(struct Qdisc *sch)
-{
- struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_group *grp;
- unsigned int i, j, len;
-
- for (i = 0; i <= QFQ_MAX_INDEX; i++) {
- grp = &q->groups[i];
- for (j = 0; j < QFQ_MAX_SLOTS; j++) {
- len = qfq_drop_from_slot(q, &grp->slots[j]);
- if (len > 0) {
- sch->q.qlen--;
- return len;
- }
- }
-
- }
-
- return 0;
-}
-
static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -1562,7 +1520,6 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
.enqueue = qfq_enqueue,
.dequeue = qfq_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = qfq_drop,
.init = qfq_init_qdisc,
.reset = qfq_reset_qdisc,
.destroy = qfq_destroy_qdisc,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 91578bdd378c..249b2a18acbd 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -56,7 +56,8 @@ static inline int red_use_harddrop(struct red_sched_data *q)
return q->flags & TC_RED_HARDDROP;
}
-static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
@@ -95,7 +96,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
}
- ret = qdisc_enqueue(skb, child);
+ ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
@@ -106,7 +107,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
congestion_drop:
- qdisc_drop(skb, sch);
+ qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
}
@@ -136,26 +137,6 @@ static struct sk_buff *red_peek(struct Qdisc *sch)
return child->ops->peek(child);
}
-static unsigned int red_drop(struct Qdisc *sch)
-{
- struct red_sched_data *q = qdisc_priv(sch);
- struct Qdisc *child = q->qdisc;
- unsigned int len;
-
- if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
- q->stats.other++;
- qdisc_qstats_drop(sch);
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- return len;
- }
-
- if (!red_is_idling(&q->vars))
- red_start_of_idle_period(&q->vars);
-
- return 0;
-}
-
static void red_reset(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -365,7 +346,6 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
.enqueue = red_enqueue,
.dequeue = red_dequeue,
.peek = red_peek,
- .drop = red_drop,
.init = red_init,
.reset = red_reset,
.destroy = red_destroy,
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index c69611640fa5..add3cc7d37ec 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -275,7 +275,8 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
return false;
}
-static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct sfb_sched_data *q = qdisc_priv(sch);
@@ -397,7 +398,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
enqueue:
- ret = qdisc_enqueue(skb, child);
+ ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
sch->q.qlen++;
increment_qlen(skb, q);
@@ -408,7 +409,7 @@ enqueue:
return ret;
drop:
- qdisc_drop(skb, sch);
+ qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
other_drop:
if (ret & __NET_XMIT_BYPASS)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 498f0a2cb47f..7f195ed4d568 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -343,7 +343,7 @@ static int sfq_headdrop(const struct sfq_sched_data *q)
}
static int
-sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
struct sfq_sched_data *q = qdisc_priv(sch);
unsigned int hash, dropped;
@@ -367,7 +367,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (x == SFQ_EMPTY_SLOT) {
x = q->dep[0].next; /* get a free slot */
if (x >= SFQ_MAX_FLOWS)
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
q->ht[hash] = x;
slot = &q->slots[x];
slot->hash = hash;
@@ -424,14 +424,14 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (slot->qlen >= q->maxdepth) {
congestion_drop:
if (!sfq_headdrop(q))
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
/* We know we have at least one packet in queue */
head = slot_dequeue_head(slot);
delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
sch->qstats.backlog -= delta;
slot->backlog -= delta;
- qdisc_drop(head, sch);
+ qdisc_drop(head, sch, to_free);
slot_queue_add(slot, skb);
return NET_XMIT_CN;
@@ -520,7 +520,7 @@ sfq_reset(struct Qdisc *sch)
struct sk_buff *skb;
while ((skb = sfq_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ rtnl_kfree_skbs(skb, skb);
}
/*
@@ -896,7 +896,6 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
.enqueue = sfq_enqueue,
.dequeue = sfq_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = sfq_drop,
.init = sfq_init,
.reset = sfq_reset,
.destroy = sfq_destroy,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 3161e491990b..303355c449ab 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -155,7 +155,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
-static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
+static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *segs, *nskb;
@@ -166,7 +167,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs))
- return qdisc_reshape_fail(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
nb = 0;
while (segs) {
@@ -174,7 +175,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
segs->next = NULL;
qdisc_skb_cb(segs)->pkt_len = segs->len;
len += segs->len;
- ret = qdisc_enqueue(segs, q->qdisc);
+ ret = qdisc_enqueue(segs, q->qdisc, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);
@@ -190,17 +191,18 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
-static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
{
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
if (qdisc_pkt_len(skb) > q->max_size) {
if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
- return tbf_segment(skb, sch);
- return qdisc_reshape_fail(skb, sch);
+ return tbf_segment(skb, sch, to_free);
+ return qdisc_drop(skb, sch, to_free);
}
- ret = qdisc_enqueue(skb, q->qdisc);
+ ret = qdisc_enqueue(skb, q->qdisc, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);
@@ -212,19 +214,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
-static unsigned int tbf_drop(struct Qdisc *sch)
-{
- struct tbf_sched_data *q = qdisc_priv(sch);
- unsigned int len = 0;
-
- if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
- sch->qstats.backlog -= len;
- sch->q.qlen--;
- qdisc_qstats_drop(sch);
- }
- return len;
-}
-
static bool tbf_peak_present(const struct tbf_sched_data *q)
{
return q->peak.rate_bytes_ps;
@@ -267,14 +256,12 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
q->ptokens = ptoks;
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
- qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
}
qdisc_watchdog_schedule_ns(&q->watchdog,
- now + max_t(long, -toks, -ptoks),
- true);
+ now + max_t(long, -toks, -ptoks));
/* Maybe we have a shorter packet in the queue,
which can be sent now. It sounds cool,
@@ -559,7 +546,6 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
.enqueue = tbf_enqueue,
.dequeue = tbf_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = tbf_drop,
.init = tbf_init,
.reset = tbf_reset,
.destroy = tbf_destroy,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index e02687185a59..2cd9b4478b92 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -77,7 +77,7 @@ struct teql_sched_data {
/* "teql*" qdisc routines */
static int
-teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);
@@ -87,7 +87,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
- return qdisc_drop(skb, sch);
+ return qdisc_drop(skb, sch, to_free);
}
static struct sk_buff *
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 0fca5824ad0e..6c4f7496cec6 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
- output.o input.o debug.o ssnmap.o auth.o
+ output.o input.o debug.o ssnmap.o auth.o \
+ offload.o
sctp_probe-y := probe.o
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a701527a9480..6f8e676d285e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -112,7 +112,6 @@ int sctp_rcv(struct sk_buff *skb)
struct sctp_ep_common *rcvr;
struct sctp_transport *transport = NULL;
struct sctp_chunk *chunk;
- struct sctphdr *sh;
union sctp_addr src;
union sctp_addr dest;
int family;
@@ -124,28 +123,29 @@ int sctp_rcv(struct sk_buff *skb)
__SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);
- if (skb_linearize(skb))
+ /* If packet is too small to contain a single chunk, let's not
+ * waste time on it anymore.
+ */
+ if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) +
+ skb_transport_offset(skb))
goto discard_it;
- sh = sctp_hdr(skb);
+ if (!pskb_may_pull(skb, sizeof(struct sctphdr)))
+ goto discard_it;
- /* Pull up the IP and SCTP headers. */
+ /* Pull up the IP header. */
__skb_pull(skb, skb_transport_offset(skb));
- if (skb->len < sizeof(struct sctphdr))
- goto discard_it;
skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
- else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
+ else if (!sctp_checksum_disable &&
+ !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+ sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
- skb_pull(skb, sizeof(struct sctphdr));
-
- /* Make sure we at least have chunk headers worth of data left. */
- if (skb->len < sizeof(struct sctp_chunkhdr))
- goto discard_it;
+ __skb_pull(skb, sizeof(struct sctphdr));
family = ipver2af(ip_hdr(skb)->version);
af = sctp_get_af_specific(family);
@@ -230,7 +230,7 @@ int sctp_rcv(struct sk_buff *skb)
chunk->rcvr = rcvr;
/* Remember the SCTP header. */
- chunk->sctp_hdr = sh;
+ chunk->sctp_hdr = sctp_hdr(skb);
/* Set the source and destination addresses of the incoming chunk. */
sctp_init_addrs(chunk, &src, &dest);
@@ -660,19 +660,23 @@ out_unlock:
*/
static int sctp_rcv_ootb(struct sk_buff *skb)
{
- sctp_chunkhdr_t *ch;
- __u8 *ch_end;
-
- ch = (sctp_chunkhdr_t *) skb->data;
+ sctp_chunkhdr_t *ch, _ch;
+ int ch_end, offset = 0;
/* Scan through all the chunks in the packet. */
do {
+ /* Make sure we have at least the header there */
+ if (offset + sizeof(sctp_chunkhdr_t) > skb->len)
+ break;
+
+ ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
+
/* Break out if chunk length is less then minimal. */
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
break;
- ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- if (ch_end > skb_tail_pointer(skb))
+ ch_end = offset + WORD_ROUND(ntohs(ch->length));
+ if (ch_end > skb->len)
break;
/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
@@ -697,8 +701,8 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data)
goto discard;
- ch = (sctp_chunkhdr_t *) ch_end;
- } while (ch_end < skb_tail_pointer(skb));
+ offset = ch_end;
+ } while (ch_end < skb->len);
return 0;
@@ -1173,6 +1177,17 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
{
sctp_chunkhdr_t *ch;
+ /* We do not allow GSO frames here as we need to linearize and
+ * then cannot guarantee frame boundaries. This shouldn't be an
+ * issue as packets hitting this are mostly INIT or INIT-ACK and
+ * those cannot be on GSO-style anyway.
+ */
+ if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
+ return NULL;
+
+ if (skb_linearize(skb))
+ return NULL;
+
ch = (sctp_chunkhdr_t *) skb->data;
/* The code below will attempt to walk the chunk and extract
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 9d87bba0ff1d..edabbbdfca54 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -130,13 +130,25 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
* at this time.
*/
- if ((chunk = queue->in_progress)) {
+ chunk = queue->in_progress;
+ if (chunk) {
/* There is a packet that we have been working on.
* Any post processing work to do before we move on?
*/
if (chunk->singleton ||
chunk->end_of_packet ||
chunk->pdiscard) {
+ if (chunk->head_skb == chunk->skb) {
+ chunk->skb = skb_shinfo(chunk->skb)->frag_list;
+ goto new_skb;
+ }
+ if (chunk->skb->next) {
+ chunk->skb = chunk->skb->next;
+ goto new_skb;
+ }
+
+ if (chunk->head_skb)
+ chunk->skb = chunk->head_skb;
sctp_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
@@ -152,34 +164,64 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
if (!chunk) {
struct list_head *entry;
+next_chunk:
/* Is the queue empty? */
- if (list_empty(&queue->in_chunk_list))
+ entry = sctp_list_dequeue(&queue->in_chunk_list);
+ if (!entry)
return NULL;
- entry = queue->in_chunk_list.next;
- chunk = queue->in_progress =
- list_entry(entry, struct sctp_chunk, list);
- list_del_init(entry);
+ chunk = list_entry(entry, struct sctp_chunk, list);
- /* This is the first chunk in the packet. */
- chunk->singleton = 1;
- ch = (sctp_chunkhdr_t *) chunk->skb->data;
- chunk->data_accepted = 0;
+ /* Linearize if it's not GSO */
+ if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
+ skb_is_nonlinear(chunk->skb)) {
+ if (skb_linearize(chunk->skb)) {
+ __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
+ sctp_chunk_free(chunk);
+ goto next_chunk;
+ }
+
+ /* Update sctp_hdr as it probably changed */
+ chunk->sctp_hdr = sctp_hdr(chunk->skb);
+ }
+
+ if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
+ /* GSO-marked skbs but without frags, handle
+ * them normally
+ */
+ if (skb_shinfo(chunk->skb)->frag_list)
+ chunk->head_skb = chunk->skb;
+
+ /* skbs with "cover letter" */
+ if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len)
+ chunk->skb = skb_shinfo(chunk->skb)->frag_list;
+
+ if (WARN_ON(!chunk->skb)) {
+ __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
+ sctp_chunk_free(chunk);
+ goto next_chunk;
+ }
+ }
if (chunk->asoc)
sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
+
+ queue->in_progress = chunk;
+
+new_skb:
+ /* This is the first chunk in the packet. */
+ ch = (sctp_chunkhdr_t *) chunk->skb->data;
+ chunk->singleton = 1;
+ chunk->data_accepted = 0;
+ chunk->pdiscard = 0;
+ chunk->auth = 0;
+ chunk->has_asconf = 0;
+ chunk->end_of_packet = 0;
+ chunk->ecn_ce_done = 0;
}
chunk->chunk_hdr = ch;
chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- /* In the unlikely case of an IP reassembly, the skb could be
- * non-linear. If so, update chunk_end so that it doesn't go past
- * the skb->tail.
- */
- if (unlikely(skb_is_nonlinear(chunk->skb))) {
- if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
- chunk->chunk_end = skb_tail_pointer(chunk->skb);
- }
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
new file mode 100644
index 000000000000..a37887b373a7
--- /dev/null
+++ b/net/sctp/offload.c
@@ -0,0 +1,98 @@
+/*
+ * sctp_offload - GRO/GSO Offloading for SCTP
+ *
+ * Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/time.h>
+#include <net/net_namespace.h>
+
+#include <linux/skbuff.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/checksum.h>
+#include <net/protocol.h>
+
+static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
+{
+ skb->ip_summed = CHECKSUM_NONE;
+ return sctp_compute_cksum(skb, skb_transport_offset(skb));
+}
+
+static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sctphdr *sh;
+
+ sh = sctp_hdr(skb);
+ if (!pskb_may_pull(skb, sizeof(*sh)))
+ goto out;
+
+ __skb_pull(skb, sizeof(*sh));
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ struct skb_shared_info *pinfo = skb_shinfo(skb);
+ struct sk_buff *frag_iter;
+
+ pinfo->gso_segs = 0;
+ if (skb->len != skb->data_len) {
+ /* Means we have chunks in here too */
+ pinfo->gso_segs++;
+ }
+
+ skb_walk_frags(skb, frag_iter)
+ pinfo->gso_segs++;
+
+ segs = NULL;
+ goto out;
+ }
+
+ segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
+ if (IS_ERR(segs))
+ goto out;
+
+ /* All that is left is update SCTP CRC if necessary */
+ if (!(features & NETIF_F_SCTP_CRC)) {
+ for (skb = segs; skb; skb = skb->next) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ sh = sctp_hdr(skb);
+ sh->checksum = sctp_gso_make_checksum(skb);
+ }
+ }
+ }
+
+out:
+ return segs;
+}
+
+static const struct net_offload sctp_offload = {
+ .callbacks = {
+ .gso_segment = sctp_gso_segment,
+ },
+};
+
+int __init sctp_offload_init(void)
+{
+ return inet_add_offload(&sctp_offload, IPPROTO_SCTP);
+}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9844fe573029..1541a91d6d9d 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet)
struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
__u32 vtag, int ecn_capable)
{
- struct sctp_chunk *chunk = NULL;
+ struct sctp_transport *tp = packet->transport;
+ struct sctp_association *asoc = tp->asoc;
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag;
+ if (asoc && tp->dst) {
+ struct sock *sk = asoc->base.sk;
+
+ rcu_read_lock();
+ if (__sk_dst_get(sk) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
+ }
+
+ if (sk_can_gso(sk)) {
+ struct net_device *dev = tp->dst->dev;
+
+ packet->max_size = dev->gso_max_size;
+ } else {
+ packet->max_size = asoc->pathmtu;
+ }
+ rcu_read_unlock();
+
+ } else {
+ packet->max_size = tp->pathmtu;
+ }
+
if (ecn_capable && sctp_packet_empty(packet)) {
- chunk = sctp_get_ecne_prepend(packet->transport->asoc);
+ struct sctp_chunk *chunk;
/* If there a is a prepend chunk stick it on the list before
* any other chunks get appended.
*/
+ chunk = sctp_get_ecne_prepend(asoc);
if (chunk)
sctp_packet_append_chunk(packet, chunk);
}
@@ -158,7 +182,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
sctp_xmit_t retval;
int error = 0;
- pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
+ pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__,
+ packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
case SCTP_XMIT_PMTU_FULL:
@@ -381,12 +406,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
struct sctphdr *sh;
- struct sk_buff *nskb;
+ struct sk_buff *nskb = NULL, *head = NULL;
struct sctp_chunk *chunk, *tmp;
struct sock *sk;
int err = 0;
int padding; /* How much padding do we need? */
+ int pkt_size;
__u8 has_data = 0;
+ int gso = 0;
+ int pktcount = 0;
struct dst_entry *dst;
unsigned char *auth = NULL; /* pointer to auth in skb data */
@@ -400,18 +428,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
sk = chunk->skb->sk;
- /* Allocate the new skb. */
- nskb = alloc_skb(packet->size + MAX_HEADER, gfp);
- if (!nskb)
+ /* Allocate the head skb, or main one if not in GSO */
+ if (packet->size > tp->pathmtu && !packet->ipfragok) {
+ if (sk_can_gso(sk)) {
+ gso = 1;
+ pkt_size = packet->overhead;
+ } else {
+ /* If this happens, we trash this packet and try
+ * to build a new one, hopefully correct this
+ * time. Application may notice this error.
+ */
+ pr_err_once("Trying to GSO but underlying device doesn't support it.");
+ goto nomem;
+ }
+ } else {
+ pkt_size = packet->size;
+ }
+ head = alloc_skb(pkt_size + MAX_HEADER, gfp);
+ if (!head)
goto nomem;
+ if (gso) {
+ NAPI_GRO_CB(head)->last = head;
+ skb_shinfo(head)->gso_type = sk->sk_gso_type;
+ }
/* Make sure the outbound skb has enough header room reserved. */
- skb_reserve(nskb, packet->overhead + MAX_HEADER);
+ skb_reserve(head, packet->overhead + MAX_HEADER);
/* Set the owning socket so that we know where to get the
* destination IP address.
*/
- sctp_packet_set_owner_w(nskb, sk);
+ sctp_packet_set_owner_w(head, sk);
if (!sctp_transport_dst_check(tp)) {
sctp_transport_route(tp, NULL, sctp_sk(sk));
@@ -422,11 +469,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
dst = dst_clone(tp->dst);
if (!dst)
goto no_route;
- skb_dst_set(nskb, dst);
+ skb_dst_set(head, dst);
/* Build the SCTP header. */
- sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
- skb_reset_transport_header(nskb);
+ sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
+ skb_reset_transport_header(head);
sh->source = htons(packet->source_port);
sh->dest = htons(packet->destination_port);
@@ -441,90 +488,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sh->vtag = htonl(packet->vtag);
sh->checksum = 0;
- /**
- * 6.10 Bundling
- *
- * An endpoint bundles chunks by simply including multiple
- * chunks in one outbound SCTP packet. ...
- */
-
- /**
- * 3.2 Chunk Field Descriptions
- *
- * The total length of a chunk (including Type, Length and
- * Value fields) MUST be a multiple of 4 bytes. If the length
- * of the chunk is not a multiple of 4 bytes, the sender MUST
- * pad the chunk with all zero bytes and this padding is not
- * included in the chunk length field. The sender should
- * never pad with more than 3 bytes.
- *
- * [This whole comment explains WORD_ROUND() below.]
- */
-
pr_debug("***sctp_transmit_packet***\n");
- list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
- list_del_init(&chunk->list);
- if (sctp_chunk_is_data(chunk)) {
- /* 6.3.1 C4) When data is in flight and when allowed
- * by rule C5, a new RTT measurement MUST be made each
- * round trip. Furthermore, new RTT measurements
- * SHOULD be made no more than once per round-trip
- * for a given destination transport address.
- */
+ do {
+ /* Set up convenience variables... */
+ chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
+ pktcount++;
- if (!chunk->resent && !tp->rto_pending) {
- chunk->rtt_in_progress = 1;
- tp->rto_pending = 1;
+ /* Calculate packet size, so it fits in PMTU. Leave
+ * other chunks for the next packets.
+ */
+ if (gso) {
+ pkt_size = packet->overhead;
+ list_for_each_entry(chunk, &packet->chunk_list, list) {
+ int padded = WORD_ROUND(chunk->skb->len);
+
+ if (pkt_size + padded > tp->pathmtu)
+ break;
+ pkt_size += padded;
}
- has_data = 1;
- }
+ /* Allocate a new skb. */
+ nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
+ if (!nskb)
+ goto nomem;
- padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
- if (padding)
- memset(skb_put(chunk->skb, padding), 0, padding);
+ /* Make sure the outbound skb has enough header
+ * room reserved.
+ */
+ skb_reserve(nskb, packet->overhead + MAX_HEADER);
+ } else {
+ nskb = head;
+ }
- /* if this is the auth chunk that we are adding,
- * store pointer where it will be added and put
- * the auth into the packet.
+ /**
+ * 3.2 Chunk Field Descriptions
+ *
+ * The total length of a chunk (including Type, Length and
+ * Value fields) MUST be a multiple of 4 bytes. If the length
+ * of the chunk is not a multiple of 4 bytes, the sender MUST
+ * pad the chunk with all zero bytes and this padding is not
+ * included in the chunk length field. The sender should
+ * never pad with more than 3 bytes.
+ *
+ * [This whole comment explains WORD_ROUND() below.]
*/
- if (chunk == packet->auth)
- auth = skb_tail_pointer(nskb);
- memcpy(skb_put(nskb, chunk->skb->len),
+ pkt_size -= packet->overhead;
+ list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
+ list_del_init(&chunk->list);
+ if (sctp_chunk_is_data(chunk)) {
+ /* 6.3.1 C4) When data is in flight and when allowed
+ * by rule C5, a new RTT measurement MUST be made each
+ * round trip. Furthermore, new RTT measurements
+ * SHOULD be made no more than once per round-trip
+ * for a given destination transport address.
+ */
+
+ if (!chunk->resent && !tp->rto_pending) {
+ chunk->rtt_in_progress = 1;
+ tp->rto_pending = 1;
+ }
+
+ has_data = 1;
+ }
+
+ padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+ if (padding)
+ memset(skb_put(chunk->skb, padding), 0, padding);
+
+ /* if this is the auth chunk that we are adding,
+ * store pointer where it will be added and put
+ * the auth into the packet.
+ */
+ if (chunk == packet->auth)
+ auth = skb_tail_pointer(nskb);
+
+ memcpy(skb_put(nskb, chunk->skb->len),
chunk->skb->data, chunk->skb->len);
- pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, "
- "rtt_in_progress:%d\n", chunk,
- sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
- chunk->has_tsn ? "TSN" : "No TSN",
- chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
- ntohs(chunk->chunk_hdr->length), chunk->skb->len,
- chunk->rtt_in_progress);
-
- /*
- * If this is a control chunk, this is our last
- * reference. Free data chunks after they've been
- * acknowledged or have failed.
- */
- if (!sctp_chunk_is_data(chunk))
- sctp_chunk_free(chunk);
- }
+ pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
+ chunk,
+ sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
+ chunk->has_tsn ? "TSN" : "No TSN",
+ chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
+ ntohs(chunk->chunk_hdr->length), chunk->skb->len,
+ chunk->rtt_in_progress);
+
+ /* If this is a control chunk, this is our last
+ * reference. Free data chunks after they've been
+ * acknowledged or have failed.
+ * Re-queue auth chunks if needed.
+ */
+ pkt_size -= WORD_ROUND(chunk->skb->len);
- /* SCTP-AUTH, Section 6.2
- * The sender MUST calculate the MAC as described in RFC2104 [2]
- * using the hash function H as described by the MAC Identifier and
- * the shared association key K based on the endpoint pair shared key
- * described by the shared key identifier. The 'data' used for the
- * computation of the AUTH-chunk is given by the AUTH chunk with its
- * HMAC field set to zero (as shown in Figure 6) followed by all
- * chunks that are placed after the AUTH chunk in the SCTP packet.
- */
- if (auth)
- sctp_auth_calculate_hmac(asoc, nskb,
- (struct sctp_auth_chunk *)auth,
- gfp);
+ if (chunk == packet->auth && !list_empty(&packet->chunk_list))
+ list_add(&chunk->list, &packet->chunk_list);
+ else if (!sctp_chunk_is_data(chunk))
+ sctp_chunk_free(chunk);
+
+ if (!pkt_size)
+ break;
+ }
+
+ /* SCTP-AUTH, Section 6.2
+ * The sender MUST calculate the MAC as described in RFC2104 [2]
+ * using the hash function H as described by the MAC Identifier and
+ * the shared association key K based on the endpoint pair shared key
+ * described by the shared key identifier. The 'data' used for the
+ * computation of the AUTH-chunk is given by the AUTH chunk with its
+ * HMAC field set to zero (as shown in Figure 6) followed by all
+ * chunks that are placed after the AUTH chunk in the SCTP packet.
+ */
+ if (auth)
+ sctp_auth_calculate_hmac(asoc, nskb,
+ (struct sctp_auth_chunk *)auth,
+ gfp);
+
+ if (!gso)
+ break;
+
+ if (skb_gro_receive(&head, nskb))
+ goto nomem;
+ nskb = NULL;
+ if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
+ sk->sk_gso_max_segs))
+ goto nomem;
+ } while (!list_empty(&packet->chunk_list));
/* 2) Calculate the Adler-32 checksum of the whole packet,
* including the SCTP common header and all the
@@ -532,16 +622,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
*
* Note: Adler-32 is no longer applicable, as has been replaced
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
+ *
+ * If it's a GSO packet, it's postponed to sctp_skb_segment.
*/
- if (!sctp_checksum_disable) {
- if (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
- (dst_xfrm(dst) != NULL) || packet->ipfragok) {
- sh->checksum = sctp_compute_cksum(nskb, 0);
+ if (!sctp_checksum_disable || gso) {
+ if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
+ dst_xfrm(dst) || packet->ipfragok)) {
+ sh->checksum = sctp_compute_cksum(head, 0);
} else {
/* no need to seed pseudo checksum for SCTP */
- nskb->ip_summed = CHECKSUM_PARTIAL;
- nskb->csum_start = skb_transport_header(nskb) - nskb->head;
- nskb->csum_offset = offsetof(struct sctphdr, checksum);
+ head->ip_summed = CHECKSUM_PARTIAL;
+ head->csum_start = skb_transport_header(head) - head->head;
+ head->csum_offset = offsetof(struct sctphdr, checksum);
}
}
@@ -557,7 +649,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* Note: The works for IPv6 layer checks this bit too later
* in transmission. See IP6_ECN_flow_xmit().
*/
- tp->af_specific->ecn_capable(nskb->sk);
+ tp->af_specific->ecn_capable(sk);
/* Set up the IP options. */
/* BUG: not implemented
@@ -566,7 +658,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
/* Dump that on IP! */
if (asoc) {
- asoc->stats.opackets++;
+ asoc->stats.opackets += pktcount;
if (asoc->peer.last_sent_to != tp)
/* Considering the multiple CPU scenario, this is a
* "correcter" place for last_sent_to. --xguo
@@ -589,16 +681,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
}
}
- pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
+ pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
+
+ if (gso) {
+ /* Cleanup our debris for IP stacks */
+ memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
+ sizeof(struct inet6_skb_parm)));
- nskb->ignore_df = packet->ipfragok;
- tp->af_specific->sctp_xmit(nskb, tp);
+ skb_shinfo(head)->gso_segs = pktcount;
+ skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+
+ /* We have to refresh this in case we are xmiting to
+ * more than one transport at a time
+ */
+ rcu_read_lock();
+ if (__sk_dst_get(sk) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
+ }
+ rcu_read_unlock();
+ }
+ head->ignore_df = packet->ipfragok;
+ tp->af_specific->sctp_xmit(head, tp);
out:
sctp_packet_reset(packet);
return err;
no_route:
- kfree_skb(nskb);
+ kfree_skb(head);
+ if (nskb != head)
+ kfree_skb(nskb);
if (asoc)
IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
@@ -751,39 +863,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
struct sctp_chunk *chunk,
u16 chunk_len)
{
- size_t psize;
- size_t pmtu;
- int too_big;
+ size_t psize, pmtu;
sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size;
- pmtu = ((packet->transport->asoc) ?
- (packet->transport->asoc->pathmtu) :
- (packet->transport->pathmtu));
-
- too_big = (psize + chunk_len > pmtu);
+ if (packet->transport->asoc)
+ pmtu = packet->transport->asoc->pathmtu;
+ else
+ pmtu = packet->transport->pathmtu;
/* Decide if we need to fragment or resubmit later. */
- if (too_big) {
- /* It's OK to fragmet at IP level if any one of the following
+ if (psize + chunk_len > pmtu) {
+ /* It's OK to fragment at IP level if any one of the following
* is true:
- * 1. The packet is empty (meaning this chunk is greater
- * the MTU)
- * 2. The chunk we are adding is a control chunk
- * 3. The packet doesn't have any data in it yet and data
- * requires authentication.
+ * 1. The packet is empty (meaning this chunk is greater
+ * the MTU)
+ * 2. The packet doesn't have any data in it yet and data
+ * requires authentication.
*/
- if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) ||
+ if (sctp_packet_empty(packet) ||
(!packet->has_data && chunk->auth)) {
/* We no longer do re-fragmentation.
* Just fragment at the IP layer, if we
* actually hit this condition
*/
packet->ipfragok = 1;
- } else {
- retval = SCTP_XMIT_PMTU_FULL;
+ goto out;
}
+
+ /* It is also okay to fragment if the chunk we are
+ * adding is a control chunk, but only if current packet
+ * is not a GSO one otherwise it causes fragmentation of
+ * a large frame. So in this case we allow the
+ * fragmentation by forcing it to be in a new packet.
+ */
+ if (!sctp_chunk_is_data(chunk) && packet->has_data)
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (psize + chunk_len > packet->max_size)
+ /* Hit GSO/PMTU limit, gotta flush */
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (!packet->transport->burst_limited &&
+ psize + chunk_len > (packet->transport->cwnd >> 1))
+ /* Do not allow a single GSO packet to use more
+ * than half of cwnd.
+ */
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (packet->transport->burst_limited &&
+ psize + chunk_len > (packet->transport->burst_limited >> 1))
+ /* Do not allow a single GSO packet to use more
+ * than half of original cwnd.
+ */
+ retval = SCTP_XMIT_PMTU_FULL;
+ /* Otherwise it will fit in the GSO packet */
}
+out:
return retval;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d3d50daa248b..3b56ae55aba3 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1479,7 +1479,8 @@ static __init int sctp_init(void)
INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
}
- if (sctp_transport_hashtable_init())
+ status = sctp_transport_hashtable_init();
+ if (status)
goto err_thash_alloc;
pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize,
@@ -1516,6 +1517,9 @@ static __init int sctp_init(void)
if (status)
goto err_v6_add_protocol;
+ if (sctp_offload_init() < 0)
+ pr_crit("%s: Cannot add SCTP protocol offload\n", __func__);
+
out:
return status;
err_v6_add_protocol:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index aa3712259368..12d45193357c 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -806,8 +806,10 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
/* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */
if (sctp_state(asoc, SHUTDOWN_RECEIVED) &&
- sctp_sstate(sk, ESTABLISHED))
+ sctp_sstate(sk, ESTABLISHED)) {
+ sk->sk_state = SCTP_SS_CLOSING;
sk->sk_shutdown |= RCV_SHUTDOWN;
+ }
}
if (sctp_state(asoc, COOKIE_WAIT)) {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 67154b848aa9..cdabbd8219b1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk)
return -ESOCKTNOSUPPORT;
}
+ sk->sk_gso_type = SKB_GSO_SCTP;
+
/* Initialize default send parameters. These parameters can be
* modified with the SCTP_DEFAULT_SEND_PARAM socket option.
*/
@@ -4193,6 +4195,7 @@ static void sctp_shutdown(struct sock *sk, int how)
return;
if (how & SEND_SHUTDOWN) {
+ sk->sk_state = SCTP_SS_CLOSING;
ep = sctp_sk(sk)->ep;
if (!list_empty(&ep->asocs)) {
asoc = list_entry(ep->asocs.next,
@@ -7564,10 +7567,13 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
/* If the association on the newsk is already closed before accept()
* is called, set RCV_SHUTDOWN flag.
*/
- if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP))
+ if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) {
+ newsk->sk_state = SCTP_SS_CLOSED;
newsk->sk_shutdown |= RCV_SHUTDOWN;
+ } else {
+ newsk->sk_state = SCTP_SS_ESTABLISHED;
+ }
- newsk->sk_state = SCTP_SS_ESTABLISHED;
release_sock(newsk);
}
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 57e460be4692..31b9f9c52974 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_TIPC) := tipc.o
tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
- name_distr.o subscr.o name_table.o net.o \
+ name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
server.o socket.o
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 93f7c983be33..64f4004a6fac 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -73,4 +73,5 @@ int tipc_addr_node_valid(u32 addr);
int tipc_in_scope(u32 domain, u32 addr);
int tipc_addr_scope(u32 domain);
char *tipc_addr_string_fill(char *string, u32 addr);
+
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index bf8f05c3eb82..8584cc48654c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1,7 +1,7 @@
/*
* net/tipc/bearer.c: TIPC bearer code
*
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
* Copyright (c) 2004-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -39,6 +39,7 @@
#include "bearer.h"
#include "link.h"
#include "discover.h"
+#include "monitor.h"
#include "bcast.h"
#include "netlink.h"
@@ -313,6 +314,10 @@ restart:
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
+ if (tipc_mon_create(net, bearer_id))
+ return -ENOMEM;
+
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
tipc_addr_string_fill(addr_string, disc_domain), priority);
@@ -348,6 +353,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
tipc_disc_delete(b->link_req);
RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
kfree_rcu(b, rcu);
+ tipc_mon_delete(net, bearer_id);
}
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index f686e41b5abb..0d337c7b6fad 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -1,7 +1,7 @@
/*
* net/tipc/bearer.h: Include file for TIPC bearer code
*
- * Copyright (c) 1996-2006, 2013-2014, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fe1b062c4f18..236b043a4156 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -57,6 +57,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->net_id = 4711;
tn->own_addr = 0;
+ tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
get_random_bytes(&tn->random, sizeof(int));
INIT_LIST_HEAD(&tn->node_list);
spin_lock_init(&tn->node_list_lock);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index eff58dc53aa1..a1845fb27d80 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -66,11 +66,13 @@ struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
struct tipc_server;
+struct tipc_monitor;
#define TIPC_MOD_VER "2.0.0"
-#define NODE_HTABLE_SIZE 512
-#define MAX_BEARERS 3
+#define NODE_HTABLE_SIZE 512
+#define MAX_BEARERS 3
+#define TIPC_DEF_MON_THRESHOLD 32
extern int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
@@ -88,6 +90,10 @@ struct tipc_net {
u32 num_nodes;
u32 num_links;
+ /* Neighbor monitoring list */
+ struct tipc_monitor *monitors[MAX_BEARERS];
+ int mon_threshold;
+
/* Bearer list */
struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
@@ -126,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
+static inline unsigned int tipc_hashfn(u32 addr)
+{
+ return addr & (NODE_HTABLE_SIZE - 1);
+}
+
static inline u16 mod(u16 x)
{
return x & 0xffffu;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index ad9d477cc242..6b109a808d4c 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -135,9 +135,12 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
u16 caps = msg_node_capabilities(hdr);
bool respond = false;
bool dupl_addr = false;
+ int err;
- bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
+ err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
kfree_skb(skb);
+ if (err)
+ return;
/* Ensure message from node is valid and communication is permitted */
if (net_id != tn->net_id)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 67b6ab9f4c8d..c1df33f878b2 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -42,6 +42,7 @@
#include "name_distr.h"
#include "discover.h"
#include "netlink.h"
+#include "monitor.h"
#include <linux/pkt_sched.h>
@@ -87,7 +88,6 @@ struct tipc_stats {
* @peer_bearer_id: bearer id used by link's peer endpoint
* @bearer_id: local bearer id used by link
* @tolerance: minimum link continuity loss needed to reset link [in ms]
- * @keepalive_intv: link keepalive timer interval
* @abort_limit: # of unacknowledged continuity probes needed to reset link
* @state: current state of link FSM
* @peer_caps: bitmap describing capabilities of peer node
@@ -96,6 +96,7 @@ struct tipc_stats {
* @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
* @net_plane: current link network plane ('A' through 'H')
+ * @mon_state: cookie with information needed by link monitor
* @backlog_limit: backlog queue congestion thresholds (indexed by importance)
* @exp_msg_count: # of tunnelled messages expected during link changeover
* @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
@@ -131,7 +132,6 @@ struct tipc_link {
u32 peer_bearer_id;
u32 bearer_id;
u32 tolerance;
- unsigned long keepalive_intv;
u32 abort_limit;
u32 state;
u16 peer_caps;
@@ -140,6 +140,7 @@ struct tipc_link {
char if_name[TIPC_MAX_IF_NAME];
u32 priority;
char net_plane;
+ struct tipc_mon_state mon_state;
u16 rst_cnt;
/* Failover/synch */
@@ -711,18 +712,25 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
bool setup = false;
u16 bc_snt = l->bc_sndlink->snd_nxt - 1;
u16 bc_acked = l->bc_rcvlink->acked;
-
- link_profile_stats(l);
+ struct tipc_mon_state *mstate = &l->mon_state;
switch (l->state) {
case LINK_ESTABLISHED:
case LINK_SYNCHING:
- if (l->silent_intv_cnt > l->abort_limit)
- return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
mtyp = STATE_MSG;
+ link_profile_stats(l);
+ tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id);
+ if (mstate->reset || (l->silent_intv_cnt > l->abort_limit))
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
state = bc_acked != bc_snt;
- probe = l->silent_intv_cnt;
- l->silent_intv_cnt++;
+ state |= l->bc_rcvlink->rcv_unacked;
+ state |= l->rcv_unacked;
+ state |= !skb_queue_empty(&l->transmq);
+ state |= !skb_queue_empty(&l->deferdq);
+ probe = mstate->probing;
+ probe |= l->silent_intv_cnt;
+ if (probe || mstate->monitoring)
+ l->silent_intv_cnt++;
break;
case LINK_RESET:
setup = l->rst_cnt++ <= 4;
@@ -833,6 +841,7 @@ void tipc_link_reset(struct tipc_link *l)
l->stats.recv_info = 0;
l->stale_count = 0;
l->bc_peer_is_up = false;
+ memset(&l->mon_state, 0, sizeof(l->mon_state));
tipc_link_reset_stats(l);
}
@@ -1241,6 +1250,9 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
struct tipc_msg *hdr;
struct sk_buff_head *dfq = &l->deferdq;
bool node_up = link_is_up(l->bc_rcvlink);
+ struct tipc_mon_state *mstate = &l->mon_state;
+ int dlen = 0;
+ void *data;
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
@@ -1253,12 +1265,13 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
- TIPC_MAX_IF_NAME, l->addr,
+ tipc_max_domain_size, l->addr,
tipc_own_addr(l->net), 0, 0, 0);
if (!skb)
return;
hdr = buf_msg(skb);
+ data = msg_data(hdr);
msg_set_session(hdr, l->session);
msg_set_bearer_id(hdr, l->bearer_id);
msg_set_net_plane(hdr, l->net_plane);
@@ -1274,14 +1287,18 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
if (mtyp == STATE_MSG) {
msg_set_seq_gap(hdr, rcvgap);
- msg_set_size(hdr, INT_H_SIZE);
msg_set_probe(hdr, probe);
+ tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
+ msg_set_size(hdr, INT_H_SIZE + dlen);
+ skb_trim(skb, INT_H_SIZE + dlen);
l->stats.sent_states++;
l->rcv_unacked = 0;
} else {
/* RESET_MSG or ACTIVATE_MSG */
msg_set_max_pkt(hdr, l->advertised_mtu);
- strcpy(msg_data(hdr), l->if_name);
+ strcpy(data, l->if_name);
+ msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
+ skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME);
}
if (probe)
l->stats.sent_probes++;
@@ -1374,7 +1391,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
u16 rcv_nxt = l->rcv_nxt;
+ u16 dlen = msg_data_sz(hdr);
int mtyp = msg_type(hdr);
+ void *data;
char *if_name;
int rc = 0;
@@ -1384,6 +1403,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
+ data = msg_data(hdr);
+
switch (mtyp) {
case RESET_MSG:
@@ -1394,8 +1417,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* fall thru' */
case ACTIVATE_MSG:
- skb_linearize(skb);
- hdr = buf_msg(skb);
/* Complete own link name with peer's interface name */
if_name = strrchr(l->name, ':') + 1;
@@ -1403,7 +1424,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
break;
- strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME);
+ strncpy(if_name, data, TIPC_MAX_IF_NAME);
/* Update own tolerance if peer indicates a non-zero value */
if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
@@ -1451,6 +1472,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
rc = TIPC_LINK_UP_EVT;
break;
}
+ tipc_mon_rcv(l->net, data, dlen, l->addr,
+ &l->mon_state, l->bearer_id);
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
new file mode 100644
index 000000000000..0d489e81fcca
--- /dev/null
+++ b/net/tipc/monitor.c
@@ -0,0 +1,651 @@
+/*
+ * net/tipc/monitor.c
+ *
+ * Copyright (c) 2016, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "monitor.h"
+
+#define MAX_MON_DOMAIN 64
+#define MON_TIMEOUT 120000
+#define MAX_PEER_DOWN_EVENTS 4
+
+/* struct tipc_mon_domain: domain record to be transferred between peers
+ * @len: actual size of domain record
+ * @gen: current generation of sender's domain
+ * @ack_gen: most recent generation of self's domain acked by peer
+ * @member_cnt: number of domain member nodes described in this record
+ * @up_map: bit map indicating which of the members the sender considers up
+ * @members: identity of the domain members
+ */
+struct tipc_mon_domain {
+ u16 len;
+ u16 gen;
+ u16 ack_gen;
+ u16 member_cnt;
+ u64 up_map;
+ u32 members[MAX_MON_DOMAIN];
+};
+
+/* struct tipc_peer: state of a peer node and its domain
+ * @addr: tipc node identity of peer
+ * @head_map: shows which other nodes currently consider peer 'up'
+ * @domain: most recent domain record from peer
+ * @hash: position in hashed lookup list
+ * @list: position in linked list, in circular ascending order by 'addr'
+ * @applied: number of reported domain members applied on this monitor list
+ * @is_up: peer is up as seen from this node
+ * @is_head: peer is assigned domain head as seen from this node
+ * @is_local: peer is in local domain and should be continuously monitored
+ * @down_cnt: - numbers of other peers which have reported this on lost
+ */
+struct tipc_peer {
+ u32 addr;
+ struct tipc_mon_domain *domain;
+ struct hlist_node hash;
+ struct list_head list;
+ u8 applied;
+ u8 down_cnt;
+ bool is_up;
+ bool is_head;
+ bool is_local;
+};
+
+struct tipc_monitor {
+ struct hlist_head peers[NODE_HTABLE_SIZE];
+ int peer_cnt;
+ struct tipc_peer *self;
+ rwlock_t lock;
+ struct tipc_mon_domain cache;
+ u16 list_gen;
+ u16 dom_gen;
+ struct net *net;
+ struct timer_list timer;
+ unsigned long timer_intv;
+};
+
+static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
+{
+ return tipc_net(net)->monitors[bearer_id];
+}
+
+const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
+
+/* dom_rec_len(): actual length of domain record for transport
+ */
+static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
+{
+ return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
+}
+
+/* dom_size() : calculate size of own domain based on number of peers
+ */
+static int dom_size(int peers)
+{
+ int i = 0;
+
+ while ((i * i) < peers)
+ i++;
+ return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
+}
+
+static void map_set(u64 *up_map, int i, unsigned int v)
+{
+ *up_map &= ~(1ULL << i);
+ *up_map |= ((u64)v << i);
+}
+
+static int map_get(u64 up_map, int i)
+{
+ return (up_map & (1 << i)) >> i;
+}
+
+static struct tipc_peer *peer_prev(struct tipc_peer *peer)
+{
+ return list_last_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
+{
+ return list_first_entry(&peer->list, struct tipc_peer, list);
+}
+
+static struct tipc_peer *peer_head(struct tipc_peer *peer)
+{
+ while (!peer->is_head)
+ peer = peer_prev(peer);
+ return peer;
+}
+
+static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
+{
+ struct tipc_peer *peer;
+ unsigned int thash = tipc_hashfn(addr);
+
+ hlist_for_each_entry(peer, &mon->peers[thash], hash) {
+ if (peer->addr == addr)
+ return peer;
+ }
+ return NULL;
+}
+
+static struct tipc_peer *get_self(struct net *net, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+
+ return mon->self;
+}
+
+static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
+{
+ struct tipc_net *tn = tipc_net(net);
+
+ return mon->peer_cnt > tn->mon_threshold;
+}
+
+/* mon_identify_lost_members() : - identify amd mark potentially lost members
+ */
+static void mon_identify_lost_members(struct tipc_peer *peer,
+ struct tipc_mon_domain *dom_bef,
+ int applied_bef)
+{
+ struct tipc_peer *member = peer;
+ struct tipc_mon_domain *dom_aft = peer->domain;
+ int applied_aft = peer->applied;
+ int i;
+
+ for (i = 0; i < applied_bef; i++) {
+ member = peer_nxt(member);
+
+ /* Do nothing if self or peer already see member as down */
+ if (!member->is_up || !map_get(dom_bef->up_map, i))
+ continue;
+
+ /* Loss of local node must be detected by active probing */
+ if (member->is_local)
+ continue;
+
+ /* Start probing if member was removed from applied domain */
+ if (!applied_aft || (applied_aft < i)) {
+ member->down_cnt = 1;
+ continue;
+ }
+
+ /* Member loss is confirmed if it is still in applied domain */
+ if (!map_get(dom_aft->up_map, i))
+ member->down_cnt++;
+ }
+}
+
+/* mon_apply_domain() : match a peer's domain record against monitor list
+ */
+static void mon_apply_domain(struct tipc_monitor *mon,
+ struct tipc_peer *peer)
+{
+ struct tipc_mon_domain *dom = peer->domain;
+ struct tipc_peer *member;
+ u32 addr;
+ int i;
+
+ if (!dom || !peer->is_up)
+ return;
+
+ /* Scan across domain members and match against monitor list */
+ peer->applied = 0;
+ member = peer_nxt(peer);
+ for (i = 0; i < dom->member_cnt; i++) {
+ addr = dom->members[i];
+ if (addr != member->addr)
+ return;
+ peer->applied++;
+ member = peer_nxt(member);
+ }
+}
+
+/* mon_update_local_domain() : update after peer addition/removal/up/down
+ */
+static void mon_update_local_domain(struct tipc_monitor *mon)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_mon_domain *cache = &mon->cache;
+ struct tipc_mon_domain *dom = self->domain;
+ struct tipc_peer *peer = self;
+ u64 prev_up_map = dom->up_map;
+ u16 member_cnt, i;
+ bool diff;
+
+ /* Update local domain size based on current size of cluster */
+ member_cnt = dom_size(mon->peer_cnt) - 1;
+ self->applied = member_cnt;
+
+ /* Update native and cached outgoing local domain records */
+ dom->len = dom_rec_len(dom, member_cnt);
+ diff = dom->member_cnt != member_cnt;
+ dom->member_cnt = member_cnt;
+ for (i = 0; i < member_cnt; i++) {
+ peer = peer_nxt(peer);
+ diff |= dom->members[i] != peer->addr;
+ dom->members[i] = peer->addr;
+ map_set(&dom->up_map, i, peer->is_up);
+ cache->members[i] = htonl(peer->addr);
+ }
+ diff |= dom->up_map != prev_up_map;
+ if (!diff)
+ return;
+ dom->gen = ++mon->dom_gen;
+ cache->len = htons(dom->len);
+ cache->gen = htons(dom->gen);
+ cache->member_cnt = htons(member_cnt);
+ cache->up_map = cpu_to_be64(dom->up_map);
+ mon_apply_domain(mon, self);
+}
+
+/* mon_update_neighbors() : update preceding neighbors of added/removed peer
+ */
+static void mon_update_neighbors(struct tipc_monitor *mon,
+ struct tipc_peer *peer)
+{
+ int dz, i;
+
+ dz = dom_size(mon->peer_cnt);
+ for (i = 0; i < dz; i++) {
+ mon_apply_domain(mon, peer);
+ peer = peer_prev(peer);
+ }
+}
+
+/* mon_assign_roles() : reassign peer roles after a network change
+ * The monitor list is consistent at this stage; i.e., each peer is monitoring
+ * a set of domain members as matched between domain record and the monitor list
+ */
+static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
+{
+ struct tipc_peer *peer = peer_nxt(head);
+ struct tipc_peer *self = mon->self;
+ int i = 0;
+
+ for (; peer != self; peer = peer_nxt(peer)) {
+ peer->is_local = false;
+
+ /* Update domain member */
+ if (i++ < head->applied) {
+ peer->is_head = false;
+ if (head == self)
+ peer->is_local = true;
+ continue;
+ }
+ /* Assign next domain head */
+ if (!peer->is_up)
+ continue;
+ if (peer->is_head)
+ break;
+ head = peer;
+ head->is_head = true;
+ i = 0;
+ }
+ mon->list_gen++;
+}
+
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *prev, *head;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer)
+ goto exit;
+ prev = peer_prev(peer);
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ mon->peer_cnt--;
+ head = peer_head(prev);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_update_neighbors(mon, prev);
+
+ /* Revert to full-mesh monitoring if we reach threshold */
+ if (!tipc_mon_is_active(net, mon)) {
+ list_for_each_entry(peer, &self->list, list) {
+ kfree(peer->domain);
+ peer->domain = NULL;
+ peer->applied = 0;
+ }
+ }
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
+ struct tipc_peer **peer)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_peer *cur, *prev, *p;
+
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ *peer = p;
+ if (!p)
+ return false;
+ p->addr = addr;
+
+ /* Add new peer to lookup list */
+ INIT_LIST_HEAD(&p->list);
+ hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
+
+ /* Sort new peer into iterator list, in ascending circular order */
+ prev = self;
+ list_for_each_entry(cur, &self->list, list) {
+ if ((addr > prev->addr) && (addr < cur->addr))
+ break;
+ if (((addr < cur->addr) || (addr > prev->addr)) &&
+ (prev->addr > cur->addr))
+ break;
+ prev = cur;
+ }
+ list_add_tail(&p->list, &cur->list);
+ mon->peer_cnt++;
+ mon_update_neighbors(mon, p);
+ return true;
+}
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *head;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
+ goto exit;
+ peer->is_up = true;
+ head = peer_head(peer);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *head;
+ struct tipc_mon_domain *dom;
+ int applied;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer) {
+ pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
+ goto exit;
+ }
+ applied = peer->applied;
+ peer->applied = 0;
+ dom = peer->domain;
+ peer->domain = NULL;
+ if (peer->is_head)
+ mon_identify_lost_members(peer, dom, applied);
+ kfree(dom);
+ peer->is_up = false;
+ peer->is_head = false;
+ peer->is_local = false;
+ peer->down_cnt = 0;
+ head = peer_head(peer);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_rcv - process monitor domain event message
+ */
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ struct tipc_mon_state *state, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_mon_domain *arrv_dom = data;
+ struct tipc_mon_domain dom_bef;
+ struct tipc_mon_domain *dom;
+ struct tipc_peer *peer;
+ u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
+ int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
+ u16 new_gen = ntohs(arrv_dom->gen);
+ u16 acked_gen = ntohs(arrv_dom->ack_gen);
+ bool probing = state->probing;
+ int i, applied_bef;
+
+ state->probing = false;
+ if (!dlen)
+ return;
+
+ /* Sanity check received domain record */
+ if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
+ pr_warn_ratelimited("Received illegal domain record\n");
+ return;
+ }
+
+ /* Synch generation numbers with peer if link just came up */
+ if (!state->synched) {
+ state->peer_gen = new_gen - 1;
+ state->acked_gen = acked_gen;
+ state->synched = true;
+ }
+
+ if (more(acked_gen, state->acked_gen))
+ state->acked_gen = acked_gen;
+
+ /* Drop duplicate unless we are waiting for a probe response */
+ if (!more(new_gen, state->peer_gen) && !probing)
+ return;
+
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer || !peer->is_up)
+ goto exit;
+
+ /* Peer is confirmed, stop any ongoing probing */
+ peer->down_cnt = 0;
+
+ /* Task is done for duplicate record */
+ if (!more(new_gen, state->peer_gen))
+ goto exit;
+
+ state->peer_gen = new_gen;
+
+ /* Cache current domain record for later use */
+ dom_bef.member_cnt = 0;
+ dom = peer->domain;
+ if (dom)
+ memcpy(&dom_bef, dom, dom->len);
+
+ /* Transform and store received domain record */
+ if (!dom || (dom->len < new_dlen)) {
+ kfree(dom);
+ dom = kmalloc(new_dlen, GFP_ATOMIC);
+ peer->domain = dom;
+ if (!dom)
+ goto exit;
+ }
+ dom->len = new_dlen;
+ dom->gen = new_gen;
+ dom->member_cnt = new_member_cnt;
+ dom->up_map = be64_to_cpu(arrv_dom->up_map);
+ for (i = 0; i < new_member_cnt; i++)
+ dom->members[i] = ntohl(arrv_dom->members[i]);
+
+ /* Update peers affected by this domain record */
+ applied_bef = peer->applied;
+ mon_apply_domain(mon, peer);
+ mon_identify_lost_members(peer, &dom_bef, applied_bef);
+ mon_assign_roles(mon, peer_head(peer));
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_mon_domain *dom = data;
+ u16 gen = mon->dom_gen;
+ u16 len;
+
+ if (!tipc_mon_is_active(net, mon))
+ return;
+
+ /* Send only a dummy record with ack if peer has acked our last sent */
+ if (likely(state->acked_gen == gen)) {
+ len = dom_rec_len(dom, 0);
+ *dlen = len;
+ dom->len = htons(len);
+ dom->gen = htons(gen);
+ dom->ack_gen = htons(state->peer_gen);
+ dom->member_cnt = 0;
+ return;
+ }
+ /* Send the full record */
+ read_lock_bh(&mon->lock);
+ len = ntohs(mon->cache.len);
+ *dlen = len;
+ memcpy(data, &mon->cache, len);
+ read_unlock_bh(&mon->lock);
+ dom->ack_gen = htons(state->peer_gen);
+}
+
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *peer;
+
+ /* Used cached state if table has not changed */
+ if (!state->probing &&
+ (state->list_gen == mon->list_gen) &&
+ (state->acked_gen == mon->dom_gen))
+ return;
+
+ read_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (peer) {
+ state->probing = state->acked_gen != mon->dom_gen;
+ state->probing |= peer->down_cnt;
+ state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
+ state->monitoring = peer->is_local;
+ state->monitoring |= peer->is_head;
+ state->list_gen = mon->list_gen;
+ }
+ read_unlock_bh(&mon->lock);
+}
+
+static void mon_timeout(unsigned long m)
+{
+ struct tipc_monitor *mon = (void *)m;
+ struct tipc_peer *self;
+ int best_member_cnt = dom_size(mon->peer_cnt) - 1;
+
+ write_lock_bh(&mon->lock);
+ self = mon->self;
+ if (self && (best_member_cnt != self->applied)) {
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, self);
+ }
+ write_unlock_bh(&mon->lock);
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+}
+
+int tipc_mon_create(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon;
+ struct tipc_peer *self;
+ struct tipc_mon_domain *dom;
+
+ if (tn->monitors[bearer_id])
+ return 0;
+
+ mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
+ self = kzalloc(sizeof(*self), GFP_ATOMIC);
+ dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
+ if (!mon || !self || !dom) {
+ kfree(mon);
+ kfree(self);
+ kfree(dom);
+ return -ENOMEM;
+ }
+ tn->monitors[bearer_id] = mon;
+ rwlock_init(&mon->lock);
+ mon->net = net;
+ mon->peer_cnt = 1;
+ mon->self = self;
+ self->domain = dom;
+ self->addr = tipc_own_addr(net);
+ self->is_up = true;
+ self->is_head = true;
+ INIT_LIST_HEAD(&self->list);
+ setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
+ mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+ return 0;
+}
+
+void tipc_mon_delete(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *peer, *tmp;
+
+ write_lock_bh(&mon->lock);
+ tn->monitors[bearer_id] = NULL;
+ list_for_each_entry_safe(peer, tmp, &self->list, list) {
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ }
+ mon->self = NULL;
+ write_unlock_bh(&mon->lock);
+ del_timer_sync(&mon->timer);
+ kfree(self->domain);
+ kfree(self);
+ kfree(mon);
+}
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
new file mode 100644
index 000000000000..598459cbed5d
--- /dev/null
+++ b/net/tipc/monitor.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/monitor.h
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_MONITOR_H
+#define _TIPC_MONITOR_H
+
+/* struct tipc_mon_state: link instance's cache of monitor list and domain state
+ * @list_gen: current generation of this node's monitor list
+ * @gen: current generation of this node's local domain
+ * @peer_gen: most recent domain generation received from peer
+ * @acked_gen: most recent generation of self's domain acked by peer
+ * @monitoring: this peer endpoint should continuously monitored
+ * @probing: peer endpoint should be temporarily probed for potential loss
+ * @synched: domain record's generation has been synched with peer after reset
+ */
+struct tipc_mon_state {
+ u16 list_gen;
+ u16 peer_gen;
+ u16 acked_gen;
+ bool monitoring :1;
+ bool probing :1;
+ bool reset :1;
+ bool synched :1;
+};
+
+int tipc_mon_create(struct net *net, int bearer_id);
+void tipc_mon_delete(struct net *net, int bearer_id);
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id);
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id);
+
+extern const int tipc_max_domain_size;
+#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index e01e2c71b5a1..a3fc0a3f4077 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -40,6 +40,7 @@
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
+#include "monitor.h"
#include "discover.h"
#include "netlink.h"
@@ -205,17 +206,6 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr)
return caps;
}
-/*
- * A trivial power-of-two bitmask technique is used for speed, since this
- * operation is done for every incoming TIPC packet. The number of hash table
- * entries has been chosen so that no hash chain exceeds 8 nodes and will
- * usually be much smaller (typically only a single node).
- */
-static unsigned int tipc_hashfn(u32 addr)
-{
- return addr & (NODE_HTABLE_SIZE - 1);
-}
-
static void tipc_node_kref_release(struct kref *kref)
{
struct tipc_node *n = container_of(kref, struct tipc_node, kref);
@@ -279,6 +269,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
u32 addr = 0;
u32 flags = n->action_flags;
u32 link_id = 0;
+ u32 bearer_id;
struct list_head *publ_list;
if (likely(!flags)) {
@@ -288,6 +279,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
addr = n->addr;
link_id = n->link_id;
+ bearer_id = link_id & 0xffff;
publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@ -301,13 +293,16 @@ static void tipc_node_write_unlock(struct tipc_node *n)
if (flags & TIPC_NOTIFY_NODE_UP)
tipc_named_node_up(net, addr);
- if (flags & TIPC_NOTIFY_LINK_UP)
+ if (flags & TIPC_NOTIFY_LINK_UP) {
+ tipc_mon_peer_up(net, addr, bearer_id);
tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
TIPC_NODE_SCOPE, link_id, addr);
-
- if (flags & TIPC_NOTIFY_LINK_DOWN)
+ }
+ if (flags & TIPC_NOTIFY_LINK_DOWN) {
+ tipc_mon_peer_down(net, addr, bearer_id);
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
link_id, addr);
+ }
}
struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
@@ -378,14 +373,13 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
{
unsigned long tol = tipc_link_tolerance(l);
unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4;
- unsigned long keepalive_intv = msecs_to_jiffies(intv);
/* Link with lowest tolerance determines timer interval */
- if (keepalive_intv < n->keepalive_intv)
- n->keepalive_intv = keepalive_intv;
+ if (intv < n->keepalive_intv)
+ n->keepalive_intv = intv;
- /* Ensure link's abort limit corresponds to current interval */
- tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv));
+ /* Ensure link's abort limit corresponds to current tolerance */
+ tipc_link_set_abort_limit(l, tol / n->keepalive_intv);
}
static void tipc_node_delete(struct tipc_node *node)
@@ -526,7 +520,7 @@ static void tipc_node_timeout(unsigned long data)
if (rc & TIPC_LINK_DOWN_EVT)
tipc_node_link_down(n, bearer_id, false);
}
- mod_timer(&n->timer, jiffies + n->keepalive_intv);
+ mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv));
}
/**
@@ -692,6 +686,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
struct tipc_link *l = le->link;
struct tipc_media_addr *maddr;
struct sk_buff_head xmitq;
+ int old_bearer_id = bearer_id;
if (!l)
return;
@@ -711,6 +706,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_link_fsm_evt(l, LINK_RESET_EVT);
}
tipc_node_write_unlock(n);
+ if (delete)
+ tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
@@ -735,6 +732,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
bool accept_addr = false;
bool reset = true;
char *if_name;
+ unsigned long intv;
*dupl_addr = false;
*respond = false;
@@ -840,9 +838,11 @@ void tipc_node_check_dest(struct net *net, u32 onode,
le->link = l;
n->link_cnt++;
tipc_node_calculate_timer(n, l);
- if (n->link_cnt == 1)
- if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
+ if (n->link_cnt == 1) {
+ intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+ if (!mod_timer(&n->timer, intv))
tipc_node_get(n);
+ }
}
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
@@ -950,7 +950,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
state = SELF_UP_PEER_UP;
break;
case SELF_LOST_CONTACT_EVT:
- state = SELF_DOWN_PEER_LEAVING;
+ state = SELF_DOWN_PEER_DOWN;
break;
case SELF_ESTABL_CONTACT_EVT:
case PEER_LOST_CONTACT_EVT:
@@ -969,7 +969,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
state = SELF_UP_PEER_UP;
break;
case PEER_LOST_CONTACT_EVT:
- state = SELF_LEAVING_PEER_DOWN;
+ state = SELF_DOWN_PEER_DOWN;
break;
case SELF_LOST_CONTACT_EVT:
case PEER_ESTABL_CONTACT_EVT:
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 272d20a795d5..215849ce453d 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -418,13 +418,12 @@ static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
if (!entry)
return NULL;
- buf = kmalloc(len, GFP_ATOMIC);
+ buf = kmemdup(data, len, GFP_ATOMIC);
if (!buf) {
kfree(entry);
return NULL;
}
- memcpy(buf, data, len);
entry->iov.iov_base = buf;
entry->iov.iov_len = len;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index c9cf2be3674a..b016c011970b 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -63,7 +63,7 @@
*/
struct udp_media_addr {
__be16 proto;
- __be16 udp_port;
+ __be16 port;
union {
struct in_addr ipv4;
struct in6_addr ipv6;
@@ -108,9 +108,9 @@ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
struct udp_media_addr *ua = (struct udp_media_addr *)&a->value;
if (ntohs(ua->proto) == ETH_P_IP)
- snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port));
+ snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port));
else if (ntohs(ua->proto) == ETH_P_IPV6)
- snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port));
+ snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port));
else
pr_err("Invalid UDP media address\n");
return 0;
@@ -178,8 +178,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
skb->dev = rt->dst.dev;
ttl = ip4_dst_hoplimit(&rt->dst);
udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
- dst->ipv4.s_addr, 0, ttl, 0, src->udp_port,
- dst->udp_port, false, true);
+ dst->ipv4.s_addr, 0, ttl, 0, src->port,
+ dst->port, false, true);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
@@ -196,8 +196,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
ttl = ip6_dst_hoplimit(ndst);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
ndst->dev, &src->ipv6,
- &dst->ipv6, 0, ttl, 0, src->udp_port,
- dst->udp_port, false);
+ &dst->ipv6, 0, ttl, 0, src->port,
+ dst->port, false);
#endif
}
return err;
@@ -292,12 +292,12 @@ err:
ip4 = (struct sockaddr_in *)&sa_local;
local->proto = htons(ETH_P_IP);
- local->udp_port = ip4->sin_port;
+ local->port = ip4->sin_port;
local->ipv4.s_addr = ip4->sin_addr.s_addr;
ip4 = (struct sockaddr_in *)&sa_remote;
remote->proto = htons(ETH_P_IP);
- remote->udp_port = ip4->sin_port;
+ remote->port = ip4->sin_port;
remote->ipv4.s_addr = ip4->sin_addr.s_addr;
return 0;
@@ -312,13 +312,13 @@ err:
return -EINVAL;
local->proto = htons(ETH_P_IPV6);
- local->udp_port = ip6->sin6_port;
+ local->port = ip6->sin6_port;
memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
ub->ifindex = ip6->sin6_scope_id;
ip6 = (struct sockaddr_in6 *)&sa_remote;
remote->proto = htons(ETH_P_IPV6);
- remote->udp_port = ip6->sin6_port;
+ remote->port = ip6->sin6_port;
memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
return 0;
#endif
@@ -386,7 +386,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
err = -EAFNOSUPPORT;
goto err;
}
- udp_conf.local_udp_port = local.udp_port;
+ udp_conf.local_udp_port = local.port;
err = udp_sock_create(net, &udp_conf, &ub->ubsock);
if (err)
goto err;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index ecca3896b9f7..39d9abd309ea 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -748,6 +748,36 @@ int wiphy_register(struct wiphy *wiphy)
nl80211_send_reg_change_event(&request);
}
+ /* Check that nobody globally advertises any capabilities they do not
+ * advertise on all possible interface types.
+ */
+ if (wiphy->extended_capabilities_len &&
+ wiphy->num_iftype_ext_capab &&
+ wiphy->iftype_ext_capab) {
+ u8 supported_on_all, j;
+ const struct wiphy_iftype_ext_capab *capab;
+
+ capab = wiphy->iftype_ext_capab;
+ for (j = 0; j < wiphy->extended_capabilities_len; j++) {
+ if (capab[0].extended_capabilities_len > j)
+ supported_on_all =
+ capab[0].extended_capabilities[j];
+ else
+ supported_on_all = 0x00;
+ for (i = 1; i < wiphy->num_iftype_ext_capab; i++) {
+ if (j >= capab[i].extended_capabilities_len) {
+ supported_on_all = 0x00;
+ break;
+ }
+ supported_on_all &=
+ capab[i].extended_capabilities[j];
+ }
+ if (WARN_ON(wiphy->extended_capabilities[j] &
+ ~supported_on_all))
+ break;
+ }
+ }
+
rdev->wiphy.registered = true;
rtnl_unlock();
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 025b7a5d508b..a4d547f99f8d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -214,7 +214,7 @@ struct cfg80211_event {
size_t req_ie_len;
size_t resp_ie_len;
struct cfg80211_bss *bss;
- u16 status;
+ int status; /* -1 = failed; 0..65535 = status code */
} cr;
struct {
const u8 *req_ie;
@@ -374,7 +374,7 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
const u8 *req_ie, size_t req_ie_len,
const u8 *resp_ie, size_t resp_ie_len,
- u16 status, bool wextev,
+ int status, bool wextev,
struct cfg80211_bss *bss);
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d7599014055d..c503e96bfd5a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -167,6 +167,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
if (attrs[NL80211_ATTR_IFINDEX]) {
int ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]);
+
netdev = __dev_get_by_index(netns, ifindex);
if (netdev) {
if (netdev->ieee80211_ptr)
@@ -731,6 +732,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
if (tb[NL80211_KEY_DEFAULT_TYPES]) {
struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
+
err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
tb[NL80211_KEY_DEFAULT_TYPES],
nl80211_key_default_policy);
@@ -1264,7 +1266,7 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg,
struct nl80211_dump_wiphy_state {
s64 filter_wiphy;
long start;
- long split_start, band_start, chan_start;
+ long split_start, band_start, chan_start, capa_start;
bool split;
};
@@ -1382,6 +1384,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
rdev->ops->get_antenna) {
u32 tx_ant = 0, rx_ant = 0;
int res;
+
res = rdev_get_antenna(rdev, &tx_ant, &rx_ant);
if (!res) {
if (nla_put_u32(msg,
@@ -1761,6 +1764,47 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
nla_nest_end(msg, nested);
}
+ state->split_start++;
+ break;
+ case 13:
+ if (rdev->wiphy.num_iftype_ext_capab &&
+ rdev->wiphy.iftype_ext_capab) {
+ struct nlattr *nested_ext_capab, *nested;
+
+ nested = nla_nest_start(msg,
+ NL80211_ATTR_IFTYPE_EXT_CAPA);
+ if (!nested)
+ goto nla_put_failure;
+
+ for (i = state->capa_start;
+ i < rdev->wiphy.num_iftype_ext_capab; i++) {
+ const struct wiphy_iftype_ext_capab *capab;
+
+ capab = &rdev->wiphy.iftype_ext_capab[i];
+
+ nested_ext_capab = nla_nest_start(msg, i);
+ if (!nested_ext_capab ||
+ nla_put_u32(msg, NL80211_ATTR_IFTYPE,
+ capab->iftype) ||
+ nla_put(msg, NL80211_ATTR_EXT_CAPA,
+ capab->extended_capabilities_len,
+ capab->extended_capabilities) ||
+ nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK,
+ capab->extended_capabilities_len,
+ capab->extended_capabilities_mask))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nested_ext_capab);
+ if (state->split)
+ break;
+ }
+ nla_nest_end(msg, nested);
+ if (i < rdev->wiphy.num_iftype_ext_capab) {
+ state->capa_start = i + 1;
+ break;
+ }
+ }
+
/* done */
state->split_start = 0;
break;
@@ -2116,7 +2160,6 @@ static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info)
return rdev_set_wds_peer(rdev, dev, bssid);
}
-
static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev;
@@ -2251,6 +2294,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] &&
info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) {
u32 tx_ant, rx_ant;
+
if ((!rdev->wiphy.available_antennas_tx &&
!rdev->wiphy.available_antennas_rx) ||
!rdev->ops->set_antenna)
@@ -2919,6 +2963,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
pairwise = !!mac_addr;
if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
+
if (kt >= NUM_NL80211_KEYTYPES)
return -EINVAL;
if (kt != NL80211_KEYTYPE_GROUP &&
@@ -3962,7 +4007,6 @@ static int nl80211_dump_station(struct sk_buff *skb,
sta_idx++;
}
-
out:
cb->args[2] = sta_idx;
err = skb->len;
@@ -4763,7 +4807,6 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
path_idx++;
}
-
out:
cb->args[2] = path_idx;
err = skb->len;
@@ -5053,7 +5096,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
enum nl80211_user_reg_hint_type user_reg_hint_type;
u32 owner_nlportid;
-
/*
* You should only get this when cfg80211 hasn't yet initialized
* completely when built-in to the kernel right between the time
@@ -5262,7 +5304,6 @@ do { \
} \
} while (0)
-
if (!info->attrs[NL80211_ATTR_MESH_CONFIG])
return -EINVAL;
if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
@@ -5409,7 +5450,6 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
IEEE80211_PATH_METRIC_VENDOR :
IEEE80211_PATH_METRIC_AIRTIME;
-
if (tb[NL80211_MESH_SETUP_IE]) {
struct nlattr *ieattr =
tb[NL80211_MESH_SETUP_IE];
@@ -5796,10 +5836,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
}
}
- r = set_regdom(rd, REGD_SOURCE_CRDA);
- /* set_regdom took ownership */
- rd = NULL;
-
+ /* set_regdom takes ownership of rd */
+ return set_regdom(rd, REGD_SOURCE_CRDA);
bad_reg:
kfree(rd);
return r;
@@ -6033,6 +6071,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
/* all channels */
for (band = 0; band < NUM_NL80211_BANDS; band++) {
int j;
+
if (!wiphy->bands[band])
continue;
for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
@@ -6442,6 +6481,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
/* all channels */
for (band = 0; band < NUM_NL80211_BANDS; band++) {
int j;
+
if (!wiphy->bands[band])
continue;
for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
@@ -6511,7 +6551,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
nla_data(ssid), nla_len(ssid));
request->match_sets[i].ssid.ssid_len =
nla_len(ssid);
- /* special attribute - old implemenation w/a */
+ /* special attribute - old implementation w/a */
request->match_sets[i].rssi_thold =
default_match_rssi;
rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
@@ -7204,6 +7244,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
if (key.idx >= 0) {
int i;
bool ok = false;
+
for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
ok = true;
@@ -7282,6 +7323,7 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
u16 proto;
+
proto = nla_get_u16(
info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
settings->control_port_ethertype = cpu_to_be16(proto);
@@ -8435,6 +8477,7 @@ static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
for (i = 0; i < rates_len; i++) {
int rate = (rates[i] & 0x7f) * 5;
int ridx;
+
for (ridx = 0; ridx < sband->n_bitrates; ridx++) {
struct ieee80211_rate *srate =
&sband->bitrates[ridx];
@@ -8743,7 +8786,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
if (params.wait < NL80211_MIN_REMAIN_ON_CHANNEL_TIME ||
params.wait > rdev->wiphy.max_remain_on_channel_duration)
return -EINVAL;
-
}
params.offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK];
@@ -10590,7 +10632,6 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply);
-
static int nl80211_set_qos_map(struct sk_buff *skb,
struct genl_info *info)
{
@@ -10945,7 +10986,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_WIPHY,
.doit = nl80211_set_wiphy,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_RTNL,
},
{
@@ -10961,7 +11002,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_INTERFACE,
.doit = nl80211_set_interface,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -10969,7 +11010,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_NEW_INTERFACE,
.doit = nl80211_new_interface,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
@@ -10977,7 +11018,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DEL_INTERFACE,
.doit = nl80211_del_interface,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -10985,7 +11026,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_GET_KEY,
.doit = nl80211_get_key,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -10993,7 +11034,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_KEY,
.doit = nl80211_set_key,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL |
NL80211_FLAG_CLEAR_SKB,
@@ -11002,7 +11043,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_NEW_KEY,
.doit = nl80211_new_key,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL |
NL80211_FLAG_CLEAR_SKB,
@@ -11011,14 +11052,14 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DEL_KEY,
.doit = nl80211_del_key,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
{
.cmd = NL80211_CMD_SET_BEACON,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_set_beacon,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
@@ -11026,7 +11067,7 @@ static const struct genl_ops nl80211_ops[] = {
{
.cmd = NL80211_CMD_START_AP,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_start_ap,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
@@ -11034,7 +11075,7 @@ static const struct genl_ops nl80211_ops[] = {
{
.cmd = NL80211_CMD_STOP_AP,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_stop_ap,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
@@ -11051,7 +11092,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_STATION,
.doit = nl80211_set_station,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11059,7 +11100,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_NEW_STATION,
.doit = nl80211_new_station,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11067,7 +11108,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DEL_STATION,
.doit = nl80211_del_station,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11076,7 +11117,7 @@ static const struct genl_ops nl80211_ops[] = {
.doit = nl80211_get_mpath,
.dumpit = nl80211_dump_mpath,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11085,7 +11126,7 @@ static const struct genl_ops nl80211_ops[] = {
.doit = nl80211_get_mpp,
.dumpit = nl80211_dump_mpp,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11093,7 +11134,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_MPATH,
.doit = nl80211_set_mpath,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11101,7 +11142,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_NEW_MPATH,
.doit = nl80211_new_mpath,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11109,7 +11150,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DEL_MPATH,
.doit = nl80211_del_mpath,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11117,7 +11158,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_BSS,
.doit = nl80211_set_bss,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11156,7 +11197,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_MESH_CONFIG,
.doit = nl80211_update_mesh_config,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11164,7 +11205,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_TRIGGER_SCAN,
.doit = nl80211_trigger_scan,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11172,7 +11213,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_ABORT_SCAN,
.doit = nl80211_abort_scan,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11185,7 +11226,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_START_SCHED_SCAN,
.doit = nl80211_start_sched_scan,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11193,7 +11234,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_STOP_SCHED_SCAN,
.doit = nl80211_stop_sched_scan,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11201,7 +11242,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_AUTHENTICATE,
.doit = nl80211_authenticate,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL |
NL80211_FLAG_CLEAR_SKB,
@@ -11210,7 +11251,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_ASSOCIATE,
.doit = nl80211_associate,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11218,7 +11259,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DEAUTHENTICATE,
.doit = nl80211_deauthenticate,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11226,7 +11267,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DISASSOCIATE,
.doit = nl80211_disassociate,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11234,7 +11275,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_JOIN_IBSS,
.doit = nl80211_join_ibss,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11242,7 +11283,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_LEAVE_IBSS,
.doit = nl80211_leave_ibss,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11252,7 +11293,7 @@ static const struct genl_ops nl80211_ops[] = {
.doit = nl80211_testmode_do,
.dumpit = nl80211_testmode_dump,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
@@ -11261,7 +11302,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_CONNECT,
.doit = nl80211_connect,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11269,7 +11310,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DISCONNECT,
.doit = nl80211_disconnect,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11277,7 +11318,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_WIPHY_NETNS,
.doit = nl80211_wiphy_netns,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
@@ -11290,7 +11331,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_PMKSA,
.doit = nl80211_setdel_pmksa,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11298,7 +11339,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DEL_PMKSA,
.doit = nl80211_setdel_pmksa,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11306,7 +11347,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_FLUSH_PMKSA,
.doit = nl80211_flush_pmksa,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11314,7 +11355,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
.doit = nl80211_remain_on_channel,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11322,7 +11363,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
.doit = nl80211_cancel_remain_on_channel,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11330,7 +11371,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
.doit = nl80211_set_tx_bitrate_mask,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11338,7 +11379,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_REGISTER_FRAME,
.doit = nl80211_register_mgmt,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11346,7 +11387,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_FRAME,
.doit = nl80211_tx_mgmt,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11354,7 +11395,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
.doit = nl80211_tx_mgmt_cancel_wait,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11362,7 +11403,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_POWER_SAVE,
.doit = nl80211_set_power_save,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11378,7 +11419,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_CQM,
.doit = nl80211_set_cqm,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11386,7 +11427,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_CHANNEL,
.doit = nl80211_set_channel,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11394,7 +11435,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_WDS_PEER,
.doit = nl80211_set_wds_peer,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11402,7 +11443,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_JOIN_MESH,
.doit = nl80211_join_mesh,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11410,7 +11451,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_LEAVE_MESH,
.doit = nl80211_leave_mesh,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11418,7 +11459,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_JOIN_OCB,
.doit = nl80211_join_ocb,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11426,7 +11467,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_LEAVE_OCB,
.doit = nl80211_leave_ocb,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11443,7 +11484,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_WOWLAN,
.doit = nl80211_set_wowlan,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
@@ -11452,7 +11493,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_REKEY_OFFLOAD,
.doit = nl80211_set_rekey_data,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL |
NL80211_FLAG_CLEAR_SKB,
@@ -11461,7 +11502,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_TDLS_MGMT,
.doit = nl80211_tdls_mgmt,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11469,7 +11510,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_TDLS_OPER,
.doit = nl80211_tdls_oper,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11477,7 +11518,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_UNEXPECTED_FRAME,
.doit = nl80211_register_unexpected_frame,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11485,7 +11526,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_PROBE_CLIENT,
.doit = nl80211_probe_client,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11493,7 +11534,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_REGISTER_BEACONS,
.doit = nl80211_register_beacons,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
@@ -11501,7 +11542,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_NOACK_MAP,
.doit = nl80211_set_noack_map,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11509,7 +11550,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_START_P2P_DEVICE,
.doit = nl80211_start_p2p_device,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11517,7 +11558,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_STOP_P2P_DEVICE,
.doit = nl80211_stop_p2p_device,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11525,7 +11566,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_MCAST_RATE,
.doit = nl80211_set_mcast_rate,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11533,7 +11574,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_MAC_ACL,
.doit = nl80211_set_mac_acl,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
@@ -11541,7 +11582,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_RADAR_DETECT,
.doit = nl80211_start_radar_detection,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11554,7 +11595,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_UPDATE_FT_IES,
.doit = nl80211_update_ft_ies,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11562,7 +11603,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_CRIT_PROTOCOL_START,
.doit = nl80211_crit_protocol_start,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11570,7 +11611,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
.doit = nl80211_crit_protocol_stop,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11585,7 +11626,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_COALESCE,
.doit = nl80211_set_coalesce,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
@@ -11593,7 +11634,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_CHANNEL_SWITCH,
.doit = nl80211_channel_switch,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11602,7 +11643,7 @@ static const struct genl_ops nl80211_ops[] = {
.doit = nl80211_vendor_cmd,
.dumpit = nl80211_vendor_cmd_dump,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
@@ -11610,7 +11651,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_SET_QOS_MAP,
.doit = nl80211_set_qos_map,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11618,7 +11659,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_ADD_TX_TS,
.doit = nl80211_add_tx_ts,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11626,7 +11667,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_DEL_TX_TS,
.doit = nl80211_del_tx_ts,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11634,7 +11675,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH,
.doit = nl80211_tdls_channel_switch,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -11642,7 +11683,7 @@ static const struct genl_ops nl80211_ops[] = {
.cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
.doit = nl80211_tdls_cancel_channel_switch,
.policy = nl80211_policy,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
@@ -12092,7 +12133,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *bssid,
const u8 *req_ie, size_t req_ie_len,
const u8 *resp_ie, size_t resp_ie_len,
- u16 status, gfp_t gfp)
+ int status, gfp_t gfp)
{
struct sk_buff *msg;
void *hdr;
@@ -12110,7 +12151,10 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
(bssid && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) ||
- nla_put_u16(msg, NL80211_ATTR_STATUS_CODE, status) ||
+ nla_put_u16(msg, NL80211_ATTR_STATUS_CODE,
+ status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE :
+ status) ||
+ (status < 0 && nla_put_flag(msg, NL80211_ATTR_TIMED_OUT)) ||
(req_ie &&
nla_put(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie)) ||
(resp_ie &&
@@ -12126,7 +12170,6 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
nla_put_failure:
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
-
}
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
@@ -12165,7 +12208,6 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
nla_put_failure:
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
-
}
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
@@ -12203,7 +12245,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
nla_put_failure:
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
-
}
void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
@@ -13545,7 +13586,6 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
if (hdr)
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
-
}
EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 84d4edf1d545..a63f402b10b7 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -55,7 +55,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *bssid,
const u8 *req_ie, size_t req_ie_len,
const u8 *resp_ie, size_t resp_ie_len,
- u16 status, gfp_t gfp);
+ int status, gfp_t gfp);
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *bssid,
const u8 *req_ie, size_t req_ie_len,
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 584fdc347221..add6824c44fd 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -244,9 +244,7 @@ void cfg80211_conn_work(struct work_struct *work)
if (cfg80211_conn_do_work(wdev)) {
__cfg80211_connect_result(
wdev->netdev, bssid,
- NULL, 0, NULL, 0,
- WLAN_STATUS_UNSPECIFIED_FAILURE,
- false, NULL);
+ NULL, 0, NULL, 0, -1, false, NULL);
}
wdev_unlock(wdev);
}
@@ -648,7 +646,7 @@ static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
const u8 *req_ie, size_t req_ie_len,
const u8 *resp_ie, size_t resp_ie_len,
- u16 status, bool wextev,
+ int status, bool wextev,
struct cfg80211_bss *bss)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -757,7 +755,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid,
struct cfg80211_bss *bss, const u8 *req_ie,
size_t req_ie_len, const u8 *resp_ie,
- size_t resp_ie_len, u16 status, gfp_t gfp)
+ size_t resp_ie_len, int status, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);