Diffstat (limited to 'net')
-rw-r--r--  net/9p/client.c | 5
-rw-r--r--  net/9p/trans_fd.c | 6
-rw-r--r--  net/9p/trans_virtio.c | 13
-rw-r--r--  net/9p/trans_xen.c | 4
-rw-r--r--  net/ceph/ceph_hash.c | 12
-rw-r--r--  net/ceph/crypto.c | 4
-rw-r--r--  net/ceph/messenger.c | 1
-rw-r--r--  net/ceph/mon_client.c | 5
-rw-r--r--  net/ceph/pagevec.c | 4
-rw-r--r--  net/core/dev.c | 17
-rw-r--r--  net/core/filter.c | 4
-rw-r--r--  net/ipv4/af_inet.c | 12
-rw-r--r--  net/ipv4/route.c | 14
-rw-r--r--  net/ipv4/tcp_input.c | 2
-rw-r--r--  net/ipv4/tcp_output.c | 8
-rw-r--r--  net/ipv4/udp_offload.c | 49
-rw-r--r--  net/ipv6/ip6_gre.c | 2
-rw-r--r--  net/ipv6/output_core.c | 31
-rw-r--r--  net/ipv6/route.c | 7
-rw-r--r--  net/ipv6/udp_offload.c | 85
-rw-r--r--  net/mac80211/agg-rx.c | 41
-rw-r--r--  net/mac80211/agg-tx.c | 49
-rw-r--r--  net/mac80211/ibss.c | 7
-rw-r--r--  net/mac80211/ieee80211_i.h | 3
-rw-r--r--  net/mac80211/led.c | 11
-rw-r--r--  net/mac80211/main.c | 3
-rw-r--r--  net/mac80211/mesh.c | 27
-rw-r--r--  net/mac80211/mesh.h | 2
-rw-r--r--  net/mac80211/mesh_hwmp.c | 4
-rw-r--r--  net/mac80211/mesh_pathtbl.c | 3
-rw-r--r--  net/mac80211/mlme.c | 32
-rw-r--r--  net/mac80211/ocb.c | 10
-rw-r--r--  net/mac80211/sta_info.c | 15
-rw-r--r--  net/mac80211/sta_info.h | 12
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 2
-rw-r--r--  net/netlabel/netlabel_addrlist.h | 4
-rw-r--r--  net/nfc/netlink.c | 6
-rw-r--r--  net/openvswitch/datapath.c | 14
-rw-r--r--  net/openvswitch/flow.c | 6
-rw-r--r--  net/sched/act_csum.c | 6
-rw-r--r--  net/sched/cls_api.c | 7
-rw-r--r--  net/sched/cls_bpf.c | 8
-rw-r--r--  net/sctp/ipv6.c | 5
-rw-r--r--  net/sctp/sm_make_chunk.c | 2
-rw-r--r--  net/sctp/socket.c | 67
-rw-r--r--  net/sctp/stream.c | 32
-rw-r--r--  net/smc/smc_core.c | 4
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 14
-rw-r--r--  net/sunrpc/clnt.c | 14
-rw-r--r--  net/sunrpc/rpc_pipe.c | 8
-rw-r--r--  net/sunrpc/rpcb_clnt.c | 6
-rw-r--r--  net/sunrpc/sched.c | 3
-rw-r--r--  net/sunrpc/sunrpc_syms.c | 3
-rw-r--r--  net/sunrpc/svc_xprt.c | 106
-rw-r--r--  net/sunrpc/xprt.c | 1
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c | 6
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c | 19
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 27
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 363
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 6
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 11
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 19
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 236
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 119
-rw-r--r--  net/sunrpc/xprtsock.c | 4
-rw-r--r--  net/tipc/group.c | 2
-rw-r--r--  net/tipc/msg.c | 24
-rw-r--r--  net/tipc/msg.h | 7
-rw-r--r--  net/tipc/node.c | 2
-rw-r--r--  net/wireless/nl80211.c | 30
-rw-r--r--  net/wireless/reg.c | 42
-rw-r--r--  net/xfrm/xfrm_policy.c | 30
72 files changed, 1132 insertions, 637 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 4674235b0d9b..b433aff5ff13 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -82,7 +82,7 @@ int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
{
if (clnt->msize != 8192)
seq_printf(m, ",msize=%u", clnt->msize);
- seq_printf(m, "trans=%s", clnt->trans_mod->name);
+ seq_printf(m, ",trans=%s", clnt->trans_mod->name);
switch (clnt->proto_version) {
case p9_proto_legacy:
@@ -773,8 +773,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
}
again:
/* Wait for the response */
- err = wait_event_interruptible(*req->wq,
- req->status >= REQ_STATUS_RCVD);
+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
/*
* Make sure our req is coherent with regard to updates in other
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 903a190319b9..985046ae4231 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -724,12 +724,12 @@ static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
{
if (clnt->trans_mod == &p9_tcp_trans) {
if (clnt->trans_opts.tcp.port != P9_PORT)
- seq_printf(m, "port=%u", clnt->trans_opts.tcp.port);
+ seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port);
} else if (clnt->trans_mod == &p9_fd_trans) {
if (clnt->trans_opts.fd.rfd != ~0)
- seq_printf(m, "rfd=%u", clnt->trans_opts.fd.rfd);
+ seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd);
if (clnt->trans_opts.fd.wfd != ~0)
- seq_printf(m, "wfd=%u", clnt->trans_opts.fd.wfd);
+ seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd);
}
return 0;
}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index f24b25c25106..f3a4efcf1456 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -286,8 +286,8 @@ req_retry:
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
- err = wait_event_interruptible(*chan->vc_wq,
- chan->ring_bufs_avail);
+ err = wait_event_killable(*chan->vc_wq,
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
return err;
@@ -327,7 +327,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
* Other zc request to finish here
*/
if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
- err = wait_event_interruptible(vp_wq,
+ err = wait_event_killable(vp_wq,
(atomic_read(&vp_pinned) < chan->p9_max_pages));
if (err == -ERESTARTSYS)
return err;
@@ -471,8 +471,8 @@ req_retry_pinned:
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
- err = wait_event_interruptible(*chan->vc_wq,
- chan->ring_bufs_avail);
+ err = wait_event_killable(*chan->vc_wq,
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
goto err_out;
@@ -489,8 +489,7 @@ req_retry_pinned:
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
- err = wait_event_interruptible(*req->wq,
- req->status >= REQ_STATUS_RCVD);
+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
/*
* Non kernel buffers are pinned, unpin them
*/
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 6ad3e043c617..325c56043007 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -156,8 +156,8 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
ring = &priv->rings[num];
again:
- while (wait_event_interruptible(ring->wq,
- p9_xen_write_todo(ring, size)) != 0)
+ while (wait_event_killable(ring->wq,
+ p9_xen_write_todo(ring, size)) != 0)
;
spin_lock_irqsave(&ring->lock, flags);
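Note: the 9p hunks above replace wait_event_interruptible() with wait_event_killable(), so a pending non-fatal signal no longer aborts an in-flight 9p request; only a fatal signal (SIGKILL) wakes the waiter early. A minimal sketch of the resulting pattern, mirroring the client.c hunk:

	/* Sketch: sleep until the server reply arrives; only a fatal
	 * signal can interrupt the wait, in which case -ERESTARTSYS is
	 * returned and the caller flushes/tears down the request.
	 */
	err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
	if (err == -ERESTARTSYS)
		return err;
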
diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c
index 67bb1f11e613..9a5850f264ed 100644
--- a/net/ceph/ceph_hash.c
+++ b/net/ceph/ceph_hash.c
@@ -47,28 +47,38 @@ unsigned int ceph_str_hash_rjenkins(const char *str, unsigned int length)
/* handle the last 11 bytes */
c = c + length;
- switch (len) { /* all the case statements fall through */
+ switch (len) {
case 11:
c = c + ((__u32)k[10] << 24);
+ /* fall through */
case 10:
c = c + ((__u32)k[9] << 16);
+ /* fall through */
case 9:
c = c + ((__u32)k[8] << 8);
/* the first byte of c is reserved for the length */
+ /* fall through */
case 8:
b = b + ((__u32)k[7] << 24);
+ /* fall through */
case 7:
b = b + ((__u32)k[6] << 16);
+ /* fall through */
case 6:
b = b + ((__u32)k[5] << 8);
+ /* fall through */
case 5:
b = b + k[4];
+ /* fall through */
case 4:
a = a + ((__u32)k[3] << 24);
+ /* fall through */
case 3:
a = a + ((__u32)k[2] << 16);
+ /* fall through */
case 2:
a = a + ((__u32)k[1] << 8);
+ /* fall through */
case 1:
a = a + k[0];
/* case 0: nothing left to add */
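Note: the ceph_hash.c hunk replaces the single "all the case statements fall through" remark with a per-case marker, which is the form GCC's -Wimplicit-fallthrough warning recognizes; the comment has to sit immediately before the next case label. A minimal sketch of the annotation style, with a hypothetical two-byte tail:

	switch (len) {
	case 2:
		a += (__u32)k[1] << 8;
		/* fall through */
	case 1:
		a += k[0];
		/* case 0: nothing left to add */
	}
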
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 489610ac1cdd..bf9d079cbafd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -37,7 +37,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
return -ENOTSUPP;
}
- WARN_ON(!key->len);
+ if (!key->len)
+ return -EINVAL;
+
key->key = kmemdup(buf, key->len, GFP_NOIO);
if (!key->key) {
ret = -ENOMEM;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index ad93342c90d7..8a4d3758030b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -430,6 +430,7 @@ static void ceph_sock_state_change(struct sock *sk)
switch (sk->sk_state) {
case TCP_CLOSE:
dout("%s TCP_CLOSE\n", __func__);
+ /* fall through */
case TCP_CLOSE_WAIT:
dout("%s TCP_CLOSE_WAIT\n", __func__);
con_sock_state_closing(con);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9ae1bab8c05d..1547107f4854 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1279,9 +1279,10 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
/*
* Older OSDs don't set reply tid even if the orignal
- * request had a non-zero tid. Workaround this weirdness
- * by falling through to the allocate case.
+ * request had a non-zero tid. Work around this weirdness
+ * by allocating a new message.
*/
+ /* fall through */
case CEPH_MSG_MON_MAP:
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index ee43bc13221c..a3d0adc828e6 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -25,9 +25,9 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
return ERR_PTR(-ENOMEM);
while (got < num_pages) {
- rc = get_user_pages_unlocked(
+ rc = get_user_pages_fast(
(unsigned long)data + ((unsigned long)got * PAGE_SIZE),
- num_pages - got, pages + got, write_page ? FOLL_WRITE : 0);
+ num_pages - got, write_page, pages + got);
if (rc < 0)
break;
BUG_ON(rc == 0);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ee29f4f5fa9..07ed21d64f92 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2746,7 +2746,8 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL;
+ return skb->ip_summed != CHECKSUM_PARTIAL &&
+ skb->ip_summed != CHECKSUM_UNNECESSARY;
return skb->ip_summed == CHECKSUM_NONE;
}
@@ -7139,13 +7140,17 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
__dev_xdp_attached(dev, bpf_op, NULL))
return -EBUSY;
- if (bpf_op == ops->ndo_bpf)
- prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
- dev);
- else
- prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ bpf_op == ops->ndo_bpf);
if (IS_ERR(prog))
return PTR_ERR(prog);
+
+ if (!(flags & XDP_FLAGS_HW_MODE) &&
+ bpf_prog_is_dev_bound(prog->aux)) {
+ NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
}
err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
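Note: the dev.c hunk collapses the two XDP program-lookup paths into one bpf_prog_get_type_dev() call; its third argument says whether a device-bound (offloaded) program is acceptable, which here is only true when attaching through the driver's ndo_bpf. The added check then refuses a device-bound program unless the caller asked for XDP_FLAGS_HW_MODE. A hedged sketch of the flow, with names as in the hunk:

	/* attach_drv is true only when going through the driver's ndo_bpf */
	prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
				     bpf_op == ops->ndo_bpf);
	if (IS_ERR(prog))
		return PTR_ERR(prog);
	if (!(flags & XDP_FLAGS_HW_MODE) && bpf_prog_is_dev_bound(prog->aux)) {
		bpf_prog_put(prog);	/* offloaded prog without HW_MODE */
		return -EINVAL;
	}
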
diff --git a/net/core/filter.c b/net/core/filter.c
index 1afa17935954..6a85e67fafce 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1646,9 +1646,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
- .arg3_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
.arg5_type = ARG_ANYTHING,
};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ce4aa827be05..f00499a46927 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1223,9 +1223,10 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
- bool fixedid = false, gso_partial, encap;
+ bool udpfrag = false, fixedid = false, gso_partial, encap;
struct sk_buff *segs = ERR_PTR(-EINVAL);
const struct net_offload *ops;
+ unsigned int offset = 0;
struct iphdr *iph;
int proto, tot_len;
int nhoff;
@@ -1260,6 +1261,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
if (!skb->encapsulation || encap) {
+ udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
/* fixed ID is invalid if DF bit is not set */
@@ -1279,7 +1281,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
skb = segs;
do {
iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
- if (skb_is_gso(skb)) {
+ if (udpfrag) {
+ iph->frag_off = htons(offset >> 3);
+ if (skb->next)
+ iph->frag_off |= htons(IP_MF);
+ offset += skb->len - nhoff - ihl;
+ tot_len = skb->len - nhoff;
+ } else if (skb_is_gso(skb)) {
if (!fixedid) {
iph->id = htons(id);
id += skb_shinfo(skb)->gso_segs;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3b427757b1f8..43b69af242e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
struct rtable *rt;
+ u32 genid, hval;
unsigned int i;
int depth;
- u32 hval = fnhe_hashfun(daddr);
+
+ genid = fnhe_genid(dev_net(nh->nh_dev));
+ hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
@@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
}
if (fnhe) {
+ if (fnhe->fnhe_genid != genid)
+ fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu) {
+ if (pmtu)
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = max(1UL, expires);
- }
+ fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
if (rt)
@@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
- fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f844c06c0676..734cfc8ff76e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2964,7 +2964,7 @@ void tcp_rearm_rto(struct sock *sk)
/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
static void tcp_set_xmit_timer(struct sock *sk)
{
- if (!tcp_schedule_loss_probe(sk))
+ if (!tcp_schedule_loss_probe(sk, true))
tcp_rearm_rto(sk);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 540b7d92cc70..a4d214c7b506 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2391,7 +2391,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
- tcp_schedule_loss_probe(sk);
+ tcp_schedule_loss_probe(sk, false);
is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
@@ -2399,7 +2399,7 @@ repair:
return !tp->packets_out && !tcp_write_queue_empty(sk);
}
-bool tcp_schedule_loss_probe(struct sock *sk)
+bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2440,7 +2440,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
}
/* If the RTO formula yields an earlier time, then use that time. */
- rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */
+ rto_delta_us = advancing_rto ?
+ jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
+ tcp_rto_delta_us(sk); /* How far in future is RTO? */
if (rto_delta_us > 0)
timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index e360d55be555..01801b77bd0d 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -187,16 +187,57 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
-static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int mss;
+ __wsum csum;
+ struct udphdr *uh;
+ struct iphdr *iph;
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
- (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
segs = skb_udp_tunnel_segment(skb, features, false);
+ goto out;
+ }
+
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto out;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
+
+ /* Do software UFO. Complete and fill in the UDP checksum as
+ * HW cannot do checksum of UDP packets sent as multiple
+ * IP fragments.
+ */
+ uh = udp_hdr(skb);
+ iph = ip_hdr(skb);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ /* If there is no outer header we can fake a checksum offload
+ * due to the fact that we have already done the checksum in
+ * software prior to segmenting the frame.
+ */
+ if (!skb->encap_hdr_csum)
+ features |= NETIF_F_HW_CSUM;
+
+ /* Fragment the skb. IP headers of the fragments are updated in
+ * inet_gso_segment()
+ */
+ segs = skb_segment(skb, features);
+out:
return segs;
}
@@ -330,7 +371,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload udpv4_offload = {
.callbacks = {
- .gso_segment = udp4_tunnel_segment,
+ .gso_segment = udp4_ufo_fragment,
.gro_receive = udp4_gro_receive,
.gro_complete = udp4_gro_complete,
},
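Note: udp4_ufo_fragment() completes the UDP checksum in software before calling skb_segment(), because hardware cannot checksum a datagram that is about to go out as multiple IP fragments. A computed checksum of zero cannot be sent as-is, since a zero UDP checksum means "no checksum" on the wire (RFC 768), so it is folded to CSUM_MANGLED_0 (all ones). A minimal sketch of that step, mirroring the hunk above:

	uh->check = 0;				/* exclude the old value  */
	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;	/* 0xffff, still verifies */
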
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b90bad7a4e56..4cfd8e0696fe 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -460,7 +460,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto);
if (tunnel) {
- ip6_tnl_rcv(tunnel, skb, tpi, NULL, false);
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
return PACKET_RCVD;
}
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 4a7e5ffa5108..4fe7c90962dd 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -31,6 +31,37 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
return id;
}
+/* This function exists only for tap drivers that must support broken
+ * clients requesting UFO without specifying an IPv6 fragment ID.
+ *
+ * This is similar to ipv6_select_ident() but we use an independent hash
+ * seed to limit information leakage.
+ *
+ * The network header must be set before calling this.
+ */
+__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
+{
+ static u32 ip6_proxy_idents_hashrnd __read_mostly;
+ struct in6_addr buf[2];
+ struct in6_addr *addrs;
+ u32 id;
+
+ addrs = skb_header_pointer(skb,
+ skb_network_offset(skb) +
+ offsetof(struct ipv6hdr, saddr),
+ sizeof(buf), buf);
+ if (!addrs)
+ return 0;
+
+ net_get_random_once(&ip6_proxy_idents_hashrnd,
+ sizeof(ip6_proxy_idents_hashrnd));
+
+ id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
+ &addrs[1], &addrs[0]);
+ return htonl(id);
+}
+EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
+
__be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 05eb7bc36156..7a8d1500d374 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -472,6 +472,11 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
&match->rt6i_siblings, rt6i_siblings) {
route_choosen--;
if (route_choosen == 0) {
+ struct inet6_dev *idev = sibling->rt6i_idev;
+
+ if (!netif_carrier_ok(sibling->dst.dev) &&
+ idev->cnf.ignore_routes_with_linkdown)
+ break;
if (rt6_score_route(sibling, oif, strict) < 0)
break;
match = sibling;
@@ -1019,7 +1024,7 @@ static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{
struct net_device *dev = rt->dst.dev;
- if (rt->rt6i_flags & RTF_LOCAL) {
+ if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
/* for copies of local routes, dst->dev needs to be the
* device if it is a master device, the master device if
* device is enslaved, and the loopback as the default
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 455fd4e39333..a0f89ad76f9d 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,15 +17,94 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int mss;
+ unsigned int unfrag_ip6hlen, unfrag_len;
+ struct frag_hdr *fptr;
+ u8 *packet_start, *prevhdr;
+ u8 nexthdr;
+ u8 frag_hdr_sz = sizeof(struct frag_hdr);
+ __wsum csum;
+ int tnl_hlen;
+ int err;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features, true);
+ else {
+ const struct ipv6hdr *ipv6h;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto out;
+
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+
+ uh = udp_hdr(skb);
+ ipv6h = ipv6_hdr(skb);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
+ &ipv6h->daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ /* If there is no outer header we can fake a checksum offload
+ * due to the fact that we have already done the checksum in
+ * software prior to segmenting the frame.
+ */
+ if (!skb->encap_hdr_csum)
+ features |= NETIF_F_HW_CSUM;
+
+ /* Check if there is enough headroom to insert fragment header. */
+ tnl_hlen = skb_tnl_header_len(skb);
+ if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
+ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+ goto out;
+ }
+
+ /* Find the unfragmentable header and shift it left by frag_hdr_sz
+ * bytes to insert fragment header.
+ */
+ err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ return ERR_PTR(err);
+ unfrag_ip6hlen = err;
+ nexthdr = *prevhdr;
+ *prevhdr = NEXTHDR_FRAGMENT;
+ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+ unfrag_ip6hlen + tnl_hlen;
+ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
+
+ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
+ skb->mac_header -= frag_hdr_sz;
+ skb->network_header -= frag_hdr_sz;
+
+ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+ fptr->nexthdr = nexthdr;
+ fptr->reserved = 0;
+ fptr->identification = ipv6_proxy_select_ident(dev_net(skb->dev), skb);
+
+ /* Fragment the skb. ipv6 header and the remaining fields of the
+ * fragment header are updated in ipv6_gso_segment()
+ */
+ segs = skb_segment(skb, features);
+ }
+out:
return segs;
}
@@ -75,7 +154,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload udpv6_offload = {
.callbacks = {
- .gso_segment = udp6_tunnel_segment,
+ .gso_segment = udp6_ufo_fragment,
.gro_receive = udp6_gro_receive,
.gro_complete = udp6_gro_complete,
},
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 88cc1ae935ea..d444752dbf40 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -151,21 +151,17 @@ EXPORT_SYMBOL(ieee80211_stop_rx_ba_session);
* After accepting the AddBA Request we activated a timer,
* resetting it after each frame that arrives from the originator.
*/
-static void sta_rx_agg_session_timer_expired(unsigned long data)
+static void sta_rx_agg_session_timer_expired(struct timer_list *t)
{
- /* not an elegant detour, but there is no choice as the timer passes
- * only one argument, and various sta_info are needed here, so init
- * flow in sta_info_create gives the TID as data, while the timer_to_id
- * array gives the sta through container_of */
- u8 *ptid = (u8 *)data;
- u8 *timer_to_id = ptid - *ptid;
- struct sta_info *sta = container_of(timer_to_id, struct sta_info,
- timer_to_tid[0]);
+ struct tid_ampdu_rx *tid_rx_timer =
+ from_timer(tid_rx_timer, t, session_timer);
+ struct sta_info *sta = tid_rx_timer->sta;
+ u8 tid = tid_rx_timer->tid;
struct tid_ampdu_rx *tid_rx;
unsigned long timeout;
rcu_read_lock();
- tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[*ptid]);
+ tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_rx) {
rcu_read_unlock();
return;
@@ -180,21 +176,18 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
rcu_read_unlock();
ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n",
- sta->sta.addr, (u16)*ptid);
+ sta->sta.addr, tid);
- set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired);
+ set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired);
ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
}
-static void sta_rx_agg_reorder_timer_expired(unsigned long data)
+static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
{
- u8 *ptid = (u8 *)data;
- u8 *timer_to_id = ptid - *ptid;
- struct sta_info *sta = container_of(timer_to_id, struct sta_info,
- timer_to_tid[0]);
+ struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, reorder_timer);
rcu_read_lock();
- ieee80211_release_reorder_timeout(sta, *ptid);
+ ieee80211_release_reorder_timeout(tid_rx->sta, tid_rx->tid);
rcu_read_unlock();
}
@@ -356,14 +349,12 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
spin_lock_init(&tid_agg_rx->reorder_lock);
/* rx timer */
- setup_deferrable_timer(&tid_agg_rx->session_timer,
- sta_rx_agg_session_timer_expired,
- (unsigned long)&sta->timer_to_tid[tid]);
+ timer_setup(&tid_agg_rx->session_timer,
+ sta_rx_agg_session_timer_expired, TIMER_DEFERRABLE);
/* rx reorder timer */
- setup_timer(&tid_agg_rx->reorder_timer,
- sta_rx_agg_reorder_timer_expired,
- (unsigned long)&sta->timer_to_tid[tid]);
+ timer_setup(&tid_agg_rx->reorder_timer,
+ sta_rx_agg_reorder_timer_expired, 0);
/* prepare reordering buffer */
tid_agg_rx->reorder_buf =
@@ -399,6 +390,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
tid_agg_rx->auto_seq = auto_seq;
tid_agg_rx->started = false;
tid_agg_rx->reorder_buf_filtered = 0;
+ tid_agg_rx->tid = tid;
+ tid_agg_rx->sta = sta;
status = WLAN_STATUS_SUCCESS;
/* activate it for RX */
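Note: the mac80211 hunks in this series convert the old setup_timer()/unsigned-long-data style to timer_setup(), where the callback receives the struct timer_list pointer and recovers its enclosing structure with from_timer() (a container_of() wrapper), so the timer_to_tid[] detour is no longer needed. A minimal sketch of the pattern, using a hypothetical struct foo:

	struct foo {
		struct timer_list timer;
		int id;
	};

	static void foo_timer_fn(struct timer_list *t)
	{
		struct foo *f = from_timer(f, t, timer);	/* container_of() */

		pr_debug("timer fired for id %d\n", f->id);
	}

	timer_setup(&f->timer, foo_timer_fn, 0);	/* or TIMER_DEFERRABLE */
	mod_timer(&f->timer, jiffies + HZ);
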
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index bef516ec47f9..5f8ab5be369f 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -330,6 +330,11 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
spin_lock_bh(&sta->lock);
+ /* free struct pending for start, if present */
+ tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
+ kfree(tid_tx);
+ sta->ampdu_mlme.tid_start_tx[tid] = NULL;
+
tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
if (!tid_tx) {
spin_unlock_bh(&sta->lock);
@@ -422,15 +427,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
* add Block Ack response will arrive from the recipient.
* If this timer expires sta_addba_resp_timer_expired will be executed.
*/
-static void sta_addba_resp_timer_expired(unsigned long data)
+static void sta_addba_resp_timer_expired(struct timer_list *t)
{
- /* not an elegant detour, but there is no choice as the timer passes
- * only one argument, and both sta_info and TID are needed, so init
- * flow in sta_info_create gives the TID as data, while the timer_to_id
- * array gives the sta through container_of */
- u16 tid = *(u8 *)data;
- struct sta_info *sta = container_of((void *)data,
- struct sta_info, timer_to_tid[tid]);
+ struct tid_ampdu_tx *tid_tx_timer =
+ from_timer(tid_tx_timer, t, addba_resp_timer);
+ struct sta_info *sta = tid_tx_timer->sta;
+ u8 tid = tid_tx_timer->tid;
struct tid_ampdu_tx *tid_tx;
/* check if the TID waits for addBA response */
@@ -525,21 +527,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
* After accepting the AddBA Response we activated a timer,
* resetting it after each frame that we send.
*/
-static void sta_tx_agg_session_timer_expired(unsigned long data)
+static void sta_tx_agg_session_timer_expired(struct timer_list *t)
{
- /* not an elegant detour, but there is no choice as the timer passes
- * only one argument, and various sta_info are needed here, so init
- * flow in sta_info_create gives the TID as data, while the timer_to_id
- * array gives the sta through container_of */
- u8 *ptid = (u8 *)data;
- u8 *timer_to_id = ptid - *ptid;
- struct sta_info *sta = container_of(timer_to_id, struct sta_info,
- timer_to_tid[0]);
+ struct tid_ampdu_tx *tid_tx_timer =
+ from_timer(tid_tx_timer, t, session_timer);
+ struct sta_info *sta = tid_tx_timer->sta;
+ u8 tid = tid_tx_timer->tid;
struct tid_ampdu_tx *tid_tx;
unsigned long timeout;
rcu_read_lock();
- tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[*ptid]);
+ tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
rcu_read_unlock();
return;
@@ -555,9 +553,9 @@ static void sta_tx_agg_session_timer_expired(unsigned long data)
rcu_read_unlock();
ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n",
- sta->sta.addr, (u16)*ptid);
+ sta->sta.addr, tid);
- ieee80211_stop_tx_ba_session(&sta->sta, *ptid);
+ ieee80211_stop_tx_ba_session(&sta->sta, tid);
}
int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
@@ -670,16 +668,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
__set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
tid_tx->timeout = timeout;
+ tid_tx->sta = sta;
+ tid_tx->tid = tid;
/* response timer */
- setup_timer(&tid_tx->addba_resp_timer,
- sta_addba_resp_timer_expired,
- (unsigned long)&sta->timer_to_tid[tid]);
+ timer_setup(&tid_tx->addba_resp_timer, sta_addba_resp_timer_expired, 0);
/* tx timer */
- setup_deferrable_timer(&tid_tx->session_timer,
- sta_tx_agg_session_timer_expired,
- (unsigned long)&sta->timer_to_tid[tid]);
+ timer_setup(&tid_tx->session_timer,
+ sta_tx_agg_session_timer_expired, TIMER_DEFERRABLE);
/* assign a dialog token */
sta->ampdu_mlme.dialog_token_allocator++;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e9c6aa3ed05b..db07e0de9a03 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1711,10 +1711,10 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
sdata_unlock(sdata);
}
-static void ieee80211_ibss_timer(unsigned long data)
+static void ieee80211_ibss_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
- (struct ieee80211_sub_if_data *) data;
+ from_timer(sdata, t, u.ibss.timer);
ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
@@ -1723,8 +1723,7 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
- setup_timer(&ifibss->timer, ieee80211_ibss_timer,
- (unsigned long) sdata);
+ timer_setup(&ifibss->timer, ieee80211_ibss_timer, 0);
INIT_LIST_HEAD(&ifibss->incomplete_stations);
spin_lock_init(&ifibss->incomplete_lock);
INIT_WORK(&ifibss->csa_connection_drop_work,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 68f874e73561..885d00b41911 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1057,6 +1057,7 @@ struct tpt_led_trigger {
const struct ieee80211_tpt_blink *blink_table;
unsigned int blink_table_len;
struct timer_list timer;
+ struct ieee80211_local *local;
unsigned long prev_traffic;
unsigned long tx_bytes, rx_bytes;
unsigned int active, want;
@@ -1932,7 +1933,7 @@ static inline int ieee80211_ac_from_tid(int tid)
void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
-void ieee80211_dynamic_ps_timer(unsigned long data);
+void ieee80211_dynamic_ps_timer(struct timer_list *t);
void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
bool powersave);
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 0505845b7ab8..ba0b507ea691 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -248,10 +248,10 @@ static unsigned long tpt_trig_traffic(struct ieee80211_local *local,
return DIV_ROUND_UP(delta, 1024 / 8);
}
-static void tpt_trig_timer(unsigned long data)
+static void tpt_trig_timer(struct timer_list *t)
{
- struct ieee80211_local *local = (void *)data;
- struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
+ struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer);
+ struct ieee80211_local *local = tpt_trig->local;
struct led_classdev *led_cdev;
unsigned long on, off, tpt;
int i;
@@ -306,8 +306,9 @@ __ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
tpt_trig->blink_table = blink_table;
tpt_trig->blink_table_len = blink_table_len;
tpt_trig->want = flags;
+ tpt_trig->local = local;
- setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local);
+ timer_setup(&tpt_trig->timer, tpt_trig_timer, 0);
local->tpt_led_trigger = tpt_trig;
@@ -326,7 +327,7 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
tpt_trig_traffic(local, tpt_trig);
tpt_trig->running = true;
- tpt_trig_timer((unsigned long)local);
+ tpt_trig_timer(&tpt_trig->timer);
mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ));
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8aa1f5b6a051..e054a2fd8d38 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -633,8 +633,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
ieee80211_dynamic_ps_enable_work);
INIT_WORK(&local->dynamic_ps_disable_work,
ieee80211_dynamic_ps_disable_work);
- setup_timer(&local->dynamic_ps_timer,
- ieee80211_dynamic_ps_timer, (unsigned long) local);
+ timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0);
INIT_WORK(&local->sched_scan_stopped_work,
ieee80211_sched_scan_stopped_work);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 7a76c4a6df30..5e27364e10ac 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -37,9 +37,10 @@ void ieee80211s_stop(void)
kmem_cache_destroy(rm_cache);
}
-static void ieee80211_mesh_housekeeping_timer(unsigned long data)
+static void ieee80211_mesh_housekeeping_timer(struct timer_list *t)
{
- struct ieee80211_sub_if_data *sdata = (void *) data;
+ struct ieee80211_sub_if_data *sdata =
+ from_timer(sdata, t, u.mesh.housekeeping_timer);
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
@@ -528,18 +529,18 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static void ieee80211_mesh_path_timer(unsigned long data)
+static void ieee80211_mesh_path_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
- (struct ieee80211_sub_if_data *) data;
+ from_timer(sdata, t, u.mesh.mesh_path_timer);
ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
-static void ieee80211_mesh_path_root_timer(unsigned long data)
+static void ieee80211_mesh_path_root_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
- (struct ieee80211_sub_if_data *) data;
+ from_timer(sdata, t, u.mesh.mesh_path_root_timer);
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
@@ -1442,9 +1443,8 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
static u8 zero_addr[ETH_ALEN] = {};
- setup_timer(&ifmsh->housekeeping_timer,
- ieee80211_mesh_housekeeping_timer,
- (unsigned long) sdata);
+ timer_setup(&ifmsh->housekeeping_timer,
+ ieee80211_mesh_housekeeping_timer, 0);
ifmsh->accepting_plinks = true;
atomic_set(&ifmsh->mpaths, 0);
@@ -1458,12 +1458,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
mesh_pathtbl_init(sdata);
- setup_timer(&ifmsh->mesh_path_timer,
- ieee80211_mesh_path_timer,
- (unsigned long) sdata);
- setup_timer(&ifmsh->mesh_path_root_timer,
- ieee80211_mesh_path_root_timer,
- (unsigned long) sdata);
+ timer_setup(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, 0);
+ timer_setup(&ifmsh->mesh_path_root_timer,
+ ieee80211_mesh_path_root_timer, 0);
INIT_LIST_HEAD(&ifmsh->preq_queue.list);
skb_queue_head_init(&ifmsh->ps.bc_buf);
spin_lock_init(&ifmsh->mesh_preq_queue_lock);
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 465b7853edc0..ee56f18cad3f 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -296,7 +296,7 @@ void mesh_path_tx_pending(struct mesh_path *mpath);
int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata);
void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata);
int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr);
-void mesh_path_timer(unsigned long data);
+void mesh_path_timer(struct timer_list *t);
void mesh_path_flush_by_nexthop(struct sta_info *sta);
void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 146ec6c0f12f..4f7826d7b47c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1194,9 +1194,9 @@ endlookup:
return err;
}
-void mesh_path_timer(unsigned long data)
+void mesh_path_timer(struct timer_list *t)
{
- struct mesh_path *mpath = (void *) data;
+ struct mesh_path *mpath = from_timer(mpath, t, timer);
struct ieee80211_sub_if_data *sdata = mpath->sdata;
int ret;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 97269caafecd..86c8dfef56a4 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -399,8 +399,7 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata,
skb_queue_head_init(&new_mpath->frame_queue);
new_mpath->exp_time = jiffies;
spin_lock_init(&new_mpath->state_lock);
- setup_timer(&new_mpath->timer, mesh_path_timer,
- (unsigned long) new_mpath);
+ timer_setup(&new_mpath->timer, mesh_path_timer, 0);
return new_mpath;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e4ededa1909d..04460440d731 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1066,10 +1066,10 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
}
EXPORT_SYMBOL(ieee80211_chswitch_done);
-static void ieee80211_chswitch_timer(unsigned long data)
+static void ieee80211_chswitch_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
- (struct ieee80211_sub_if_data *) data;
+ from_timer(sdata, t, u.mgd.chswitch_timer);
ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
}
@@ -1577,9 +1577,9 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
}
}
-void ieee80211_dynamic_ps_timer(unsigned long data)
+void ieee80211_dynamic_ps_timer(struct timer_list *t)
{
- struct ieee80211_local *local = (void *) data;
+ struct ieee80211_local *local = from_timer(local, t, dynamic_ps_timer);
ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work);
}
@@ -3711,10 +3711,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
sdata_unlock(sdata);
}
-static void ieee80211_sta_timer(unsigned long data)
+static void ieee80211_sta_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
- (struct ieee80211_sub_if_data *) data;
+ from_timer(sdata, t, u.mgd.timer);
ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
@@ -3991,10 +3991,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
sdata_unlock(sdata);
}
-static void ieee80211_sta_bcn_mon_timer(unsigned long data)
+static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
- (struct ieee80211_sub_if_data *) data;
+ from_timer(sdata, t, u.mgd.bcn_mon_timer);
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn)
@@ -4005,10 +4005,10 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
&sdata->u.mgd.beacon_connection_loss_work);
}
-static void ieee80211_sta_conn_mon_timer(unsigned long data)
+static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
- (struct ieee80211_sub_if_data *) data;
+ from_timer(sdata, t, u.mgd.conn_mon_timer);
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
@@ -4139,14 +4139,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work);
INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work,
ieee80211_tdls_peer_del_work);
- setup_timer(&ifmgd->timer, ieee80211_sta_timer,
- (unsigned long) sdata);
- setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
- (unsigned long) sdata);
- setup_timer(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer,
- (unsigned long) sdata);
- setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer,
- (unsigned long) sdata);
+ timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0);
+ timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0);
+ timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0);
+ timer_setup(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, 0);
INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk,
ieee80211_sta_handle_tspec_ac_params_wk);
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 88e6ebbbe24f..d351dc1162be 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -150,9 +150,10 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata)
sdata_unlock(sdata);
}
-static void ieee80211_ocb_housekeeping_timer(unsigned long data)
+static void ieee80211_ocb_housekeeping_timer(struct timer_list *t)
{
- struct ieee80211_sub_if_data *sdata = (void *)data;
+ struct ieee80211_sub_if_data *sdata =
+ from_timer(sdata, t, u.ocb.housekeeping_timer);
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
@@ -165,9 +166,8 @@ void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
- setup_timer(&ifocb->housekeeping_timer,
- ieee80211_ocb_housekeeping_timer,
- (unsigned long)sdata);
+ timer_setup(&ifocb->housekeeping_timer,
+ ieee80211_ocb_housekeeping_timer, 0);
INIT_LIST_HEAD(&ifocb->incomplete_stations);
spin_lock_init(&ifocb->incomplete_lock);
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index a3060e55122c..0c5627f8a104 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -379,14 +379,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (sta_prepare_rate_control(local, sta, gfp))
goto free_txq;
- for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
- /*
- * timer_to_tid must be initialized with identity mapping
- * to enable session_timer's data differentiation. See
- * sta_rx_agg_session_timer_expired for usage.
- */
- sta->timer_to_tid[i] = i;
- }
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
skb_queue_head_init(&sta->ps_tx_buf[i]);
skb_queue_head_init(&sta->tx_filtered[i]);
@@ -1064,9 +1056,9 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata,
return ret;
}
-static void sta_info_cleanup(unsigned long data)
+static void sta_info_cleanup(struct timer_list *t)
{
- struct ieee80211_local *local = (struct ieee80211_local *) data;
+ struct ieee80211_local *local = from_timer(local, t, sta_cleanup);
struct sta_info *sta;
bool timer_needed = false;
@@ -1098,8 +1090,7 @@ int sta_info_init(struct ieee80211_local *local)
mutex_init(&local->sta_mtx);
INIT_LIST_HEAD(&local->sta_list);
- setup_timer(&local->sta_cleanup, sta_info_cleanup,
- (unsigned long)local);
+ timer_setup(&local->sta_cleanup, sta_info_cleanup, 0);
return 0;
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c54acd10562..cd53619435b6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -126,6 +126,8 @@ enum ieee80211_agg_stop_reason {
AGG_STOP_DESTROY_STA,
};
+struct sta_info;
+
/**
* struct tid_ampdu_tx - TID aggregation information (Tx).
*
@@ -133,8 +135,10 @@ enum ieee80211_agg_stop_reason {
* @session_timer: check if we keep Tx-ing on the TID (by timeout value)
* @addba_resp_timer: timer for peer's response to addba request
* @pending: pending frames queue -- use sta's spinlock to protect
+ * @sta: station we are attached to
* @dialog_token: dialog token for aggregation session
* @timeout: session timeout value to be filled in ADDBA requests
+ * @tid: TID number
* @state: session state (see above)
* @last_tx: jiffies of last tx activity
* @stop_initiator: initiator of a session stop
@@ -158,6 +162,7 @@ struct tid_ampdu_tx {
struct timer_list session_timer;
struct timer_list addba_resp_timer;
struct sk_buff_head pending;
+ struct sta_info *sta;
unsigned long state;
unsigned long last_tx;
u16 timeout;
@@ -169,6 +174,7 @@ struct tid_ampdu_tx {
u16 failed_bar_ssn;
bool bar_pending;
bool amsdu;
+ u8 tid;
};
/**
@@ -181,12 +187,14 @@ struct tid_ampdu_tx {
* @reorder_time: jiffies when skb was added
* @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
* @reorder_timer: releases expired frames from the reorder buffer.
+ * @sta: station we are attached to
* @last_rx: jiffies of last rx activity
* @head_seq_num: head sequence number in reordering buffer.
* @stored_mpdu_num: number of MPDUs in reordering buffer
* @ssn: Starting Sequence Number expected to be aggregated.
* @buf_size: buffer size for incoming A-MPDUs
* @timeout: reset timer value (in TUs).
+ * @tid: TID number
* @rcu_head: RCU head used for freeing this struct
* @reorder_lock: serializes access to reorder buffer, see below.
* @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
@@ -208,6 +216,7 @@ struct tid_ampdu_rx {
u64 reorder_buf_filtered;
struct sk_buff_head *reorder_buf;
unsigned long *reorder_time;
+ struct sta_info *sta;
struct timer_list session_timer;
struct timer_list reorder_timer;
unsigned long last_rx;
@@ -216,6 +225,7 @@ struct tid_ampdu_rx {
u16 ssn;
u16 buf_size;
u16 timeout;
+ u8 tid;
u8 auto_seq:1,
removed:1,
started:1;
@@ -447,7 +457,6 @@ struct ieee80211_sta_rx_stats {
* plus one for non-QoS frames)
* @tid_seq: per-TID sequence numbers for sending to this STA
* @ampdu_mlme: A-MPDU state machine state
- * @timer_to_tid: identity mapping to ID timers
* @mesh: mesh STA information
* @debugfs_dir: debug filesystem directory dentry
* @dead: set to true when sta is unlinked
@@ -554,7 +563,6 @@ struct sta_info {
* Aggregation information, locked with lock.
*/
struct sta_ampdu_mlme ampdu_mlme;
- u8 timer_to_tid[IEEE80211_NUM_TIDS];
#ifdef CONFIG_MAC80211_DEBUGFS
struct dentry *debugfs_dir;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6e0adfefb9ed..59c08997bfdf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -533,6 +533,7 @@ nla_put_failure:
return -1;
}
+#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
const struct nf_conntrack_l3proto *l3proto;
@@ -552,6 +553,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
return len + len4;
}
+#endif
static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
{
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index d0f38bc9af6d..ac709f0f197b 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
struct list_head *i = s;
struct netlbl_af4list *n = __af4list_entry(s);
while (i != h && !n->valid) {
- i = rcu_dereference(i->next);
+ i = rcu_dereference(list_next_rcu(i));
n = __af4list_entry(i);
}
return n;
@@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
struct list_head *i = s;
struct netlbl_af6list *n = __af6list_entry(s);
while (i != h && !n->valid) {
- i = rcu_dereference(i->next);
+ i = rcu_dereference(list_next_rcu(i));
n = __af6list_entry(i);
}
return n;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f6359c277212..c0b83dc9d993 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
if (!hdr)
return -EMSGSIZE;
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
@@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
return -EMSGSIZE;
if (cb)
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nfc_genl_setup_device_added(dev, msg))
goto nla_put_failure;
@@ -1356,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
goto nla_put_failure;
if (cb)
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 0dab33fb9844..99cfafc2a139 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -308,6 +308,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info,
uint32_t cutlen)
{
+ unsigned short gso_type = skb_shinfo(skb)->gso_type;
+ struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
int err;
@@ -318,9 +320,21 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
if (segs == NULL)
return -EINVAL;
+ if (gso_type & SKB_GSO_UDP) {
+ /* The initial flow key extracted by ovs_flow_key_extract()
+ * in this case is for a first fragment, so we need to
+ * properly mark later fragments.
+ */
+ later_key = *key;
+ later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+ }
+
/* Queue all of the segments. */
skb = segs;
do {
+ if (gso_type & SKB_GSO_UDP && skb != segs)
+ key = &later_key;
+
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
if (err)
break;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 864ddb1e3642..dbe2379329c5 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -631,7 +631,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
key->ip.frag = OVS_FRAG_TYPE_LATER;
return 0;
}
- if (nh->frag_off & htons(IP_MF))
+ if (nh->frag_off & htons(IP_MF) ||
+ skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
else
key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -747,6 +748,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
return 0;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 1c40caadcff9..d836f998117b 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -229,6 +229,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
const struct iphdr *iph;
u16 ul;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ return 1;
+
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
@@ -282,6 +285,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
const struct ipv6hdr *ip6h;
u16 ul;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ return 1;
+
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ab255b421781..7d97f612c9b9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -205,13 +205,14 @@ static void tcf_chain_head_change(struct tcf_chain *chain,
static void tcf_chain_flush(struct tcf_chain *chain)
{
- struct tcf_proto *tp;
+ struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
tcf_chain_head_change(chain, NULL);
- while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
+ while (tp) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_chain_put(chain);
tcf_proto_destroy(tp);
+ tp = rtnl_dereference(chain->filter_chain);
+ tcf_chain_put(chain);
}
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index fb680dafac5a..a9f3e317055c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -382,15 +382,13 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
{
struct bpf_prog *fp;
char *name = NULL;
+ bool skip_sw;
u32 bpf_fd;
bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
+ skip_sw = gen_flags & TCA_CLS_FLAGS_SKIP_SW;
- if (gen_flags & TCA_CLS_FLAGS_SKIP_SW)
- fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS,
- qdisc_dev(tp->q));
- else
- fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
+ fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, skip_sw);
if (IS_ERR(fp))
return PTR_ERR(fp);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a6dfa86c0201..3b18085e3b10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -807,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_port = sh->source;
addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
- if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+ if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
- }
+ else
+ addr->v6.sin6_scope_id = 0;
}
*addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 514465b03829..9bf575f2e8ed 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3594,8 +3594,8 @@ struct sctp_chunk *sctp_make_strreset_req(
__u16 stream_num, __be16 *stream_list,
bool out, bool in)
{
+ __u16 stream_len = stream_num * sizeof(__u16);
struct sctp_strreset_outreq outreq;
- __u16 stream_len = stream_num * 2;
struct sctp_strreset_inreq inreq;
struct sctp_chunk *retval;
__u16 outlen, inlen;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b029757bea03..3204a9b29407 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -84,8 +84,8 @@
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
- size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+ size_t msg_len, struct sock **orig_sk);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1970,9 +1970,16 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err)
+ /* sk can be changed by peel off when waiting for buf. */
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+ if (err) {
+ if (err == -ESRCH) {
+ /* asoc is already dead. */
+ new_asoc = NULL;
+ err = -EPIPE;
+ }
goto out_free;
+ }
}
/* If an address is passed with the sendto/sendmsg call, it is used
@@ -3133,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
*/
static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
{
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_assoc_value params;
struct sctp_association *asoc;
- struct sctp_sock *sp = sctp_sk(sk);
int val;
if (optlen == sizeof(int)) {
@@ -3151,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
if (copy_from_user(&params, optval, optlen))
return -EFAULT;
val = params.assoc_value;
- } else
+ } else {
return -EINVAL;
+ }
- if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)))
- return -EINVAL;
+ if (val) {
+ int min_len, max_len;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id && sctp_style(sk, UDP))
- return -EINVAL;
+ min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
+ min_len -= sizeof(struct sctphdr) +
+ sizeof(struct sctp_data_chunk);
+
+ max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
+ if (val < min_len || val > max_len)
+ return -EINVAL;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
if (asoc) {
if (val == 0) {
- val = asoc->pathmtu;
- val -= sp->pf->af->net_header_len;
+ val = asoc->pathmtu - sp->pf->af->net_header_len;
val -= sizeof(struct sctphdr) +
- sizeof(struct sctp_data_chunk);
+ sizeof(struct sctp_data_chunk);
}
asoc->user_frag = val;
asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
} else {
+ if (params.assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
sp->user_frag = val;
}
@@ -5015,12 +5031,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
if (!asoc)
return -EINVAL;
- /* If there is a thread waiting on more sndbuf space for
- * sending on this asoc, it cannot be peeled.
- */
- if (waitqueue_active(&asoc->wait))
- return -EBUSY;
-
/* An association cannot be branched off from an already peeled-off
* socket, nor is this supported for tcp style sockets.
*/
@@ -7989,7 +7999,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len)
+ size_t msg_len, struct sock **orig_sk)
{
struct sock *sk = asoc->base.sk;
int err = 0;
@@ -8006,10 +8016,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
for (;;) {
prepare_to_wait_exclusive(&asoc->wait, &wait,
TASK_INTERRUPTIBLE);
+ if (asoc->base.dead)
+ goto do_dead;
if (!*timeo_p)
goto do_nonblock;
- if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
- asoc->base.dead)
+ if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
@@ -8022,11 +8033,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
+ if (sk != asoc->base.sk) {
+ release_sock(sk);
+ sk = asoc->base.sk;
+ lock_sock(sk);
+ }
*timeo_p = current_timeo;
}
out:
+ *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
@@ -8034,6 +8051,10 @@ out:
return err;
+do_dead:
+ err = -ESRCH;
+ goto out;
+
do_error:
err = -EPIPE;
goto out;
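
The hunks above let sctp_wait_for_sndbuf() survive a concurrent peel-off: after each sleep it re-checks asoc->base.sk, swaps locks if the association migrated to a new socket, and reports the socket it ends up holding back through orig_sk; a dead association now returns -ESRCH, which sctp_sendmsg() converts to -EPIPE. A rough userspace model of the re-lock step, with mock sock/assoc types standing in for the kernel structures:

/* Model of the "re-lock the association's current socket" pattern;
 * mock_sock/mock_assoc are stand-ins, not the kernel's types. */
#include <pthread.h>
#include <stdio.h>

struct mock_sock  { pthread_mutex_t lock; int id; };
struct mock_assoc { struct mock_sock *sk; int dead; };

/* One wait cycle: sleep with the lock dropped, then make sure we end up
 * holding the lock of whichever socket now owns the association. */
static int wait_cycle(struct mock_assoc *asoc, struct mock_sock **cur)
{
	struct mock_sock *sk = *cur;

	if (asoc->dead)
		return -1;			/* caller maps this to -EPIPE */

	pthread_mutex_unlock(&sk->lock);
	/* ...sleeping here; a peel-off may switch asoc->sk underneath us... */
	pthread_mutex_lock(&sk->lock);

	if (sk != asoc->sk) {			/* association migrated */
		pthread_mutex_unlock(&sk->lock);
		sk = asoc->sk;			/* chase the new owner */
		pthread_mutex_lock(&sk->lock);
	}
	*cur = sk;				/* report the socket we now hold */
	return 0;
}

int main(void)
{
	struct mock_sock a = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct mock_sock b = { PTHREAD_MUTEX_INITIALIZER, 2 };
	struct mock_assoc asoc = { &a, 0 };
	struct mock_sock *sk = &a;

	pthread_mutex_lock(&sk->lock);
	asoc.sk = &b;				/* simulate a concurrent peel-off */
	wait_cycle(&asoc, &sk);
	printf("now holding socket %d\n", sk->id);	/* prints 2 */
	pthread_mutex_unlock(&sk->lock);
	return 0;
}
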
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index b8c8cabb1a58..a11db21dc8a0 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -282,15 +282,31 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
str_nums = params->srs_number_streams;
str_list = params->srs_stream_list;
- if (out && str_nums)
- for (i = 0; i < str_nums; i++)
- if (str_list[i] >= stream->outcnt)
- goto out;
+ if (str_nums) {
+ int param_len = 0;
- if (in && str_nums)
- for (i = 0; i < str_nums; i++)
- if (str_list[i] >= stream->incnt)
- goto out;
+ if (out) {
+ for (i = 0; i < str_nums; i++)
+ if (str_list[i] >= stream->outcnt)
+ goto out;
+
+ param_len = str_nums * sizeof(__u16) +
+ sizeof(struct sctp_strreset_outreq);
+ }
+
+ if (in) {
+ for (i = 0; i < str_nums; i++)
+ if (str_list[i] >= stream->incnt)
+ goto out;
+
+ param_len += str_nums * sizeof(__u16) +
+ sizeof(struct sctp_strreset_inreq);
+ }
+
+ if (param_len > SCTP_MAX_CHUNK_LEN -
+ sizeof(struct sctp_reconf_chunk))
+ goto out;
+ }
nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
if (!nstr_list) {
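
The new check above sizes both reset parameters up front, so a request naming too many streams is rejected before any allocation rather than producing an oversized RECONF chunk. A small sketch of that sizing, with assumed parameter sizes (16-byte outgoing request, 8-byte incoming request, 4-byte chunk header) and a placeholder maximum chunk length:

/* Sketch of the sctp_send_reset_streams() length check; the structure
 * sizes are illustrative assumptions taken from the on-wire layout. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define OUTREQ_HDR_LEN    16    /* assumed struct sctp_strreset_outreq */
#define INREQ_HDR_LEN      8    /* assumed struct sctp_strreset_inreq  */
#define RECONF_CHUNK_HDR   4    /* assumed struct sctp_reconf_chunk    */
#define MAX_CHUNK_LEN  65535    /* placeholder for SCTP_MAX_CHUNK_LEN  */

static bool reset_request_fits(uint16_t str_nums, bool out, bool in)
{
	size_t param_len = 0;

	if (out)
		param_len += str_nums * sizeof(uint16_t) + OUTREQ_HDR_LEN;
	if (in)
		param_len += str_nums * sizeof(uint16_t) + INREQ_HDR_LEN;

	return param_len <= MAX_CHUNK_LEN - RECONF_CHUNK_HDR;
}

int main(void)
{
	printf("%d\n", reset_request_fits(100, true, true));    /* fits */
	printf("%d\n", reset_request_fits(65535, true, true));  /* too large */
	return 0;
}
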
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2578fbd95664..94f21116dac5 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -562,7 +562,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
- struct smc_buf_desc *buf_desc = NULL;
+ struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
struct list_head *buf_list;
int bufsize, bufsize_short;
int sk_buf_size;
@@ -575,7 +575,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
/* use socket send buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
+ for (bufsize_short = smc_compress_bufsize(sk_buf_size);
bufsize_short >= 0; bufsize_short--) {
if (is_rmb) {
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7b1ee5a0b03c..73165e9ca5bf 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
return stat;
if (integ_len > buf->len)
return stat;
- if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
- BUG();
+ if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
+ WARN_ON_ONCE(1);
+ return stat;
+ }
/* copy out mic... */
if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
- BUG();
+ return stat;
if (mic.len > RPC_MAX_AUTH_SIZE)
return stat;
mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
BUG_ON(integ_len % 4);
*p++ = htonl(integ_len);
*p++ = htonl(gc->gc_seq);
- if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
- BUG();
+ if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
+ WARN_ON_ONCE(1);
+ goto out_err;
+ }
if (resbuf->tail[0].iov_base == NULL) {
if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
goto out_err;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2ad827db2704..a801da812f86 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
const char
*rpc_proc_name(const struct rpc_task *task)
{
@@ -1505,7 +1504,6 @@ const char
} else
return "no proc";
}
-#endif
/*
* 0. Initial state
@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
int idx = task->tk_msg.rpc_proc->p_statidx;
+ trace_rpc_request(task);
dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
clnt->cl_program->name, clnt->cl_vers,
rpc_proc_name(task),
@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task)
switch (status) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
+ /* fall through */
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
return;
@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task)
/* Use rate-limiting and a max number of retries if refresh
* had status 0 but failed to update the cred.
*/
+ /* fall through */
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EAGAIN:
status = -EACCES;
+ /* fall through */
case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
@@ -1911,6 +1914,7 @@ call_connect_status(struct rpc_task *task)
task->tk_action = call_bind;
return;
}
+ /* fall through */
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
@@ -1924,6 +1928,7 @@ call_connect_status(struct rpc_task *task)
break;
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EAGAIN:
/* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT:
@@ -2025,6 +2030,7 @@ call_transmit_status(struct rpc_task *task)
rpc_exit(task, task->tk_status);
break;
}
+ /* fall through */
case -ECONNRESET:
case -ECONNABORTED:
case -EADDRINUSE:
@@ -2145,6 +2151,7 @@ call_status(struct rpc_task *task)
* were a timeout.
*/
rpc_delay(task, 3*HZ);
+ /* fall through */
case -ETIMEDOUT:
task->tk_action = call_timeout;
break;
@@ -2152,14 +2159,17 @@ call_status(struct rpc_task *task)
case -ECONNRESET:
case -ECONNABORTED:
rpc_force_rebind(clnt);
+ /* fall through */
case -EADDRINUSE:
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EPIPE:
case -ENOTCONN:
task->tk_action = call_bind;
break;
case -ENOBUFS:
rpc_delay(task, HZ>>2);
+ /* fall through */
case -EAGAIN:
task->tk_action = call_transmit;
break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 61a504fb1ae2..7803f3b6aa53 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return PTR_ERR(gssd_dentry);
}
- dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
- net, NET_NAME(net));
+ dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
+ net->ns.inum, NET_NAME(net));
mutex_lock(&sn->pipefs_sb_lock);
sn->pipefs_sb = sb;
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
@@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb)
goto out;
}
sn->pipefs_sb = NULL;
- dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
- net, NET_NAME(net));
+ dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n",
+ net->ns.inum, NET_NAME(net));
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index ea0676f199c8..c526f8fb37c9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
smp_wmb();
sn->rpcb_users = 1;
dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
- "%p, rpcb_local_clnt4: %p) for net %p%s\n",
- sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
- net, (net == &init_net) ? " (init_net)" : "");
+ "%p, rpcb_local_clnt4: %p) for net %x%s\n",
+ sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
+ net->ns.inum, (net == &init_net) ? " (init_net)" : "");
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5dea47eb31bb..b1b49edd7c4d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
static void rpc_set_active(struct rpc_task *task)
{
- trace_rpc_task_begin(task->tk_client, task, NULL);
-
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+ trace_rpc_task_begin(task->tk_client, task, NULL);
}
/*
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c73de181467a..56f9eff74150 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -65,10 +65,13 @@ err_proc:
static __net_exit void sunrpc_exit_net(struct net *net)
{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
rpc_pipefs_exit_net(net);
unix_gid_cache_destroy(net);
ip_map_cache_destroy(net);
rpc_proc_exit(net);
+ WARN_ON_ONCE(!list_empty(&sn->all_clients));
}
static struct pernet_operations sunrpc_net_ops = {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 71de77bd4423..e8e0831229cf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
svc_xprt_received(new);
}
-int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
- struct net *net, const int family,
- const unsigned short port, int flags)
+static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, const int family,
+ const unsigned short port, int flags)
{
struct svc_xprt_class *xcl;
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
int cpu;
- bool queued = false;
if (!svc_xprt_has_something_to_do(xprt))
goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
atomic_long_inc(&pool->sp_stats.packets);
-redo_search:
+ dprintk("svc: transport %p put into queue\n", xprt);
+ spin_lock_bh(&pool->sp_lock);
+ list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+ pool->sp_stats.sockets_queued++;
+ spin_unlock_bh(&pool->sp_lock);
+
/* find a thread for this xprt */
rcu_read_lock();
list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- /* Do a lockless check first */
- if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
-
- /*
- * Once the xprt has been queued, it can only be dequeued by
- * the task that intends to service it. All we can do at that
- * point is to try to wake this thread back up so that it can
- * do so.
- */
- if (!queued) {
- spin_lock_bh(&rqstp->rq_lock);
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
- /* already busy, move on... */
- spin_unlock_bh(&rqstp->rq_lock);
- continue;
- }
-
- /* this one will do */
- rqstp->rq_xprt = xprt;
- svc_xprt_get(xprt);
- spin_unlock_bh(&rqstp->rq_lock);
- }
- rcu_read_unlock();
-
atomic_long_inc(&pool->sp_stats.threads_woken);
wake_up_process(rqstp->rq_task);
- put_cpu();
- goto out;
- }
- rcu_read_unlock();
-
- /*
- * We didn't find an idle thread to use, so we need to queue the xprt.
- * Do so and then search again. If we find one, we can't hook this one
- * up to it directly but we can wake the thread up in the hopes that it
- * will pick it up once it searches for a xprt to service.
- */
- if (!queued) {
- queued = true;
- dprintk("svc: transport %p put into queue\n", xprt);
- spin_lock_bh(&pool->sp_lock);
- list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
- pool->sp_stats.sockets_queued++;
- spin_unlock_bh(&pool->sp_lock);
- goto redo_search;
+ goto out_unlock;
}
+ set_bit(SP_CONGESTED, &pool->sp_flags);
rqstp = NULL;
+out_unlock:
+ rcu_read_unlock();
put_cpu();
out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
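
The rewritten enqueue path above always places the transport on the pool's ready list first and then tries to claim exactly one idle thread with a single test_and_set_bit(RQ_BUSY), instead of the old lockless-check-then-lock dance with a redo_search retry; if every thread is busy it marks the pool SP_CONGESTED. A userspace model of that claim step, with C11 atomics standing in for the kernel bitops:

/* Model of "queue first, then claim one idle worker"; atomic_flag stands
 * in for test_and_set_bit(RQ_BUSY, &rqstp->rq_flags). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_WORKERS 4

struct worker { atomic_flag busy; int id; };

static struct worker pool[NR_WORKERS];
static atomic_bool congested;

/* Returns the worker that was woken, or NULL if all were busy. */
static struct worker *enqueue_and_wake(void)
{
	/* (the transport is already on the pool's ready list here) */
	for (int i = 0; i < NR_WORKERS; i++) {
		if (atomic_flag_test_and_set(&pool[i].busy))
			continue;	/* already busy, move on */
		return &pool[i];	/* this thread will dequeue the xprt */
	}
	atomic_store(&congested, true);	/* no idle thread: note congestion */
	return NULL;
}

int main(void)
{
	struct worker *w;

	for (int i = 0; i < NR_WORKERS; i++) {
		atomic_flag_clear(&pool[i].busy);
		pool[i].id = i;
	}
	atomic_flag_test_and_set(&pool[0].busy);	/* worker 0 is busy */
	w = enqueue_and_wake();
	printf("woke worker %d\n", w ? w->id : -1);	/* worker 1 */
	return 0;
}
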
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
- struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
long time_left = 0;
/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);
- /* Normally we will wait up to 5 seconds for any required
- * cache information to be provided.
- */
- rqstp->rq_chandle.thread_wait = 5*HZ;
-
- xprt = svc_xprt_dequeue(pool);
- if (xprt) {
- rqstp->rq_xprt = xprt;
-
- /* As there is a shortage of threads and this request
- * had to be queued, don't allow the thread to wait so
- * long for cache updates.
- */
- rqstp->rq_chandle.thread_wait = 1*HZ;
- clear_bit(SP_TASK_PENDING, &pool->sp_flags);
- return xprt;
- }
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt)
+ goto out_found;
/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
+ smp_mb__before_atomic();
+ clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb();
+ smp_mb__after_atomic();
if (likely(rqst_should_sleep(rqstp)))
time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
try_to_freeze();
- spin_lock_bh(&rqstp->rq_lock);
set_bit(RQ_BUSY, &rqstp->rq_flags);
- spin_unlock_bh(&rqstp->rq_lock);
-
- xprt = rqstp->rq_xprt;
- if (xprt != NULL)
- return xprt;
+ smp_mb__after_atomic();
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt)
+ goto out_found;
if (!time_left)
atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (signalled() || kthread_should_stop())
return ERR_PTR(-EINTR);
return ERR_PTR(-EAGAIN);
+out_found:
+ /* Normally we will wait up to 5 seconds for any required
+ * cache information to be provided.
+ */
+ if (!test_bit(SP_CONGESTED, &pool->sp_flags))
+ rqstp->rq_chandle.thread_wait = 5*HZ;
+ else
+ rqstp->rq_chandle.thread_wait = 1*HZ;
+ return rqstp->rq_xprt;
}
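
With SP_CONGESTED in place, the dequeuing thread above now chooses its cache-upcall wait budget from the pool's congestion state rather than from whether the transport happened to be pre-queued. A tiny sketch of that selection (HZ assumed to be 100 here):

/* Wait budget granted to cache upcalls, keyed off pool congestion. */
#include <stdbool.h>
#include <stdio.h>

#define HZ 100	/* assumed tick rate for illustration */

static long cache_thread_wait(bool pool_congested)
{
	if (!pool_congested)
		return 5 * HZ;	/* plenty of threads: allow the usual 5s */
	return 1 * HZ;		/* threads are scarce: don't stall for long */
}

int main(void)
{
	printf("%ld %ld\n", cache_thread_wait(false), cache_thread_wait(true));
	return 0;
}
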
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6160d17a31c4..333b9d697ae5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1139,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
case -EAGAIN:
xprt_add_backlog(xprt, task);
dprintk("RPC: waiting for request slot\n");
+ /* fall through */
default:
task->tk_status = -EAGAIN;
}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 823a781ec89c..8b818bb3518a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
- req->rl_backchannel = true;
+ __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
@@ -223,8 +223,8 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
*p++ = xdr_zero;
*p = xdr_zero;
- if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN,
- &rqst->rq_snd_buf, rpcrdma_noch))
+ if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
+ &rqst->rq_snd_buf, rpcrdma_noch))
return -EIO;
return 0;
}
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index fa759dd2b0f3..29fc84c7ff98 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -306,28 +306,9 @@ out_reset:
}
}
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- bool sync)
-{
- struct rpcrdma_mw *mw;
-
- while (!list_empty(&req->rl_registered)) {
- mw = rpcrdma_pop_mw(&req->rl_registered);
- if (sync)
- fmr_op_recover_mr(mw);
- else
- rpcrdma_defer_mr_recovery(mw);
- }
-}
-
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync,
- .ro_unmap_safe = fmr_op_unmap_safe,
.ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 35d7517ef0e6..773e66e10a15 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -420,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
- rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc)
goto out_senderr;
@@ -508,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
f->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&f->fr_linv_done);
- /* Initialize CQ count, since there is always a signaled
- * WR being posted here. The new cqcount depends on how
- * many SQEs are about to be consumed.
- */
- rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
-
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
@@ -546,7 +539,6 @@ reset_mrs:
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
- rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
while (bad_wr) {
f = container_of(bad_wr, struct rpcrdma_frmr,
fr_invwr);
@@ -559,28 +551,9 @@ reset_mrs:
goto unmap;
}
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- bool sync)
-{
- struct rpcrdma_mw *mw;
-
- while (!list_empty(&req->rl_registered)) {
- mw = rpcrdma_pop_mw(&req->rl_registered);
- if (sync)
- frwr_op_recover_mr(mw);
- else
- rpcrdma_defer_mr_recovery(mw);
- }
-}
-
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync,
- .ro_unmap_safe = frwr_op_unmap_safe,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f1889f4d4803..ed34dc0f144c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
/* Maximum Read list size */
maxsegs += 2; /* segment for head and tail buffers */
- size = maxsegs * sizeof(struct rpcrdma_read_chunk);
+ size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */
size += sizeof(__be32); /* segment count */
- size += sizeof(struct rpcrdma_segment);
+ size += rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max call header size = %u\n",
@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
/* Maximum Write list size */
maxsegs += 2; /* segment for head and tail buffers */
size = sizeof(__be32); /* segment count */
- size += maxsegs * sizeof(struct rpcrdma_segment);
+ size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max reply header size = %u\n",
@@ -511,27 +512,60 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
return 0;
}
-/* Prepare the RPC-over-RDMA header SGE.
+/**
+ * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
+ * @sc: sendctx containing SGEs to unmap
+ *
+ */
+void
+rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
+{
+ struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
+ struct ib_sge *sge;
+ unsigned int count;
+
+ dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
+ __func__, sc->sc_unmap_count, sc);
+
+ /* The first two SGEs contain the transport header and
+ * the inline buffer. These are always left mapped so
+ * they can be cheaply re-used.
+ */
+ sge = &sc->sc_sges[2];
+ for (count = sc->sc_unmap_count; count; ++sge, --count)
+ ib_dma_unmap_page(ia->ri_device,
+ sge->addr, sge->length, DMA_TO_DEVICE);
+
+ if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
+ smp_mb__after_atomic();
+ wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+ }
+}
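
rpcrdma_unmap_sendctx() above leaves the first two SGEs (transport header and inline buffer) mapped for cheap reuse and tears down only the per-call page SGEs. A userspace model of that loop, with a stand-in for ib_dma_unmap_page():

/* Model of rpcrdma_unmap_sendctx(): entries 0 and 1 stay mapped; only the
 * SGEs mapped for this particular Send are torn down. */
#include <stdio.h>

#define MAX_SGES 8

struct sge { int mapped; };

struct sendctx {
	unsigned int unmap_count;	/* SGEs mapped for this Send only */
	struct sge sges[MAX_SGES];
};

static void dma_unmap(struct sge *sge)	/* stand-in for ib_dma_unmap_page() */
{
	sge->mapped = 0;
}

static void unmap_sendctx(struct sendctx *sc)
{
	struct sge *sge = &sc->sges[2];		/* skip the two reusable SGEs */

	for (unsigned int count = sc->unmap_count; count; ++sge, --count)
		dma_unmap(sge);
	sc->unmap_count = 0;
}

int main(void)
{
	struct sendctx sc = { .unmap_count = 3 };

	for (int i = 0; i < 5; i++)
		sc.sges[i].mapped = 1;
	unmap_sendctx(&sc);
	for (int i = 0; i < 5; i++)		/* 0,1 stay mapped; 2-4 unmapped */
		printf("sge[%d] mapped=%d\n", i, sc.sges[i].mapped);
	return 0;
}
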
+
+/* Prepare an SGE for the RPC-over-RDMA transport header.
*/
static bool
rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
u32 len)
{
+ struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
- struct ib_sge *sge = &req->rl_send_sge[0];
+ struct ib_sge *sge = sc->sc_sges;
- if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) {
- if (!__rpcrdma_dma_map_regbuf(ia, rb))
- return false;
- sge->addr = rdmab_addr(rb);
- sge->lkey = rdmab_lkey(rb);
- }
+ if (!rpcrdma_dma_map_regbuf(ia, rb))
+ goto out_regbuf;
+ sge->addr = rdmab_addr(rb);
sge->length = len;
+ sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE);
- req->rl_send_wr.num_sge++;
+ sc->sc_wr.num_sge++;
return true;
+
+out_regbuf:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
}
/* Prepare the Send SGEs. The head and tail iovec, and each entry
@@ -541,10 +575,11 @@ static bool
rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
+ struct rpcrdma_sendctx *sc = req->rl_sendctx;
unsigned int sge_no, page_base, len, remaining;
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
struct ib_device *device = ia->ri_device;
- struct ib_sge *sge = req->rl_send_sge;
+ struct ib_sge *sge = sc->sc_sges;
u32 lkey = ia->ri_pd->local_dma_lkey;
struct page *page, **ppages;
@@ -552,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
* DMA-mapped. Sync the content that has changed.
*/
if (!rpcrdma_dma_map_regbuf(ia, rb))
- return false;
+ goto out_regbuf;
sge_no = 1;
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
@@ -607,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
- req->rl_mapped_sges++;
+ sc->sc_unmap_count++;
ppages++;
remaining -= len;
page_base = 0;
@@ -633,56 +668,61 @@ map_tail:
goto out_mapping_err;
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
- req->rl_mapped_sges++;
+ sc->sc_unmap_count++;
}
out:
- req->rl_send_wr.num_sge = sge_no + 1;
+ sc->sc_wr.num_sge += sge_no;
+ if (sc->sc_unmap_count)
+ __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
return true;
+out_regbuf:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
+
out_mapping_overflow:
+ rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
return false;
out_mapping_err:
+ rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: Send mapping error\n");
return false;
}
-bool
-rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
- u32 hdrlen, struct xdr_buf *xdr,
- enum rpcrdma_chunktype rtype)
+/**
+ * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
+ * @r_xprt: controlling transport
+ * @req: context of RPC Call being marshalled
+ * @hdrlen: size of transport header, in bytes
+ * @xdr: xdr_buf containing RPC Call
+ * @rtype: chunk type being encoded
+ *
+ * Returns 0 on success; otherwise a negative errno is returned.
+ */
+int
+rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req, u32 hdrlen,
+ struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
- req->rl_send_wr.num_sge = 0;
- req->rl_mapped_sges = 0;
-
- if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen))
- goto out_map;
+ req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
+ if (!req->rl_sendctx)
+ return -ENOBUFS;
+ req->rl_sendctx->sc_wr.num_sge = 0;
+ req->rl_sendctx->sc_unmap_count = 0;
+ req->rl_sendctx->sc_req = req;
+ __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+
+ if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
+ return -EIO;
if (rtype != rpcrdma_areadch)
- if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype))
- goto out_map;
-
- return true;
-
-out_map:
- pr_err("rpcrdma: failed to DMA map a Send buffer\n");
- return false;
-}
-
-void
-rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
-{
- struct ib_device *device = ia->ri_device;
- struct ib_sge *sge;
- int count;
+ if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
+ return -EIO;
- sge = &req->rl_send_sge[2];
- for (count = req->rl_mapped_sges; count--; sge++)
- ib_dma_unmap_page(device, sge->addr, sge->length,
- DMA_TO_DEVICE);
- req->rl_mapped_sges = 0;
+ return 0;
}
/**
@@ -833,12 +873,10 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
transfertypes[rtype], transfertypes[wtype],
xdr_stream_pos(xdr));
- if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req,
- xdr_stream_pos(xdr),
- &rqst->rq_snd_buf, rtype)) {
- ret = -EIO;
+ ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
+ &rqst->rq_snd_buf, rtype);
+ if (ret)
goto out_err;
- }
return 0;
out_err:
@@ -970,14 +1008,13 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws,
* straightforward to check the RPC header's direction field.
*/
static bool
-rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
- __be32 xid, __be32 proc)
+rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
struct xdr_stream *xdr = &rep->rr_stream;
__be32 *p;
- if (proc != rdma_msg)
+ if (rep->rr_proc != rdma_msg)
return false;
/* Peek at stream contents without advancing. */
@@ -992,7 +1029,7 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return false;
/* RPC header */
- if (*p++ != xid)
+ if (*p++ != rep->rr_xid)
return false;
if (*p != cpu_to_be32(RPC_CALL))
return false;
@@ -1212,105 +1249,170 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return -EREMOTEIO;
}
+/* Perform XID lookup, reconstruction of the RPC reply, and
+ * RPC completion while holding the transport lock to ensure
+ * the rep, rqst, and rq_task pointers remain stable.
+ */
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpc_rqst *rqst = rep->rr_rqst;
+ unsigned long cwnd;
+ int status;
+
+ xprt->reestablish_timeout = 0;
+
+ switch (rep->rr_proc) {
+ case rdma_msg:
+ status = rpcrdma_decode_msg(r_xprt, rep, rqst);
+ break;
+ case rdma_nomsg:
+ status = rpcrdma_decode_nomsg(r_xprt, rep);
+ break;
+ case rdma_error:
+ status = rpcrdma_decode_error(r_xprt, rep, rqst);
+ break;
+ default:
+ status = -EIO;
+ }
+ if (status < 0)
+ goto out_badheader;
+
+out:
+ spin_lock(&xprt->recv_lock);
+ cwnd = xprt->cwnd;
+ xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(rqst->rq_task);
+
+ xprt_complete_rqst(rqst->rq_task, status);
+ xprt_unpin_rqst(rqst);
+ spin_unlock(&xprt->recv_lock);
+ return;
+
+/* If the incoming reply terminated a pending RPC, the next
+ * RPC call will post a replacement receive buffer as it is
+ * being marshaled.
+ */
+out_badheader:
+ dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
+ rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+ r_xprt->rx_stats.bad_reply_count++;
+ status = -EIO;
+ goto out;
+}
+
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ /* Invalidate and unmap the data payloads before waking
+ * the waiting application. This guarantees the memory
+ * regions are properly fenced from the server before the
+ * application accesses the data. It also ensures proper
+ * send flow control: waking the next RPC waits until this
+ * RPC has relinquished all its Send Queue entries.
+ */
+ if (!list_empty(&req->rl_registered))
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+ &req->rl_registered);
+
+ /* Ensure that any DMA mapped pages associated with
+ * the Send of the RPC Call have been unmapped before
+ * allowing the RPC to complete. This protects argument
+ * memory not controlled by the RPC client from being
+ * re-used before we're done with it.
+ */
+ if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ r_xprt->rx_stats.reply_waits_for_send++;
+ out_of_line_wait_on_bit(&req->rl_flags,
+ RPCRDMA_REQ_F_TX_RESOURCES,
+ bit_wait,
+ TASK_UNINTERRUPTIBLE);
+ }
+}
+
+/* Reply handling runs in the poll worker thread. Anything that
+ * might wait is deferred to a separate workqueue.
+ */
+void rpcrdma_deferred_completion(struct work_struct *work)
+{
+ struct rpcrdma_rep *rep =
+ container_of(work, struct rpcrdma_rep, rr_work);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+
+ rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+ rpcrdma_release_rqst(rep->rr_rxprt, req);
+ rpcrdma_complete_rqst(rep);
+}
+
/* Process received RPC/RDMA messages.
*
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
-void
-rpcrdma_reply_handler(struct work_struct *work)
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
- struct xdr_stream *xdr = &rep->rr_stream;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
- __be32 *p, xid, vers, proc;
- unsigned long cwnd;
- int status;
+ u32 credits;
+ __be32 *p;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
- xdr_init_decode(xdr, &rep->rr_hdrbuf,
+ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base);
/* Fixed transport header fields */
- p = xdr_inline_decode(xdr, 4 * sizeof(*p));
+ p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;
- xid = *p++;
- vers = *p++;
- p++; /* credits */
- proc = *p++;
+ rep->rr_xid = *p++;
+ rep->rr_vers = *p++;
+ credits = be32_to_cpu(*p++);
+ rep->rr_proc = *p++;
+
+ if (rep->rr_vers != rpcrdma_version)
+ goto out_badversion;
- if (rpcrdma_is_bcall(r_xprt, rep, xid, proc))
+ if (rpcrdma_is_bcall(r_xprt, rep))
return;
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
spin_lock(&xprt->recv_lock);
- rqst = xprt_lookup_rqst(xprt, xid);
+ rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
if (!rqst)
goto out_norqst;
xprt_pin_rqst(rqst);
+
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > buf->rb_max_requests)
+ credits = buf->rb_max_requests;
+ buf->rb_credits = credits;
+
spin_unlock(&xprt->recv_lock);
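
The credit update above is a straight clamp performed while the rqst is still pinned under recv_lock: a zero grant would deadlock the transport and an oversized grant would exceed the posted receives. A one-function sketch of the rule, with illustrative values:

/* Sanity-check the server's credit grant before it becomes the cwnd. */
#include <stdio.h>

static unsigned int clamp_credits(unsigned int credits, unsigned int max_requests)
{
	if (credits == 0)
		return 1;			/* don't deadlock */
	if (credits > max_requests)
		return max_requests;
	return credits;
}

int main(void)
{
	printf("%u %u %u\n", clamp_credits(0, 32),
	       clamp_credits(8, 32), clamp_credits(100, 32));	/* 1 8 32 */
	return 0;
}
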
+
req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
+ rep->rr_rqst = rqst;
+ clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
- __func__, rep, req, be32_to_cpu(xid));
-
- /* Invalidate and unmap the data payloads before waking the
- * waiting application. This guarantees the memory regions
- * are properly fenced from the server before the application
- * accesses the data. It also ensures proper send flow control:
- * waking the next RPC waits until this RPC has relinquished
- * all its Send Queue entries.
- */
- if (!list_empty(&req->rl_registered)) {
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
- }
-
- xprt->reestablish_timeout = 0;
- if (vers != rpcrdma_version)
- goto out_badversion;
+ __func__, rep, req, be32_to_cpu(rep->rr_xid));
- switch (proc) {
- case rdma_msg:
- status = rpcrdma_decode_msg(r_xprt, rep, rqst);
- break;
- case rdma_nomsg:
- status = rpcrdma_decode_nomsg(r_xprt, rep);
- break;
- case rdma_error:
- status = rpcrdma_decode_error(r_xprt, rep, rqst);
- break;
- default:
- status = -EIO;
- }
- if (status < 0)
- goto out_badheader;
-
-out:
- spin_lock(&xprt->recv_lock);
- cwnd = xprt->cwnd;
- xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
- if (xprt->cwnd > cwnd)
- xprt_release_rqst_cong(rqst->rq_task);
-
- xprt_complete_rqst(rqst->rq_task, status);
- xprt_unpin_rqst(rqst);
- spin_unlock(&xprt->recv_lock);
- dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
- __func__, xprt, rqst, status);
+ if (list_empty(&req->rl_registered) &&
+ !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
+ rpcrdma_complete_rqst(rep);
+ else
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
@@ -1321,37 +1423,22 @@ out_badstatus:
}
return;
-/* If the incoming reply terminated a pending RPC, the next
- * RPC call will post a replacement receive buffer as it is
- * being marshaled.
- */
out_badversion:
dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(vers));
- status = -EIO;
- r_xprt->rx_stats.bad_reply_count++;
- goto out;
-
-out_badheader:
- dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
- rqst->rq_task->tk_pid, __func__, be32_to_cpu(proc));
- r_xprt->rx_stats.bad_reply_count++;
- status = -EIO;
- goto out;
+ __func__, be32_to_cpu(rep->rr_vers));
+ goto repost;
-/* The req was still available, but by the time the recv_lock
- * was acquired, the rqst and task had been released. Thus the RPC
- * has already been terminated.
+/* The RPC transaction has already been terminated, or the header
+ * is corrupt.
*/
out_norqst:
spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
- __func__, be32_to_cpu(xid));
+ __func__, be32_to_cpu(rep->rr_xid));
goto repost;
out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__);
- goto repost;
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 992594b7cc6b..af7893501e40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
if (ret)
goto out_err;
+ /* Bump page refcnt so Send completion doesn't release
+ * the rq_buffer before all retransmits are complete.
+ */
+ get_page(virt_to_page(rqst->rq_buffer));
ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
if (ret)
goto out_unmap;
@@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
return -EINVAL;
}
- /* svc_rdma_sendto releases this page */
page = alloc_page(RPCRDMA_DEF_GFP);
if (!page)
return -ENOMEM;
@@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
+ put_page(virt_to_page(rqst->rq_buffer));
kfree(rqst->rq_rbuffer);
}
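
The get_page()/put_page() pair added above keeps the page backing the backchannel rq_buffer alive across retransmits, so a Send completion can drop its reference without freeing memory the RPC may still need. A minimal model of the pairing, with a plain counter standing in for the page refcount:

/* Model of the reference pairing: send path pins, completion unpins,
 * and the final free happens only when the RPC itself is released. */
#include <stdio.h>

struct page_ref { int count; };

static void page_get(struct page_ref *p) { p->count++; }
static void page_put(struct page_ref *p)
{
	if (--p->count == 0)
		printf("page freed\n");
}

int main(void)
{
	struct page_ref buf = { .count = 1 };	/* allocation reference */

	page_get(&buf);		/* bc_sendto: pin across the Send */
	page_put(&buf);		/* Send completion drops its reference */
	printf("after completion: count=%d (still usable)\n", buf.count);
	page_put(&buf);		/* bc_free: last reference, page freed */
	return 0;
}
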
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5caf8e722a11..46ec069150d5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
ib_event_msg(event->event), event->event,
event->element.qp);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
break;
}
}
@@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
goto out;
- svc_xprt_enqueue(&xprt->sc_xprt);
- goto out;
+ goto out_enqueue;
flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -333,6 +333,8 @@ flushed:
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
+out_enqueue:
+ svc_xprt_enqueue(&xprt->sc_xprt);
out:
svc_xprt_put(&xprt->sc_xprt);
}
@@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_xprt_enqueue(&xprt->sc_xprt);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
@@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
case RDMA_CM_EVENT_DEVICE_REMOVAL:
dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
xprt, cma_id);
- if (xprt)
+ if (xprt) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_xprt_enqueue(&xprt->sc_xprt);
+ }
break;
default:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c84e2b644e13..646c24494ea7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -678,16 +679,14 @@ xprt_rdma_free(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- if (req->rl_backchannel)
+ if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
return;
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- if (!list_empty(&req->rl_registered))
- ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
- rpcrdma_unmap_sges(ia, req);
+ if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_buffer_put(req);
}
@@ -728,7 +727,8 @@ xprt_rdma_send_request(struct rpc_task *task)
/* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered)))
- r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+ &req->rl_registered);
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
+ set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count);
- seq_printf(seq, "%lu %lu %lu %lu\n",
+ seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
r_xprt->rx_stats.mrs_recovered,
r_xprt->rx_stats.mrs_orphaned,
r_xprt->rx_stats.mrs_allocated,
- r_xprt->rx_stats.local_inv_needed);
+ r_xprt->rx_stats.local_inv_needed,
+ r_xprt->rx_stats.empty_sendctx_q,
+ r_xprt->rx_stats.reply_waits_for_send);
}
static int
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11a1fbf7e59e..710b3f77db82 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,9 +50,10 @@
#include <linux/interrupt.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
+
+#include <asm-generic/barrier.h>
#include <asm/bitops.h>
#include <rdma/ib_cm.h>
@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
-static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
+struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
int
rpcrdma_alloc_wq(void)
@@ -126,30 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_sendctx *sc =
+ container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
+
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
-}
-
-/* Perform basic sanity checking to avoid using garbage
- * to update the credit grant value.
- */
-static void
-rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
-{
- struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
- __be32 *p = rep->rr_rdmabuf->rg_base;
- u32 credits;
- credits = be32_to_cpup(p + 2);
- if (credits == 0)
- credits = 1; /* don't deadlock */
- else if (credits > buffer->rb_max_requests)
- credits = buffer->rb_max_requests;
-
- atomic_set(&buffer->rb_credits, credits);
+ rpcrdma_sendctx_put_locked(sc);
}
/**
@@ -181,11 +170,8 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rdmab_addr(rep->rr_rdmabuf),
wc->byte_len, DMA_FROM_DEVICE);
- if (wc->byte_len >= RPCRDMA_HDRLEN_ERR)
- rpcrdma_update_granted_credits(rep);
-
out_schedule:
- queue_work(rpcrdma_receive_wq, &rep->rr_work);
+ rpcrdma_reply_handler(rep);
return;
out_fail:
@@ -295,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
connected:
- atomic_set(&xprt->rx_buf.rb_credits, 1);
+ xprt->rx_buf.rb_credits = 1;
ep->rep_connected = connstate;
rpcrdma_conn_func(ep);
wake_up_all(&ep->rep_connect_wait);
@@ -564,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */
- ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
- if (ep->rep_cqinit <= 2)
- ep->rep_cqinit = 0; /* always signal? */
- rpcrdma_init_cqcount(ep, 0);
+ ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
+ cdata->max_requests >> 2);
+ ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
- 0, IB_POLL_SOFTIRQ);
+ 1, IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -583,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
recvcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_recv_wr + 1,
- 0, IB_POLL_SOFTIRQ);
+ 0, IB_POLL_WORKQUEUE);
if (IS_ERR(recvcq)) {
rc = PTR_ERR(recvcq);
dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -846,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ib_drain_qp(ia->ri_id->qp);
}
+/* Fixed-size circular FIFO queue. This implementation is wait-free and
+ * lock-free.
+ *
+ * Consumer is the code path that posts Sends. This path dequeues a
+ * sendctx for use by a Send operation. Multiple consumer threads
+ * are serialized by the RPC transport lock, which allows only one
+ * ->send_request call at a time.
+ *
+ * Producer is the code path that handles Send completions. This path
+ * enqueues a sendctx that has been completed. Multiple producer
+ * threads are serialized by the ib_poll_cq() function.
+ */
+
+/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
+ * queue activity, and ib_drain_qp has flushed all remaining Send
+ * requests.
+ */
+static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
+{
+ unsigned long i;
+
+ for (i = 0; i <= buf->rb_sc_last; i++)
+ kfree(buf->rb_sc_ctxs[i]);
+ kfree(buf->rb_sc_ctxs);
+}
+
+static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_sendctx *sc;
+
+ sc = kzalloc(sizeof(*sc) +
+ ia->ri_max_send_sges * sizeof(struct ib_sge),
+ GFP_KERNEL);
+ if (!sc)
+ return NULL;
+
+ sc->sc_wr.wr_cqe = &sc->sc_cqe;
+ sc->sc_wr.sg_list = sc->sc_sges;
+ sc->sc_wr.opcode = IB_WR_SEND;
+ sc->sc_cqe.done = rpcrdma_wc_send;
+ return sc;
+}
+
+static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_sendctx *sc;
+ unsigned long i;
+
+ /* Maximum number of concurrent outstanding Send WRs. Capping
+ * the circular queue size stops Send Queue overflow by causing
+ * the ->send_request call to fail temporarily before too many
+ * Sends are posted.
+ */
+ i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
+ dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
+ buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
+ if (!buf->rb_sc_ctxs)
+ return -ENOMEM;
+
+ buf->rb_sc_last = i - 1;
+ for (i = 0; i <= buf->rb_sc_last; i++) {
+ sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
+ if (!sc)
+ goto out_destroy;
+
+ sc->sc_xprt = r_xprt;
+ buf->rb_sc_ctxs[i] = sc;
+ }
+
+ return 0;
+
+out_destroy:
+ rpcrdma_sendctxs_destroy(buf);
+ return -ENOMEM;
+}
+
+/* The sendctx queue is not guaranteed to have a size that is a
+ * power of two, thus the helpers in circ_buf.h cannot be used.
+ * The other option is to use modulus (%), which can be expensive.
+ */
+static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
+ unsigned long item)
+{
+ return likely(item < buf->rb_sc_last) ? item + 1 : 0;
+}
+
+/**
+ * rpcrdma_sendctx_get_locked - Acquire a send context
+ * @buf: transport buffers from which to acquire an unused context
+ *
+ * Returns pointer to a free send completion context; or NULL if
+ * the queue is empty.
+ *
+ * Usage: Called to acquire an SGE array before preparing a Send WR.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer),
+ * and provides an effective memory barrier that flushes the new value
+ * of rb_sc_head.
+ */
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_xprt *r_xprt;
+ struct rpcrdma_sendctx *sc;
+ unsigned long next_head;
+
+ next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
+
+ if (next_head == READ_ONCE(buf->rb_sc_tail))
+ goto out_emptyq;
+
+ /* ORDER: item must be accessed _before_ head is updated */
+ sc = buf->rb_sc_ctxs[next_head];
+
+ /* Releasing the lock in the caller acts as a memory
+ * barrier that flushes rb_sc_head.
+ */
+ buf->rb_sc_head = next_head;
+
+ return sc;
+
+out_emptyq:
+ /* The queue is "empty" if there have not been enough Send
+ * completions recently. This is a sign the Send Queue is
+ * backing up. Cause the caller to pause and try again.
+ */
+ dprintk("RPC: %s: empty sendctx queue\n", __func__);
+ r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
+ r_xprt->rx_stats.empty_sendctx_q++;
+ return NULL;
+}
+
+/**
+ * rpcrdma_sendctx_put_locked - Release a send context
+ * @sc: send context to release
+ *
+ * Usage: Called from Send completion to return a sendctx

+ * to the queue.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer).
+ */
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
+{
+ struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
+ unsigned long next_tail;
+
+ /* Unmap SGEs of previously completed but unsignaled
+ * Sends by walking up the queue until @sc is found.
+ */
+ next_tail = buf->rb_sc_tail;
+ do {
+ next_tail = rpcrdma_sendctx_next(buf, next_tail);
+
+ /* ORDER: item must be accessed _before_ tail is updated */
+ rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
+
+ } while (buf->rb_sc_ctxs[next_tail] != sc);
+
+ /* Paired with READ_ONCE */
+ smp_store_release(&buf->rb_sc_tail, next_tail);
+}
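
The sendctx functions above implement the circular FIFO described in the comment block: the send path consumes free contexts by advancing rb_sc_head, Send completions release them by walking rb_sc_tail forward to the completed slot, and wraparound avoids a modulus. A self-contained userspace model of the same index arithmetic (a plain index, not a sendctx pointer, stands in for each slot):

/* Userspace model of the sendctx ring: fixed size, wrap without '%',
 * single consumer (send path) and single producer (completion handler). */
#include <stdio.h>

#define QUEUE_SLOTS 5			/* rb_sc_last == QUEUE_SLOTS - 1 */

struct ring {
	unsigned long head;		/* last slot handed to the send path */
	unsigned long tail;		/* last slot returned by a completion */
};

static unsigned long ring_next(unsigned long item)
{
	return item < QUEUE_SLOTS - 1 ? item + 1 : 0;	/* wrap without % */
}

/* Consumer: returns the slot index to use, or -1 if the ring is "empty",
 * i.e. completions have not caught up and the Send Queue is backing up. */
static long ring_get(struct ring *r)
{
	unsigned long next_head = ring_next(r->head);

	if (next_head == r->tail)
		return -1;
	r->head = next_head;
	return (long)next_head;
}

/* Producer: releases every slot up to and including @slot. */
static void ring_put(struct ring *r, unsigned long slot)
{
	unsigned long next_tail = r->tail;

	do {
		next_tail = ring_next(next_tail);
		/* (the kernel unmaps the sendctx at next_tail here) */
	} while (next_tail != slot);
	r->tail = next_tail;
}

int main(void)
{
	struct ring r = { 0, 0 };
	long a = ring_get(&r), b = ring_get(&r);

	printf("got slots %ld and %ld\n", a, b);
	ring_put(&r, (unsigned long)b);		/* one completion covers both */
	printf("tail now %lu\n", r.tail);
	return 0;
}
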
+
static void
rpcrdma_mr_recovery_worker(struct work_struct *work)
{
@@ -941,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
- req->rl_cqe.done = rpcrdma_wc_send;
req->rl_buffer = &r_xprt->rx_buf;
INIT_LIST_HEAD(&req->rl_registered);
- req->rl_send_wr.next = NULL;
- req->rl_send_wr.wr_cqe = &req->rl_cqe;
- req->rl_send_wr.sg_list = req->rl_send_sge;
- req->rl_send_wr.opcode = IB_WR_SEND;
return req;
}
@@ -974,7 +1116,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
- INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
+ INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
rep->rr_recv_wr.next = NULL;
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
@@ -995,7 +1137,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
- atomic_set(&buf->rb_credits, 1);
spin_lock_init(&buf->rb_mwlock);
spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock);
@@ -1022,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(req);
goto out;
}
- req->rl_backchannel = false;
list_add(&req->rl_list, &buf->rb_send_bufs);
}
@@ -1040,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
+ rc = rpcrdma_sendctxs_create(r_xprt);
+ if (rc)
+ goto out;
+
return 0;
out:
rpcrdma_buffer_destroy(buf);
@@ -1116,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
cancel_delayed_work_sync(&buf->rb_recovery_worker);
cancel_delayed_work_sync(&buf->rb_refresh_worker);
+ rpcrdma_sendctxs_destroy(buf);
+
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
@@ -1231,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply;
- req->rl_send_wr.num_sge = 0;
req->rl_reply = NULL;
spin_lock(&buffers->rb_lock);
@@ -1363,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_ep *ep,
struct rpcrdma_req *req)
{
- struct ib_send_wr *send_wr = &req->rl_send_wr;
+ struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
struct ib_send_wr *send_wr_fail;
int rc;
@@ -1377,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
dprintk("RPC: %s: posting %d s/g entries\n",
__func__, send_wr->num_sge);
- rpcrdma_set_signaled(ep, send_wr);
+ if (!ep->rep_send_count ||
+ test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ send_wr->send_flags |= IB_SEND_SIGNALED;
+ ep->rep_send_count = ep->rep_send_batch;
+ } else {
+ send_wr->send_flags &= ~IB_SEND_SIGNALED;
+ --ep->rep_send_count;
+ }
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
if (rc)
goto out_postsend_err;
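
The signaling policy above replaces the old rep_cqcount counter: a Send completion is requested only every rep_send_batch posts, or immediately when the request mapped extra pages that must be unmapped promptly (RPCRDMA_REQ_F_TX_RESOURCES). A short sketch of that decision:

/* Sketch of the batched-signaling decision made in rpcrdma_ep_post(). */
#include <stdbool.h>
#include <stdio.h>

struct ep { unsigned int send_count, send_batch; };

static bool signal_this_send(struct ep *ep, bool needs_tx_resources)
{
	if (!ep->send_count || needs_tx_resources) {
		ep->send_count = ep->send_batch;	/* reset the budget */
		return true;				/* IB_SEND_SIGNALED */
	}
	--ep->send_count;
	return false;					/* unsignaled Send */
}

int main(void)
{
	struct ep ep = { .send_count = 3, .send_batch = 3 };

	for (int i = 0; i < 5; i++)
		printf("send %d signaled=%d\n", i, signal_this_send(&ep, false));
	return 0;
}
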
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e26a97d2f922..51686d9eac5f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -93,8 +94,8 @@ enum {
*/
struct rpcrdma_ep {
- atomic_t rep_cqcount;
- int rep_cqinit;
+ unsigned int rep_send_count;
+ unsigned int rep_send_batch;
int rep_connected;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
@@ -104,25 +105,6 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker;
};
-static inline void
-rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
-{
- atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
-}
-
-/* To update send queue accounting, provider must take a
- * send completion every now and then.
- */
-static inline void
-rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
-{
- send_wr->send_flags = 0;
- if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
- rpcrdma_init_cqcount(ep, 0);
- send_wr->send_flags = IB_SEND_SIGNALED;
- }
-}
-
/* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are
* allocated when the forward channel is set up.
@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb)
return rb->rg_iov.lkey;
}
-static inline struct rpcrdma_msg *
-rdmab_to_msg(struct rpcrdma_regbuf *rb)
-{
- return (struct rpcrdma_msg *)rb->rg_base;
-}
-
static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb)
{
@@ -202,22 +178,24 @@ enum {
};
/*
- * struct rpcrdma_rep -- this structure encapsulates state required to recv
- * and complete a reply, asychronously. It needs several pieces of
- * state:
- * o recv buffer (posted to provider)
- * o ib_sge (also donated to provider)
- * o status of reply (length, success or not)
- * o bookkeeping state to get run by reply handler (list, etc)
+ * struct rpcrdma_rep -- this structure encapsulates state required
+ * to receive and complete an RPC Reply, asynchronously. It needs
+ * several pieces of state:
*
- * These are allocated during initialization, per-transport instance.
+ * o receive buffer and ib_sge (donated to provider)
+ * o status of receive (success or not, length, inv rkey)
+ * o bookkeeping state to get run by reply handler (XDR stream)
*
- * N of these are associated with a transport instance, and stored in
- * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ * These structures are allocated during transport initialization.
+ * N of these are associated with a transport instance, managed by
+ * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
*/
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
+ __be32 rr_xid;
+ __be32 rr_vers;
+ __be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
struct rpcrdma_regbuf *rr_rdmabuf;
@@ -225,10 +203,34 @@ struct rpcrdma_rep {
struct work_struct rr_work;
struct xdr_buf rr_hdrbuf;
struct xdr_stream rr_stream;
+ struct rpc_rqst *rr_rqst;
struct list_head rr_list;
struct ib_recv_wr rr_recv_wr;
};
+/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
+ */
+struct rpcrdma_req;
+struct rpcrdma_xprt;
+struct rpcrdma_sendctx {
+ struct ib_send_wr sc_wr;
+ struct ib_cqe sc_cqe;
+ struct rpcrdma_xprt *sc_xprt;
+ struct rpcrdma_req *sc_req;
+ unsigned int sc_unmap_count;
+ struct ib_sge sc_sges[];
+};
+
+/* Limit the number of SGEs that can be unmapped during one
+ * Send completion. This caps the amount of work a single
+ * completion can do before returning to the provider.
+ *
+ * Setting this to zero disables Send completion batching.
+ */
+enum {
+ RPCRDMA_MAX_SEND_BATCH = 7,
+};
+
/*
* struct rpcrdma_mw - external memory region metadata
*
@@ -340,26 +342,30 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
- unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
struct xdr_buf rl_hdrbuf;
- struct ib_send_wr rl_send_wr;
- struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
+ struct rpcrdma_sendctx *rl_sendctx;
struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
- struct ib_cqe rl_cqe;
struct list_head rl_all;
- bool rl_backchannel;
+ unsigned long rl_flags;
struct list_head rl_registered; /* registered segments */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
+/* rl_flags */
+enum {
+ RPCRDMA_REQ_F_BACKCHANNEL = 0,
+ RPCRDMA_REQ_F_PENDING,
+ RPCRDMA_REQ_F_TX_RESOURCES,
+};
+
static inline void
rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
{
@@ -399,12 +405,17 @@ struct rpcrdma_buffer {
struct list_head rb_mws;
struct list_head rb_all;
+ unsigned long rb_sc_head;
+ unsigned long rb_sc_tail;
+ unsigned long rb_sc_last;
+ struct rpcrdma_sendctx **rb_sc_ctxs;
+
spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
u32 rb_max_requests;
- atomic_t rb_credits; /* most recent credit grant */
+ u32 rb_credits; /* most recent credit grant */
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -453,10 +464,12 @@ struct rpcrdma_stats {
unsigned long mrs_recovered;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
+ unsigned long empty_sendctx_q;
/* accessed when receiving a reply */
unsigned long long total_rdma_reply;
unsigned long long fixup_copy_count;
+ unsigned long reply_waits_for_send;
unsigned long local_inv_needed;
unsigned long nomsg_call_count;
unsigned long bcall_count;
@@ -473,8 +486,6 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mw **);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_unmap_safe)(struct rpcrdma_xprt *,
- struct rpcrdma_req *, bool);
void (*ro_recover_mr)(struct rpcrdma_mw *);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
@@ -532,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *);
+extern struct workqueue_struct *rpcrdma_receive_wq;
+
/*
* Endpoint calls - xprtrdma/verbs.c
*/
@@ -554,6 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
@@ -610,12 +625,18 @@ enum rpcrdma_chunktype {
rpcrdma_replych
};
-bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
- u32, struct xdr_buf *, enum rpcrdma_chunktype);
-void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
+int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req, u32 hdrlen,
+ struct xdr_buf *xdr,
+ enum rpcrdma_chunktype rtype);
+void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
-void rpcrdma_reply_handler(struct work_struct *work);
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req);
+void rpcrdma_deferred_completion(struct work_struct *work);
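The reworked prototypes split reply processing: rpcrdma_reply_handler() now takes the rpcrdma_rep directly (presumably from the Receive completion), while rpcrdma_deferred_completion() is a work function, with rpcrdma_complete_rqst() and rpcrdma_release_rqst() finishing and releasing the matched request. A minimal sketch of how such a deferred worker could look, assuming rr_work is initialised with it and rr_rqst is set once the XID matches; the function name is hypothetical and the real body in rpc_rdma.c may do more:

/* Illustrative sketch, not part of this patch. */
static void example_deferred_completion(struct work_struct *work)
{
	struct rpcrdma_rep *rep =
		container_of(work, struct rpcrdma_rep, rr_work);

	/* Finish XDR decoding and wake the RPC task waiting on rr_rqst. */
	rpcrdma_complete_rqst(rep);
}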
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4dad5da388d6..9cc850c2719e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -552,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task)
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
+ /* fall through */
case -EPIPE:
xs_close(xprt);
status = -ENOTCONN;
@@ -1611,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->connect_cookie++;
clear_bit(XPRT_CONNECTED, &xprt->state);
xs_tcp_force_close(xprt);
+ /* fall through */
case TCP_CLOSING:
/*
* If the server closed down the connection, make sure that
@@ -2368,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
switch (ret) {
case 0:
xs_set_srcport(transport, sock);
+ /* fall through */
case -EINPROGRESS:
/* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2419,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
default:
printk("%s: connect returned unhandled error %d\n",
__func__, status);
+ /* fall through */
case -EADDRNOTAVAIL:
/* We're probably in TIME_WAIT. Get rid of existing socket,
* and retry
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 7821085a7dd8..12777cac638a 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -539,8 +539,8 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
if (leave) {
- tipc_group_delete_member(grp, m);
__skb_queue_purge(defq);
+ tipc_group_delete_member(grp, m);
break;
}
if (!update)
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 1649d456e22d..b0d07b35909d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == LAST_FRAGMENT) {
TIPC_SKB_CB(head)->validated = false;
- if (unlikely(!tipc_msg_validate(head)))
+ if (unlikely(!tipc_msg_validate(&head)))
goto err;
*buf = head;
TIPC_SKB_CB(head)->tail = NULL;
@@ -201,11 +201,21 @@ err:
* TIPC will ignore the excess, under the assumption that it is optional info
* introduced by a later release of the protocol.
*/
-bool tipc_msg_validate(struct sk_buff *skb)
+bool tipc_msg_validate(struct sk_buff **_skb)
{
- struct tipc_msg *msg;
+ struct sk_buff *skb = *_skb;
+ struct tipc_msg *hdr;
int msz, hsz;
+ /* Ensure that flow control ratio condition is satisfied */
+ if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
+ skb = skb_copy(skb, GFP_ATOMIC);
+ if (!skb)
+ return false;
+ kfree_skb(*_skb);
+ *_skb = skb;
+ }
+
if (unlikely(TIPC_SKB_CB(skb)->validated))
return true;
if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
@@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, hsz)))
return false;
- msg = buf_msg(skb);
- if (unlikely(msg_version(msg) != TIPC_VERSION))
+ hdr = buf_msg(skb);
+ if (unlikely(msg_version(hdr) != TIPC_VERSION))
return false;
- msz = msg_size(msg);
+ msz = msg_size(hdr);
if (unlikely(msz < hsz))
return false;
if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
@@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
skb_pull(*iskb, offset);
imsz = msg_size(buf_msg(*iskb));
skb_trim(*iskb, imsz);
- if (unlikely(!tipc_msg_validate(*iskb)))
+ if (unlikely(!tipc_msg_validate(iskb)))
goto none;
*pos += align(imsz);
return true;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index bf8f57ccc70c..3e4384c222f7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -926,7 +926,7 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
}
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
-bool tipc_msg_validate(struct sk_buff *skb);
+bool tipc_msg_validate(struct sk_buff **_skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
struct sk_buff_head *xmitq);
@@ -954,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb)
return msg_seqno(buf_msg(skb));
}
+static inline int buf_roundup_len(struct sk_buff *skb)
+{
+ return (skb->len / 1024 + 1) * 1024;
+}
+
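buf_roundup_len() rounds the buffer's data length up to the next multiple of 1 KiB; tipc_msg_validate() earlier in this patch copies the skb whenever skb->truesize exceeds four times that rounded length. A standalone worked example of the arithmetic (helper name hypothetical):

/* Illustrative, self-contained sketch of the ratio check, not patch code. */
static int example_needs_copy(unsigned int truesize, unsigned int len)
{
	unsigned int rounded = (len / 1024 + 1) * 1024;

	/* len = 100:  rounded = 1024; truesize = 8192 -> 8 > 4, copy.  */
	/* len = 1400: rounded = 2048; truesize = 4096 -> 2 <= 4, keep. */
	return truesize / rounded > 4;
}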
/* tipc_skb_peek(): peek and reserve first buffer in list
* @list: list to be peeked in
* Returns pointer to first buffer in list, if any
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 009a81631280..507017fe0f1b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1539,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
__skb_queue_head_init(&xmitq);
/* Ensure message is well-formed before touching the header */
- if (unlikely(!tipc_msg_validate(skb)))
+ if (unlikely(!tipc_msg_validate(&skb)))
goto discard;
hdr = buf_msg(skb);
usr = msg_user(hdr);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index bb16f1ec766e..b1ac23ca20c8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2605,10 +2605,32 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
goto nla_put_failure;
}
- if (wdev->ssid_len) {
- if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
+ wdev_lock(wdev);
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ if (wdev->ssid_len &&
+ nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
goto nla_put_failure;
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_ADHOC: {
+ const u8 *ssid_ie;
+
+ if (!wdev->current_bss)
+ break;
+ ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
+ WLAN_EID_SSID);
+ if (!ssid_ie)
+ break;
+ if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
+ goto nla_put_failure;
+ break;
+ }
+ default:
+ /* nothing */
+ break;
}
+ wdev_unlock(wdev);
genlmsg_end(msg, hdr);
return 0;
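In the station/P2P-client/IBSS branches above, the SSID is read from the current BSS's information elements rather than from wdev->ssid. An 802.11 information element is TLV-encoded (one byte of element ID, one byte of length, then the payload), which is why ssid_ie[1] and ssid_ie + 2 are what get handed to nla_put(). A small standalone sketch of that layout, with hypothetical names, assuming ie points at a well-formed element such as ieee80211_bss_get_ie() returns:

/* Illustrative, self-contained sketch, not patch code. */
struct example_ssid {
	const u8 *data;
	u8 len;
};

static struct example_ssid example_ssid_from_ie(const u8 *ie)
{
	struct example_ssid s = {
		.data = ie + 2,	/* payload starts after ID + length bytes */
		.len  = ie[1],	/* length byte */
	};

	return s;
}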
@@ -6291,7 +6313,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
if (!hdr)
return -1;
- genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+ genl_dump_check_consistent(cb, hdr);
if (nl80211_put_regdom(regdom, msg))
goto nla_put_failure;
@@ -7722,7 +7744,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
if (!hdr)
return -1;
- genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
goto nla_put_failure;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 3871998059de..78e71b0390be 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3644,27 +3644,14 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
}
}
-int __init regulatory_init(void)
+static int __init regulatory_init_db(void)
{
- int err = 0;
+ int err;
err = load_builtin_regdb_keys();
if (err)
return err;
- reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
- if (IS_ERR(reg_pdev))
- return PTR_ERR(reg_pdev);
-
- spin_lock_init(&reg_requests_lock);
- spin_lock_init(&reg_pending_beacons_lock);
- spin_lock_init(&reg_indoor_lock);
-
- rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
-
- user_alpha2[0] = '9';
- user_alpha2[1] = '7';
-
/* We always try to get an update for the static regdomain */
err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
if (err) {
@@ -3692,6 +3679,31 @@ int __init regulatory_init(void)
return 0;
}
+#ifndef MODULE
+late_initcall(regulatory_init_db);
+#endif
+
+int __init regulatory_init(void)
+{
+ reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
+ if (IS_ERR(reg_pdev))
+ return PTR_ERR(reg_pdev);
+
+ spin_lock_init(&reg_requests_lock);
+ spin_lock_init(&reg_pending_beacons_lock);
+ spin_lock_init(&reg_indoor_lock);
+
+ rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
+
+ user_alpha2[0] = '9';
+ user_alpha2[1] = '7';
+
+#ifdef MODULE
+ return regulatory_init_db();
+#else
+ return 0;
+#endif
+}

void regulatory_exit(void)
{
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2f57722f5d03..9542975eb2f9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1305,6 +1305,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->xfrm_nr = old->xfrm_nr;
newp->index = old->index;
newp->type = old->type;
+ newp->family = old->family;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1360,29 +1361,36 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
struct net *net = xp_net(policy);
int nx;
int i, error;
+ xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
+ xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
xfrm_address_t tmp;
for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
struct xfrm_state *x;
- xfrm_address_t *local;
- xfrm_address_t *remote;
+ xfrm_address_t *remote = daddr;
+ xfrm_address_t *local = saddr;
struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
- remote = &tmpl->id.daddr;
- local = &tmpl->saddr;
- if (xfrm_addr_any(local, tmpl->encap_family)) {
- error = xfrm_get_saddr(net, fl->flowi_oif,
- &tmp, remote,
- tmpl->encap_family, 0);
- if (error)
- goto fail;
- local = &tmp;
+ if (tmpl->mode == XFRM_MODE_TUNNEL ||
+ tmpl->mode == XFRM_MODE_BEET) {
+ remote = &tmpl->id.daddr;
+ local = &tmpl->saddr;
+ if (xfrm_addr_any(local, tmpl->encap_family)) {
+ error = xfrm_get_saddr(net, fl->flowi_oif,
+ &tmp, remote,
+ tmpl->encap_family, 0);
+ if (error)
+ goto fail;
+ local = &tmp;
+ }
}
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
+ daddr = remote;
+ saddr = local;
continue;
}
if (x) {