summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2019-05-04 16:48:54 -0700
committerDavid S. Miller <davem@davemloft.net>2019-05-07 12:09:25 -0700
commit37c0aead7902b1ddf1b668e1ab74c80b9a7fd183 (patch)
tree0fe6429a54468c277bae91eaa708ad31c70d2be1
parenteeb84aa0d0aff3177c93397cdc62be87e54af486 (diff)
net_sched: sch_fq: handle non connected flows
FQ packet scheduler assumed that packets could be classified based on their owning socket. This means that if a UDP server uses one UDP socket to send packets to different destinations, packets all land in one FQ flow. This is unfair, since each TCP flow has a unique bucket, meaning that in case of pressure (fully utilised uplink), TCP flows have more share of the bandwidth. If we instead detect unconnected sockets, we can use a stochastic hash based on the 4-tuple hash. This also means a QUIC server using one UDP socket will properly spread the outgoing packets to different buckets, and in-kernel pacing based on EDT model will no longer risk having big rb-tree on one flow. Note that UDP application might provide the skb->hash in an ancillary message at sendmsg() time to avoid the cost of a dissection in fq packet scheduler. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/sched/sch_fq.c15
1 files changed, 13 insertions, 2 deletions
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index ee138365ec45..26a94e5cd5df 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -270,6 +270,17 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
*/
sk = (struct sock *)((hash << 1) | 1UL);
skb_orphan(skb);
+ } else if (sk->sk_state == TCP_CLOSE) {
+ unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
+ /*
+ * Sockets in TCP_CLOSE are non connected.
+ * Typical use case is UDP sockets, they can send packets
+ * with sendto() to many different destinations.
+ * We probably could use a generic bit advertising
+ * non connected sockets, instead of sk_state == TCP_CLOSE,
+ * if we care enough.
+ */
+ sk = (struct sock *)((hash << 1) | 1UL);
}
root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
@@ -290,7 +301,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
* It not, we need to refill credit with
* initial quantum
*/
- if (unlikely(skb->sk &&
+ if (unlikely(skb->sk == sk &&
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
@@ -315,7 +326,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
fq_flow_set_detached(f);
f->sk = sk;
- if (skb->sk)
+ if (skb->sk == sk)
f->socket_hash = sk->sk_hash;
f->credit = q->initial_quantum;