From dde0a648fc00e2156a3358600c5fbfb3f53256ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 2 May 2020 19:54:18 -0700 Subject: net_sched: sch_fq: avoid touching f->next from fq_gc() A significant amount of cpu cycles is spent in fq_gc() When fq_gc() does its lookup in the rb-tree, it needs the following fields from struct fq_flow : f->sk (lookup key in the rb-tree) f->fq_node (anchor in the rb-tree) f->next (used to determine if the flow is detached) f->age (used to determine if the flow is candidate for gc) This unfortunately spans two cache lines (assuming 64 bytes cache lines) We can avoid using f->next, if we use the low order bit of f->{age|tail} This low order bit is 0, if f->tail points to an sk_buff. We set the low order bit to 1, if the union contains a jiffies value. Combined with the following patch, this makes sure we only need to bring into cpu caches one cache line per flow. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'net/sched/sch_fq.c') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 4c060134c736..bc9ca1ba507b 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -70,14 +70,14 @@ struct fq_flow { struct sk_buff *head; /* list of skbs for this flow : first skb */ union { struct sk_buff *tail; /* last skb in the list */ - unsigned long age; /* jiffies when flow was emptied, for gc */ + unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */ }; struct rb_node fq_node; /* anchor in fq_root[] trees */ struct sock *sk; int qlen; /* number of packets in flow queue */ int credit; u32 socket_hash; /* sk_hash */ - struct fq_flow *next; /* next pointer in RR lists, or &detached */ + struct fq_flow *next; /* next pointer in RR lists */ struct rb_node rate_node; /* anchor in q->delayed tree */ u64 time_next_packet; @@ -126,20 +126,25 @@ struct fq_sched_data { struct qdisc_watchdog watchdog; }; -/* special value to mark a detached flow (not on old/new list) */ -static struct fq_flow detached, throttled; - +/* + * f->tail and f->age share the same location. + * We can use the low order bit to differentiate if this location points + * to a sk_buff or contains a jiffies value, if we force this value to be odd. + * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2 + */ static void fq_flow_set_detached(struct fq_flow *f) { - f->next = &detached; - f->age = jiffies; + f->age = jiffies | 1UL; } static bool fq_flow_is_detached(const struct fq_flow *f) { - return f->next == &detached; + return !!(f->age & 1UL); } +/* special value to mark a throttled flow (not on old/new list) */ +static struct fq_flow throttled; + static bool fq_flow_is_throttled(const struct fq_flow *f) { return f->next == &throttled; -- cgit