summaryrefslogtreecommitdiff
path: root/include/linux/skb_array.h
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-06-15 13:58:34 -0700
committerDavid S. Miller <davem@davemloft.net>2016-06-15 13:58:34 -0700
commit829e64d16026d381528fe08a65e45942297833c6 (patch)
treeae127906844437808567a4bfccd6d465810c3db3 /include/linux/skb_array.h
parentb2313077ed0db35ee186905d8076a737248edd24 (diff)
parent7d7072e3bad5569f636d3c54a36da40976bfd505 (diff)
Merge branch 'skb_array'
Michael S. Tsirkin says: ==================== skb_array: array based FIFO for skbs This is in response to the proposal by Jason to make tun rx packet queue lockless using a circular buffer. My testing seems to show that at least for the common usecase in networking, which isn't lockless, circular buffer with indices does not perform that well, because each index access causes a cache line to bounce between CPUs, and index access causes stalls due to the dependency. By comparison, an array of pointers where NULL means invalid and !NULL means valid, can be updated without messing up barriers at all and does not have this issue. On the flip side, cache pressure may be caused by using large queues. tun has a queue of 1000 entries by default and that's 8K. At this point I'm not sure this can be solved efficiently. The correct solution might be sizing the queues appropriately. Here's an implementation of this idea: it can be used more or less whenever sk_buff_head can be used, except you need to know the queue size in advance. As this might be useful outside of networking, I implemented a generic array of void pointers, with a type-safe wrapper for skbs. It remains to be seen whether resizing is required, in case it is I included patches implementing resizing by holding both the consumer and the producer locks. I think this code works fine without any extra memory barriers since we always read and write the same location, so the accesses can not be reordered. Multiple writes of the same value into memory would mess things up for us, I don't think compilers would do it though. But if people feel it's better to be safe wrt compiler optimizations, specifying queue as volatile would probably do it in a cleaner way than converting all accesses to READ_ONCE/WRITE_ONCE. Thoughts? The only issue is with calls within a loop using the __ptr_ring_XXX accessors - in theory compiler could hoist accesses out of the loop. Following volatile-considered-harmful.txt I merely documented that callers that busy-poll should invoke cpu_relax(). Most people will use the external skb_array_XXX APIs with a spinlock, so this should not be an issue for them. Eric Dumazet suggested adding an extra pointer to skb for when we have a single outstanding packet. I could not figure out a way to implement this without a shared consumer/producer lock though, which would cause cache line bounces by itself. Jesper, Jason, I know that both of you tested this, please post Tested-by tags for whatever was tested. changes since v7 fix typos noticed by Jesper Brouer changes since v6 resize implemented. peek/full calls are no longer lockless replaced _FIELD macros with _CALL which invoke a function on the pointer rather than just returning a value destroy now scans the array and frees all queued skbs changes since v5 implemented a generic ptr_ring api, and made skb_array a type-safe wrapper apis for taking the spinlock in different contexts following expected usecase in tun changes since v4 (v3 was never posted) documentation dropped SKB_ARRAY_MIN_SIZE heuristic unit test (in userspace, included as patch 2) changes since v2: fixed integer overflow pointed out by Eric. added some comments. changes since v1: fixed bug pointed out by Eric. ==================== Tested-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux/skb_array.h')
-rw-r--r--include/linux/skb_array.h169
1 files changed, 169 insertions, 0 deletions
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
new file mode 100644
index 000000000000..678bfbf78ac4
--- /dev/null
+++ b/include/linux/skb_array.h
@@ -0,0 +1,169 @@
+/*
+ * Definitions for the 'struct skb_array' datastructure.
+ *
+ * Author:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * Limited-size FIFO of skbs. Can be used more or less whenever
+ * sk_buff_head can be used, except you need to know the queue size in
+ * advance.
+ * Implemented as a type-safe wrapper around ptr_ring.
+ */
+
+#ifndef _LINUX_SKB_ARRAY_H
+#define _LINUX_SKB_ARRAY_H 1
+
+#ifdef __KERNEL__
+#include <linux/ptr_ring.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#endif
+
+struct skb_array {
+ struct ptr_ring ring;
+};
+
+/* Might be slightly faster than skb_array_full below, but callers invoking
+ * this in a loop must use a compiler barrier, for example cpu_relax().
+ */
+static inline bool __skb_array_full(struct skb_array *a)
+{
+ return __ptr_ring_full(&a->ring);
+}
+
+static inline bool skb_array_full(struct skb_array *a)
+{
+ return ptr_ring_full(&a->ring);
+}
+
+static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce(&a->ring, skb);
+}
+
+static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_irq(&a->ring, skb);
+}
+
+static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_bh(&a->ring, skb);
+}
+
+static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb)
+{
+ return ptr_ring_produce_any(&a->ring, skb);
+}
+
+/* Might be slightly faster than skb_array_empty below, but only safe if the
+ * array is never resized. Also, callers invoking this in a loop must take care
+ * to use a compiler barrier, for example cpu_relax().
+ */
+static inline bool __skb_array_empty(struct skb_array *a)
+{
+ return !__ptr_ring_peek(&a->ring);
+}
+
+static inline bool skb_array_empty(struct skb_array *a)
+{
+ return ptr_ring_empty(&a->ring);
+}
+
+static inline bool skb_array_empty_bh(struct skb_array *a)
+{
+ return ptr_ring_empty_bh(&a->ring);
+}
+
+static inline bool skb_array_empty_irq(struct skb_array *a)
+{
+ return ptr_ring_empty_irq(&a->ring);
+}
+
+static inline bool skb_array_empty_any(struct skb_array *a)
+{
+ return ptr_ring_empty_any(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume(struct skb_array *a)
+{
+ return ptr_ring_consume(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a)
+{
+ return ptr_ring_consume_irq(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_any(struct skb_array *a)
+{
+ return ptr_ring_consume_any(&a->ring);
+}
+
+static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a)
+{
+ return ptr_ring_consume_bh(&a->ring);
+}
+
+static inline int __skb_array_len_with_tag(struct sk_buff *skb)
+{
+ if (likely(skb)) {
+ int len = skb->len;
+
+ if (skb_vlan_tag_present(skb))
+ len += VLAN_HLEN;
+
+ return len;
+ } else {
+ return 0;
+ }
+}
+
+static inline int skb_array_peek_len(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_irq(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_bh(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_peek_len_any(struct skb_array *a)
+{
+ return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag);
+}
+
+static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp)
+{
+ return ptr_ring_init(&a->ring, size, gfp);
+}
+
+void __skb_array_destroy_skb(void *ptr)
+{
+ kfree_skb(ptr);
+}
+
+int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
+{
+ return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
+}
+
+static inline void skb_array_cleanup(struct skb_array *a)
+{
+ ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb);
+}
+
+#endif /* _LINUX_SKB_ARRAY_H */