summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorYing Huang <ying.huang@intel.com>2017-08-08 12:30:00 +0800
committerIngo Molnar <mingo@kernel.org>2017-08-29 15:14:38 +0200
commit966a967116e699762dbf4af7f9e0d1955c25aa37 (patch)
tree42d79e5b9df8d67be50b8b87cf5fcdaae751b54b /include
parentf52be5708076b75a045ac52c6fef3fffb8300525 (diff)
smp: Avoid using two cache lines for struct call_single_data
struct call_single_data is used in IPIs to transfer information between CPUs. Its size is bigger than sizeof(unsigned long) and less than cache line size. Currently it is not allocated with any explicit alignment requirements. This makes it possible for allocated call_single_data to cross two cache lines, which results in double the number of the cache lines that need to be transferred among CPUs. This can be fixed by requiring call_single_data to be aligned with the size of call_single_data. Currently the size of call_single_data is the power of 2. If we add new fields to call_single_data, we may need to add padding to make sure the size of new definition is the power of 2 as well. Fortunately, this is enforced by GCC, which will report bad sizes. To set alignment requirements of call_single_data to the size of call_single_data, a struct definition and a typedef is used. To test the effect of the patch, I used the vm-scalability multiple thread swap test case (swap-w-seq-mt). The test will create multiple threads and each thread will eat memory until all RAM and part of swap is used, so that huge number of IPIs are triggered when unmapping memory. In the test, the throughput of memory writing improves ~5% compared with misaligned call_single_data, because of faster IPIs. Suggested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Huang, Ying <ying.huang@intel.com> [ Add call_single_data_t and align with size of call_single_data. ] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Aaron Lu <aaron.lu@intel.com> Cc: Borislav Petkov <bp@suse.de> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/87bmnqd6lz.fsf@yhuang-mobile.sh.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/smp.h8
3 files changed, 8 insertions, 4 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 25f6a0cb27d3..006fa09a641e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -134,7 +134,7 @@ typedef __u32 __bitwise req_flags_t;
struct request {
struct list_head queuelist;
union {
- struct call_single_data csd;
+ call_single_data_t csd;
u64 fifo_time;
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 779b23595596..6557f320b66e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2774,7 +2774,7 @@ struct softnet_data {
unsigned int input_queue_head ____cacheline_aligned_in_smp;
/* Elements below can be accessed between CPUs for RPS/RFS */
- struct call_single_data csd ____cacheline_aligned_in_smp;
+ call_single_data_t csd ____cacheline_aligned_in_smp;
struct softnet_data *rps_ipi_next;
unsigned int cpu;
unsigned int input_queue_tail;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 68123c1fe549..98b1fe027fc9 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -14,13 +14,17 @@
#include <linux/llist.h>
typedef void (*smp_call_func_t)(void *info);
-struct call_single_data {
+struct __call_single_data {
struct llist_node llist;
smp_call_func_t func;
void *info;
unsigned int flags;
};
+/* Use __aligned() to avoid to use 2 cache lines for 1 csd */
+typedef struct __call_single_data call_single_data_t
+ __aligned(sizeof(struct __call_single_data));
+
/* total number of cpus in this system (may exceed NR_CPUS) */
extern unsigned int total_cpus;
@@ -48,7 +52,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
smp_call_func_t func, void *info, bool wait,
gfp_t gfp_flags);
-int smp_call_function_single_async(int cpu, struct call_single_data *csd);
+int smp_call_function_single_async(int cpu, call_single_data_t *csd);
#ifdef CONFIG_SMP