path: root/net/sctp/proc.c
author		David S. Miller <davem@davemloft.net>	2016-09-30 01:50:57 -0400
committer	David S. Miller <davem@davemloft.net>	2016-09-30 01:50:57 -0400
commit		bcdc6efabda3ba6a67f4cb8915873e7d6759b7e6 (patch)
tree		d977daa27ad975a6acbc313ec792592f4cf50847	/net/sctp/proc.c
parent		fa1403548daf3a2c8c988f89db1053df70200405 (diff)
parent		6d4a741cbbfa6612a479656654ca5edf7becc72c (diff)
Merge branch 'net_proc_perf'
Jia He says:

====================
Reduce cache miss for snmp_fold_field

On a PowerPC server with a large CPU count (160), and even with commit
a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the
percpu data at once") applied, I observed several other snmp_fold_field
call sites that cause a high cache-miss rate.

test source code:
================
My simple test case reads the given procfs file endlessly (the original
posting does not define LINELEN; a value is assumed here):

/***********************************************************/
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

#define LINELEN 4096	/* assumed buffer size */

int main(int argc, char **argv)
{
	unsigned int i;
	int fd = -1;
	int rdsize = 0;
	char buf[LINELEN + 1];

	buf[LINELEN] = 0;
	memset(buf, 0, LINELEN);

	if (1 >= argc) {
		printf("file name empty\n");
		return -1;
	}

	fd = open(argv[1], O_RDWR, 0644);
	if (0 > fd) {
		printf("open error\n");
		return -2;
	}

	for (i = 0; i < 0xffffffff; i++) {
		while (0 < (rdsize = read(fd, buf, LINELEN))) {
			/* nothing here */
		}
		lseek(fd, 0, SEEK_SET);
	}

	close(fd);
	return 0;
}
/**********************************************************/

compile and run:
================
gcc test.c -o test
perf stat -d -e cache-misses ./test /proc/net/snmp
perf stat -d -e cache-misses ./test /proc/net/snmp6
perf stat -d -e cache-misses ./test /proc/net/sctp/snmp
perf stat -d -e cache-misses ./test /proc/net/xfrm_stat

before the patch set:
====================
 Performance counter stats for 'system wide':

      355911097  cache-misses                                          [40.08%]
     2356829300  L1-dcache-loads                                       [60.04%]
      355642645  L1-dcache-load-misses  # 15.09% of all L1-dcache hits [60.02%]
      346544541  LLC-loads                                             [59.97%]
         389763  LLC-load-misses        #  0.11% of all LL-cache hits  [40.02%]

    6.245162638 seconds time elapsed

After the patch set:
===================
 Performance counter stats for 'system wide':

      194992476  cache-misses                                          [40.03%]
     6718051877  L1-dcache-loads                                       [60.07%]
      194871921  L1-dcache-load-misses  #  2.90% of all L1-dcache hits [60.11%]
      187632232  LLC-loads                                             [60.04%]
         464466  LLC-load-misses        #  0.25% of all LL-cache hits  [39.89%]

    6.868422769 seconds time elapsed

The cache-miss rate drops from 15.09% to 2.90%.

changelog
=========
v6: - correct v5
v5: - order local variables from longest to shortest line
v4: - move memset into one block of the if statement in snmp6_seq_show_item
    - drop the changes to netstat_seq_show, considering the stack usage too large
v3: - introduce a generic interface (suggested by Marcelo Ricardo Leitner)
    - use max_t instead of a self-defined macro (suggested by David Miller)
v2: - fix a bug in the udplite statistics
    - split snmp_seq_show into 2 parts
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
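The series boils down to swapping the loop order of the per-CPU
aggregation. Below is a minimal userspace sketch of that access-pattern
change; the CPU and field counts, array layout, and all names are
illustrative assumptions, not kernel code:

#include <string.h>

#define NCPUS    160	/* assumed, matching the report's CPU count */
#define NFIELDS   64	/* assumed number of counters per CPU */

/* Stand-in for the per-CPU counter blocks. */
static unsigned long percpu_stats[NCPUS][NFIELDS];

/* Old pattern (one snmp_fold_field call per printed item): every
 * field walks all CPUs again, refetching each CPU's cache lines
 * once per field. */
static unsigned long fold_one_field(int field)
{
	unsigned long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		sum += percpu_stats[cpu][field];
	return sum;
}

/* New pattern (batched): walk each CPU's block once, accumulating
 * all fields while its cache lines are still hot. */
static void fold_batch(unsigned long *sums)
{
	int cpu, field;

	memset(sums, 0, NFIELDS * sizeof(*sums));
	for (cpu = 0; cpu < NCPUS; cpu++)
		for (field = 0; field < NFIELDS; field++)
			sums[field] += percpu_stats[cpu][field];
}

int main(void)
{
	unsigned long sums[NFIELDS];
	int field;

	fold_batch(sums);
	for (field = 0; field < NFIELDS; field++)
		if (sums[field] != fold_one_field(field))
			return 1;	/* the two orders must agree */
	return 0;
}

Both orders compute identical sums; only the traversal order differs,
and with it the number of cross-CPU cache-line fetches on a many-CPU
machine.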
Diffstat (limited to 'net/sctp/proc.c')
-rw-r--r--	net/sctp/proc.c	10
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ef8ba77a5bea..206377fe91ec 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = {
 /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
 static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
 {
+	unsigned long buff[SCTP_MIB_MAX];
 	struct net *net = seq->private;
 	int i;
 
-	for (i = 0; sctp_snmp_list[i].name != NULL; i++)
+	memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX);
+
+	snmp_get_cpu_field_batch(buff, sctp_snmp_list,
+				 net->sctp.sctp_statistics);
+	for (i = 0; sctp_snmp_list[i].name; i++)
 		seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
-			   snmp_fold_field(net->sctp.sctp_statistics,
-					   sctp_snmp_list[i].entry));
+			   buff[i]);
 	return 0;
 }
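For orientation, the batched call above follows the aggregation shape
sketched below. This is a paraphrase inferred from the call site, not
the kernel's actual definition, and it assumes a per-field reader named
snmp_get_cpu_field() with this calling convention:

/* Paraphrase of the aggregation done by snmp_get_cpu_field_batch();
 * inferred from the call site above, not copied from the kernel. */
static void batch_fold_sketch(unsigned long *buff,
			      const struct snmp_mib *itemlist,
			      void __percpu *mib)
{
	int cpu, i;

	/* buff is assumed already zeroed, which is why
	 * sctp_snmp_seq_show() memsets it before the call. */
	for_each_possible_cpu(cpu)
		for (i = 0; itemlist[i].name; i++)
			buff[i] += snmp_get_cpu_field(mib, cpu,
						      itemlist[i].entry);
}

The cost of this pattern is the on-stack buff[SCTP_MIB_MAX] array in the
caller, which is also why the v4 changelog dropped the analogous
netstat_seq_show conversion: its buffer would have made the stack frame
too large.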