aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2010-10-27 18:34:13 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-27 21:03:13 -0400
commitf2c66cd8eeddedb440f33bc0f5cec1ed7ae376cb (patch)
tree4b8cfcd2a76652839121707fc2a49b70489ec055
parent19cd56c48da58bebc3a638e036bcab69469acd27 (diff)
/proc/stat: scalability of irq num per cpu
/proc/stat shows the total number of all interrupts to each cpu. But when the number of IRQs is very large, it takes a very long time, and 'cat /proc/stat' takes more than 10 secs. This is because the sum of all irq events is computed each time /proc/stat is read. This patch adds a per-cpu "sum of all irq" counter and reduces the read cost. The cost of reading /proc/stat is important because it's used by major applications such as 'top', 'ps', 'w', etc. A test on a machine (4096 cpus, 256 nodes, 4592 irqs) shows: %time cat /proc/stat > /dev/null Before Patch: 12.627 sec After Patch: 2.459 sec Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Tested-by: Jack Steiner <steiner@sgi.com> Acked-by: Jack Steiner <steiner@sgi.com> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/proc/stat.c4
-rw-r--r--include/linux/kernel_stat.h14
2 files changed, 13 insertions, 5 deletions
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf31b03fc275..b80c620565bf 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -52,9 +52,7 @@ static int show_stat(struct seq_file *p, void *v)
52 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 52 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
53 guest_nice = cputime64_add(guest_nice, 53 guest_nice = cputime64_add(guest_nice,
54 kstat_cpu(i).cpustat.guest_nice); 54 kstat_cpu(i).cpustat.guest_nice);
55 for_each_irq_nr(j) { 55 sum += kstat_cpu_irqs_sum(i);
56 sum += kstat_irqs_cpu(j, i);
57 }
58 sum += arch_irq_stat_cpu(i); 56 sum += arch_irq_stat_cpu(i);
59 57
60 for (j = 0; j < NR_SOFTIRQS; j++) { 58 for (j = 0; j < NR_SOFTIRQS; j++) {
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index c059044bc6dc..8b9b89085530 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -33,6 +33,7 @@ struct kernel_stat {
33#ifndef CONFIG_GENERIC_HARDIRQS 33#ifndef CONFIG_GENERIC_HARDIRQS
34 unsigned int irqs[NR_IRQS]; 34 unsigned int irqs[NR_IRQS];
35#endif 35#endif
36 unsigned long irqs_sum;
36 unsigned int softirqs[NR_SOFTIRQS]; 37 unsigned int softirqs[NR_SOFTIRQS];
37}; 38};
38 39
@@ -54,6 +55,7 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
54 struct irq_desc *desc) 55 struct irq_desc *desc)
55{ 56{
56 kstat_this_cpu.irqs[irq]++; 57 kstat_this_cpu.irqs[irq]++;
58 kstat_this_cpu.irqs_sum++;
57} 59}
58 60
59static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) 61static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
@@ -65,8 +67,9 @@ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
65extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); 67extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
66#define kstat_irqs_this_cpu(DESC) \ 68#define kstat_irqs_this_cpu(DESC) \
67 ((DESC)->kstat_irqs[smp_processor_id()]) 69 ((DESC)->kstat_irqs[smp_processor_id()])
68#define kstat_incr_irqs_this_cpu(irqno, DESC) \ 70#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\
69 ((DESC)->kstat_irqs[smp_processor_id()]++) 71 ((DESC)->kstat_irqs[smp_processor_id()]++);\
72 kstat_this_cpu.irqs_sum++; } while (0)
70 73
71#endif 74#endif
72 75
@@ -94,6 +97,13 @@ static inline unsigned int kstat_irqs(unsigned int irq)
94 return sum; 97 return sum;
95} 98}
96 99
100/*
101 * Number of interrupts per cpu, since bootup
102 */
103static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
104{
105 return kstat_cpu(cpu).irqs_sum;
106}
97 107
98/* 108/*
99 * Lock/unlock the current runqueue - to extract task statistics: 109 * Lock/unlock the current runqueue - to extract task statistics: