aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christoph Lameter <clameter@sgi.com> 2007-05-09 05:35:12 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-09 15:30:56 -0400
commit: d1187ed21026fd512b87851d0ca26d9ae16f9059 (patch)
tree: 35d77758f134f3b69d3e00ca042a5d5ca6a59373
parent: 455c017ae3934797653549704c286e7bcc3a9397 (diff)
vmstat: use our own timer events
vmstat is currently using the cache reaper to periodically bring the statistics up to date. The cache reaper only exists in SLUB as a way to provide compatibility with SLAB. This patch removes the vmstat calls from the slab allocators and provides its own handling.

The advantage is also that we can use a different frequency for the updates. Refreshing vm stats is a pretty fast job so we can run this every second and stagger this by only one tick. This will lead to some overlap in large systems. E.g. a system running at 250 HZ with 1024 processors will have 4 vm updates occurring at once.

However, the vm stats update only accesses per node information. It is only necessary to stagger the vm statistics updates per processor in each node. Vm counter updates occurring on distant nodes will not cause cacheline contention.

We could implement an alternate approach that runs the first processor on each node at the second and then each of the other processors on a node on a subsequent tick. That may be useful to keep a large amount of the second free of timer activity. Maybe the timer folks will have some feedback on this one?

[jirislaby@gmail.com: add missing break]
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/vmstat.h 3
-rw-r--r-- mm/slab.c 1
-rw-r--r-- mm/slub.c 1
-rw-r--r-- mm/vmstat.c 40
4 files changed, 36 insertions, 9 deletions
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index acb1f105870c..d9325cf8a134 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -212,8 +212,6 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
212extern void __dec_zone_state(struct zone *, enum zone_stat_item); 212extern void __dec_zone_state(struct zone *, enum zone_stat_item);
213 213
214void refresh_cpu_vm_stats(int); 214void refresh_cpu_vm_stats(int);
215void refresh_vm_stats(void);
216
217#else /* CONFIG_SMP */ 215#else /* CONFIG_SMP */
218 216
219/* 217/*
@@ -260,7 +258,6 @@ static inline void __dec_zone_page_state(struct page *page,
260#define mod_zone_page_state __mod_zone_page_state 258#define mod_zone_page_state __mod_zone_page_state
261 259
262static inline void refresh_cpu_vm_stats(int cpu) { } 260static inline void refresh_cpu_vm_stats(int cpu) { }
263static inline void refresh_vm_stats(void) { }
264#endif 261#endif
265 262
266#endif /* _LINUX_VMSTAT_H */ 263#endif /* _LINUX_VMSTAT_H */
diff --git a/mm/slab.c b/mm/slab.c
index 6f3d6e240c61..e50908b2bfac 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4156,7 +4156,6 @@ next:
4156 check_irq_on(); 4156 check_irq_on();
4157 mutex_unlock(&cache_chain_mutex); 4157 mutex_unlock(&cache_chain_mutex);
4158 next_reap_node(); 4158 next_reap_node();
4159 refresh_cpu_vm_stats(smp_processor_id());
4160out: 4159out:
4161 /* Set up the next iteration */ 4160 /* Set up the next iteration */
4162 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); 4161 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
diff --git a/mm/slub.c b/mm/slub.c
index a581fa8ae11a..dbb206503a8d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2580,7 +2580,6 @@ static DEFINE_PER_CPU(struct delayed_work, reap_work);
2580static void cache_reap(struct work_struct *unused) 2580static void cache_reap(struct work_struct *unused)
2581{ 2581{
2582 next_reap_node(); 2582 next_reap_node();
2583 refresh_cpu_vm_stats(smp_processor_id());
2584 schedule_delayed_work(&__get_cpu_var(reap_work), 2583 schedule_delayed_work(&__get_cpu_var(reap_work),
2585 REAPTIMEOUT_CPUC); 2584 REAPTIMEOUT_CPUC);
2586} 2585}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9a66dc4aed43..9d824643a22f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -640,6 +640,22 @@ const struct seq_operations vmstat_op = {
640#endif /* CONFIG_PROC_FS */ 640#endif /* CONFIG_PROC_FS */
641 641
642#ifdef CONFIG_SMP 642#ifdef CONFIG_SMP
643static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
644
645static void vmstat_update(struct work_struct *w)
646{
647 refresh_cpu_vm_stats(smp_processor_id());
648 schedule_delayed_work(&__get_cpu_var(vmstat_work), HZ);
649}
650
651static void __devinit start_cpu_timer(int cpu)
652{
653 struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu);
654
655 INIT_DELAYED_WORK(vmstat_work, vmstat_update);
656 schedule_delayed_work_on(cpu, vmstat_work, HZ + cpu);
657}
658
643/* 659/*
644 * Use the cpu notifier to insure that the thresholds are recalculated 660 * Use the cpu notifier to insure that the thresholds are recalculated
645 * when necessary. 661 * when necessary.
@@ -648,11 +664,22 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
648 unsigned long action, 664 unsigned long action,
649 void *hcpu) 665 void *hcpu)
650{ 666{
667 long cpu = (long)hcpu;
668
651 switch (action) { 669 switch (action) {
652 case CPU_UP_PREPARE: 670 case CPU_ONLINE:
653 case CPU_UP_PREPARE_FROZEN: 671 case CPU_ONLINE_FROZEN:
654 case CPU_UP_CANCELED: 672 start_cpu_timer(cpu);
655 case CPU_UP_CANCELED_FROZEN: 673 break;
674 case CPU_DOWN_PREPARE:
675 case CPU_DOWN_PREPARE_FROZEN:
676 cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
677 per_cpu(vmstat_work, cpu).work.func = NULL;
678 break;
679 case CPU_DOWN_FAILED:
680 case CPU_DOWN_FAILED_FROZEN:
681 start_cpu_timer(cpu);
682 break;
656 case CPU_DEAD: 683 case CPU_DEAD:
657 case CPU_DEAD_FROZEN: 684 case CPU_DEAD_FROZEN:
658 refresh_zone_stat_thresholds(); 685 refresh_zone_stat_thresholds();
@@ -668,8 +695,13 @@ static struct notifier_block __cpuinitdata vmstat_notifier =
668 695
669int __init setup_vmstat(void) 696int __init setup_vmstat(void)
670{ 697{
698 int cpu;
699
671 refresh_zone_stat_thresholds(); 700 refresh_zone_stat_thresholds();
672 register_cpu_notifier(&vmstat_notifier); 701 register_cpu_notifier(&vmstat_notifier);
702
703 for_each_online_cpu(cpu)
704 start_cpu_timer(cpu);
673 return 0; 705 return 0;
674} 706}
675module_init(setup_vmstat) 707module_init(setup_vmstat)