Diffstat (limited to 'mm/vmstat.c')

-rw-r--r--  mm/vmstat.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 88 insertions(+), 7 deletions(-)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6c488d6ac425..9832d9a41d8c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -281,6 +281,17 @@ EXPORT_SYMBOL(dec_zone_page_state);
 
 /*
  * Update the zone counters for one cpu.
+ *
+ * Note that refresh_cpu_vm_stats strives to only access
+ * node local memory. The per cpu pagesets on remote zones are placed
+ * in the memory local to the processor using that pageset. So the
+ * loop over all zones will access a series of cachelines local to
+ * the processor.
+ *
+ * The call to zone_page_state_add updates the cachelines with the
+ * statistics in the remote zone struct as well as the global cachelines
+ * with the global counters. These could cause remote node cache line
+ * bouncing and will have to be only done when necessary.
  */
 void refresh_cpu_vm_stats(int cpu)
 {
@@ -289,21 +300,54 @@ void refresh_cpu_vm_stats(int cpu)
         unsigned long flags;
 
         for_each_zone(zone) {
-                struct per_cpu_pageset *pcp;
+                struct per_cpu_pageset *p;
 
                 if (!populated_zone(zone))
                         continue;
 
-                pcp = zone_pcp(zone, cpu);
+                p = zone_pcp(zone, cpu);
 
                 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-                        if (pcp->vm_stat_diff[i]) {
+                        if (p->vm_stat_diff[i]) {
                                 local_irq_save(flags);
-                                zone_page_state_add(pcp->vm_stat_diff[i],
+                                zone_page_state_add(p->vm_stat_diff[i],
                                         zone, i);
-                                pcp->vm_stat_diff[i] = 0;
+                                p->vm_stat_diff[i] = 0;
+#ifdef CONFIG_NUMA
+                                /* 3 seconds idle till flush */
+                                p->expire = 3;
+#endif
                                 local_irq_restore(flags);
                         }
+#ifdef CONFIG_NUMA
+                /*
+                 * Deal with draining the remote pageset of this
+                 * processor
+                 *
+                 * Check if there are pages remaining in this pageset
+                 * if not then there is nothing to expire.
+                 */
+                if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
+                        continue;
+
+                /*
+                 * We never drain zones local to this processor.
+                 */
+                if (zone_to_nid(zone) == numa_node_id()) {
+                        p->expire = 0;
+                        continue;
+                }
+
+                p->expire--;
+                if (p->expire)
+                        continue;
+
+                if (p->pcp[0].count)
+                        drain_zone_pages(zone, p->pcp + 0);
+
+                if (p->pcp[1].count)
+                        drain_zone_pages(zone, p->pcp + 1);
+#endif
         }
 }
 
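A note on the NUMA branch added above: remote pagesets are drained by a simple countdown. Any vmstat activity re-arms p->expire to 3, the once-per-interval refresh decrements it, and only a pageset that has stayed idle for three consecutive intervals (and still holds pages) hands its cached pages back to the remote zone via drain_zone_pages(). Node-local zones short-circuit the countdown, since their pagesets are never drained this way. The following is a minimal, self-contained userspace sketch of that expiry pattern; struct pageset, pageset_touch() and pageset_tick() are illustrative names for this sketch, not kernel API.

#include <stdio.h>

/* Illustrative stand-in for a per-cpu pageset on a remote zone. */
struct pageset {
        int count;      /* cached pages held for the remote zone */
        int expire;     /* idle intervals left before draining */
};

/* Called whenever the pageset sees activity: re-arm the countdown. */
static void pageset_touch(struct pageset *p)
{
        p->expire = 3;  /* 3 intervals idle till flush */
}

/* Called once per stat interval, mirroring the hunk above. */
static void pageset_tick(struct pageset *p)
{
        if (!p->expire || !p->count)
                return;         /* nothing cached, nothing to expire */
        if (--p->expire)
                return;         /* not idle long enough yet */
        printf("draining %d pages back to remote zone\n", p->count);
        p->count = 0;
}

int main(void)
{
        struct pageset p = { .count = 42, .expire = 0 };
        int i;

        pageset_touch(&p);
        for (i = 0; i < 4; i++)
                pageset_tick(&p);       /* drains on the third idle tick */
        return 0;
}

The design keeps the hot path cheap: a busy pageset only ever rewrites expire, and the actual drain work happens at most once per idle period.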
@@ -640,6 +684,24 @@ const struct seq_operations vmstat_op = {
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
+int sysctl_stat_interval __read_mostly = HZ;
+
+static void vmstat_update(struct work_struct *w)
+{
+        refresh_cpu_vm_stats(smp_processor_id());
+        schedule_delayed_work(&__get_cpu_var(vmstat_work),
+                sysctl_stat_interval);
+}
+
+static void __devinit start_cpu_timer(int cpu)
+{
+        struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu);
+
+        INIT_DELAYED_WORK(vmstat_work, vmstat_update);
+        schedule_delayed_work_on(cpu, vmstat_work, HZ + cpu);
+}
+
 /*
  * Use the cpu notifier to insure that the thresholds are recalculated
  * when necessary.
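Two details in the hunk above are worth calling out: vmstat_update() reschedules itself, so each CPU ends up with a free-running periodic timer, and start_cpu_timer() offsets the first expiry by HZ + cpu so the per-CPU works do not all fire in the same tick. A rough userspace analogue of a self-rearming, staggered periodic worker using POSIX threads follows; the names and the millisecond stagger are illustrative, not the kernel mechanism.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NCPUS 4
#define INTERVAL_MS 1000        /* analogue of sysctl_stat_interval (HZ) */

/* One worker per "cpu": staggered start, then sleep/work/repeat. */
static void *stat_worker(void *arg)
{
        long cpu = (long)arg;

        /* Staggered first run, like HZ + cpu in start_cpu_timer(). */
        usleep((INTERVAL_MS + cpu * 10) * 1000);
        for (;;) {
                printf("cpu %ld: refreshing vm stats\n", cpu);
                usleep(INTERVAL_MS * 1000);     /* self-rearm */
        }
        return NULL;
}

int main(void)
{
        pthread_t tid[NCPUS];
        long cpu;

        for (cpu = 0; cpu < NCPUS; cpu++)
                pthread_create(&tid[cpu], NULL, stat_worker, (void *)cpu);
        sleep(5);       /* let a few intervals elapse, then exit */
        return 0;
}

The stagger matters because every online CPU runs the same work; offsetting the first expiry spreads the refreshes across ticks instead of having them all land in the same jiffy.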
@@ -648,10 +710,24 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
                 unsigned long action,
                 void *hcpu)
 {
+        long cpu = (long)hcpu;
+
         switch (action) {
-        case CPU_UP_PREPARE:
-        case CPU_UP_CANCELED:
+        case CPU_ONLINE:
+        case CPU_ONLINE_FROZEN:
+                start_cpu_timer(cpu);
+                break;
+        case CPU_DOWN_PREPARE:
+        case CPU_DOWN_PREPARE_FROZEN:
+                cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
+                per_cpu(vmstat_work, cpu).work.func = NULL;
+                break;
+        case CPU_DOWN_FAILED:
+        case CPU_DOWN_FAILED_FROZEN:
+                start_cpu_timer(cpu);
+                break;
         case CPU_DEAD:
+        case CPU_DEAD_FROZEN:
                 refresh_zone_stat_thresholds();
                 break;
         default:
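The notifier above turns CPU hotplug events into timer lifecycle operations: arm the timer on online, disarm it before a CPU is taken down, re-arm it if the down attempt fails, and recompute stat thresholds once the CPU is gone. A compact userspace sketch of that mapping follows; the enum and helper names are made up for illustration.

#include <stdio.h>

/* Illustrative stand-ins for the hotplug actions handled above. */
enum hotplug_action { ONLINE, DOWN_PREPARE, DOWN_FAILED, DEAD };

static void start_timer(int cpu)  { printf("cpu %d: timer started\n", cpu); }
static void cancel_timer(int cpu) { printf("cpu %d: timer cancelled\n", cpu); }

/* Mirrors the switch in vmstat_cpuup_callback(). */
static void hotplug_event(enum hotplug_action action, int cpu)
{
        switch (action) {
        case ONLINE:
        case DOWN_FAILED:       /* offlining aborted: timer must come back */
                start_timer(cpu);
                break;
        case DOWN_PREPARE:      /* stop the timer before the CPU goes away */
                cancel_timer(cpu);
                break;
        case DEAD:              /* fewer CPUs left: thresholds recomputed */
                printf("recomputing stat thresholds\n");
                break;
        }
}

int main(void)
{
        hotplug_event(ONLINE, 1);
        hotplug_event(DOWN_PREPARE, 1);
        hotplug_event(DOWN_FAILED, 1);  /* offline failed, timer restarts */
        return 0;
}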
@@ -665,8 +741,13 @@ static struct notifier_block __cpuinitdata vmstat_notifier =
 
 int __init setup_vmstat(void)
 {
+        int cpu;
+
         refresh_zone_stat_thresholds();
         register_cpu_notifier(&vmstat_notifier);
+
+        for_each_online_cpu(cpu)
+                start_cpu_timer(cpu);
         return 0;
 }
 module_init(setup_vmstat)
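A final note on setup_vmstat(): the boot CPUs are already online by the time the notifier is registered, so they never receive a CPU_ONLINE event. The explicit for_each_online_cpu() loop is what arms their timers, while the notifier covers CPUs brought up or taken down later.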