author		Christoph Lameter <cl@linux.com>	2013-09-11 17:21:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-11 18:57:31 -0400
commit		2bb921e526656556e68f99f5f15a4a1bf2691844 (patch)
tree		91b009a59938d7713de0781df9d5c0c2eacfc51f /mm
parent		d2cf5ad6312ca9913464fac40fb47ba47ad945c4 (diff)
vmstat: create separate function to fold per cpu diffs into local counters
The main idea behind this patchset is to reduce the vmstat update overhead
by avoiding interrupt enable/disable and the use of per cpu atomics.
This patch (of 3):
It is better to have a separate folding function because
refresh_cpu_vm_stats() also does other things, such as expiring pages in
the page allocator caches.
With a separate folding function, refresh_cpu_vm_stats() is only called
from the local cpu, which allows additional optimizations.
The folding function is only called when a cpu is being downed, so no
other processor will be accessing its counters; this also simplifies
synchronization. (A user-space sketch of the fold appears after the
sign-offs.)
[akpm@linux-foundation.org: fix UP build]
Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
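For readers following along outside the kernel tree, here is a minimal
user-space model of the fold this patch introduces. It mirrors the shape of
cpu_vm_stats_fold() in the diff below, but every name here (stats_fold,
NR_ITEMS, pageset, ...) is an illustrative stand-in rather than a kernel
identifier, and C11 atomics play the role of the kernel's atomic_long_add():

	/*
	 * Minimal user-space sketch of folding per-cpu counter diffs
	 * into zone and global counters. Illustrative names only.
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	#define NR_CPUS  4
	#define NR_ZONES 2
	#define NR_ITEMS 3	/* stands in for NR_VM_ZONE_STAT_ITEMS */

	struct pageset {			/* models struct per_cpu_pageset */
		int vm_stat_diff[NR_ITEMS];	/* per-cpu batched deltas */
	};

	struct zone {
		struct pageset pageset[NR_CPUS];	/* models the per-cpu data */
		atomic_long vm_stat[NR_ITEMS];		/* models zone->vm_stat[] */
	};

	static struct zone zones[NR_ZONES];
	static atomic_long vm_stat[NR_ITEMS];		/* models the global vm_stat[] */

	/*
	 * Same shape as cpu_vm_stats_fold(): fold one (offline) cpu's
	 * leftover diffs into each zone, accumulating a local global_diff[]
	 * so the shared global array is touched at most once per item.
	 */
	static void stats_fold(int cpu)
	{
		long global_diff[NR_ITEMS] = { 0 };

		for (int z = 0; z < NR_ZONES; z++) {
			struct pageset *p = &zones[z].pageset[cpu];

			for (int i = 0; i < NR_ITEMS; i++)
				if (p->vm_stat_diff[i]) {
					int v = p->vm_stat_diff[i];

					p->vm_stat_diff[i] = 0;	/* plain store: cpu is offline */
					atomic_fetch_add(&zones[z].vm_stat[i], v);
					global_diff[i] += v;
				}
		}

		for (int i = 0; i < NR_ITEMS; i++)
			if (global_diff[i])
				atomic_fetch_add(&vm_stat[i], global_diff[i]);
	}

	int main(void)
	{
		/* Pretend cpu 1 batched a few updates and then went offline. */
		zones[0].pageset[1].vm_stat_diff[0] = 5;
		zones[1].pageset[1].vm_stat_diff[0] = -2;

		stats_fold(1);
		printf("global item 0 = %ld\n", atomic_load(&vm_stat[0]));	/* prints 3 */
		return 0;
	}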
Diffstat (limited to 'mm')
-rw-r--r--	mm/page_alloc.c |  2 +-
-rw-r--r--	mm/vmstat.c     | 40 ++++++++++++++++++++++++++++++++++++++------
2 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 42c59300bacd..f885eb827159 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5435,7 +5435,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 		 * This is only okay since the processor is dead and cannot
 		 * race with what we are doing.
 		 */
-		refresh_cpu_vm_stats(cpu);
+		cpu_vm_stats_fold(cpu);
 	}
 	return NOTIFY_OK;
 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8a8da1f9b044..aaee66330e01 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -415,11 +415,7 @@ EXPORT_SYMBOL(dec_zone_page_state);
 #endif
 
 /*
- * Update the zone counters for one cpu.
- *
- * The cpu specified must be either the current cpu or a processor that
- * is not online. If it is the current cpu then the execution thread must
- * be pinned to the current cpu.
+ * Update the zone counters for the current cpu.
  *
  * Note that refresh_cpu_vm_stats strives to only access
  * node local memory. The per cpu pagesets on remote zones are placed
@@ -432,7 +428,7 @@ EXPORT_SYMBOL(dec_zone_page_state);
  * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 */
-void refresh_cpu_vm_stats(int cpu)
+static void refresh_cpu_vm_stats(int cpu)
 {
 	struct zone *zone;
 	int i;
@@ -494,6 +490,38 @@ void refresh_cpu_vm_stats(int cpu)
 }
 
 /*
+ * Fold the data for an offline cpu into the global array.
+ * There cannot be any access by the offline cpu and therefore
+ * synchronization is simplified.
+ */
+void cpu_vm_stats_fold(int cpu)
+{
+	struct zone *zone;
+	int i;
+	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+
+	for_each_populated_zone(zone) {
+		struct per_cpu_pageset *p;
+
+		p = per_cpu_ptr(zone->pageset, cpu);
+
+		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+			if (p->vm_stat_diff[i]) {
+				int v;
+
+				v = p->vm_stat_diff[i];
+				p->vm_stat_diff[i] = 0;
+				atomic_long_add(v, &zone->vm_stat[i]);
+				global_diff[i] += v;
+			}
+	}
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		if (global_diff[i])
+			atomic_long_add(global_diff[i], &vm_stat[i]);
+}
+
+/*
  * this is only called if !populated_zone(zone), which implies no other users of
  * pset->vm_stat_diff[] exsist.
  */
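The page_alloc.c hunk above shows where the fold is invoked. Continuing the
user-space sketch from earlier, the call-site pattern looks roughly like the
following; the callback signature and the CPU_DEAD value are stand-ins, not
the kernel's notifier API:

	/*
	 * Sketch of the call-site pattern from the page_alloc.c hunk,
	 * reusing stats_fold() from the earlier sketch. CPU_DEAD is a
	 * stand-in for the kernel's hotplug notifier action.
	 */
	enum { CPU_DEAD = 1 };

	static int on_cpu_event(int action, int cpu)
	{
		if (action == CPU_DEAD)		/* cpu is fully offline by now */
			stats_fold(cpu);	/* nothing else touches its diffs */
		return 0;
	}

Because the folded cpu is already dead, its vm_stat_diff[] entries can be
read and zeroed with plain loads and stores; only the shared zone and global
counters need atomic adds, which is the simplified synchronization the
changelog refers to.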