diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/vmstat.c | 141 |
1 files changed, 120 insertions, 21 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c
index cce7c766da7a..1b12d390dc68 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -7,6 +7,7 @@ | |||
7 | * zoned VM statistics | 7 | * zoned VM statistics |
8 | * Copyright (C) 2006 Silicon Graphics, Inc., | 8 | * Copyright (C) 2006 Silicon Graphics, Inc., |
9 | * Christoph Lameter <christoph@lameter.com> | 9 | * Christoph Lameter <christoph@lameter.com> |
10 | * Copyright (C) 2008-2014 Christoph Lameter | ||
10 | */ | 11 | */ |
11 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
12 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
@@ -14,6 +15,7 @@ | |||
14 | #include <linux/module.h> | 15 | #include <linux/module.h> |
15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
16 | #include <linux/cpu.h> | 17 | #include <linux/cpu.h> |
18 | #include <linux/cpumask.h> | ||
17 | #include <linux/vmstat.h> | 19 | #include <linux/vmstat.h> |
18 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
19 | #include <linux/math64.h> | 21 | #include <linux/math64.h> |
@@ -419,13 +421,22 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |||
419 | EXPORT_SYMBOL(dec_zone_page_state); | 421 | EXPORT_SYMBOL(dec_zone_page_state); |
420 | #endif | 422 | #endif |
421 | 423 | ||
422 | static inline void fold_diff(int *diff) | 424 | |
425 | /* | ||
426 | * Fold a differential into the global counters. | ||
427 | * Returns the number of counters updated. | ||
428 | */ | ||
429 | static int fold_diff(int *diff) | ||
423 | { | 430 | { |
424 | int i; | 431 | int i; |
432 | int changes = 0; | ||
425 | 433 | ||
426 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | 434 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
427 | if (diff[i]) | 435 | if (diff[i]) { |
428 | atomic_long_add(diff[i], &vm_stat[i]); | 436 | atomic_long_add(diff[i], &vm_stat[i]); |
437 | changes++; | ||
438 | } | ||
439 | return changes; | ||
429 | } | 440 | } |
430 | 441 | ||
431 | /* | 442 | /* |
@@ -441,12 +452,15 @@ static inline void fold_diff(int *diff) | |||
441 | * statistics in the remote zone struct as well as the global cachelines | 452 | * statistics in the remote zone struct as well as the global cachelines |
442 | * with the global counters. These could cause remote node cache line | 453 | * with the global counters. These could cause remote node cache line |
443 | * bouncing and will have to be only done when necessary. | 454 | * bouncing and will have to be only done when necessary. |
455 | * | ||
456 | * The function returns the number of global counters updated. | ||
444 | */ | 457 | */ |
445 | static void refresh_cpu_vm_stats(void) | 458 | static int refresh_cpu_vm_stats(void) |
446 | { | 459 | { |
447 | struct zone *zone; | 460 | struct zone *zone; |
448 | int i; | 461 | int i; |
449 | int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; | 462 | int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; |
463 | int changes = 0; | ||
450 | 464 | ||
451 | for_each_populated_zone(zone) { | 465 | for_each_populated_zone(zone) { |
452 | struct per_cpu_pageset __percpu *p = zone->pageset; | 466 | struct per_cpu_pageset __percpu *p = zone->pageset; |
@@ -486,15 +500,17 @@ static void refresh_cpu_vm_stats(void) | |||
486 | continue; | 500 | continue; |
487 | } | 501 | } |
488 | 502 | ||
489 | |||
490 | if (__this_cpu_dec_return(p->expire)) | 503 | if (__this_cpu_dec_return(p->expire)) |
491 | continue; | 504 | continue; |
492 | 505 | ||
493 | if (__this_cpu_read(p->pcp.count)) | 506 | if (__this_cpu_read(p->pcp.count)) { |
494 | drain_zone_pages(zone, this_cpu_ptr(&p->pcp)); | 507 | drain_zone_pages(zone, this_cpu_ptr(&p->pcp)); |
508 | changes++; | ||
509 | } | ||
495 | #endif | 510 | #endif |
496 | } | 511 | } |
497 | fold_diff(global_diff); | 512 | changes += fold_diff(global_diff); |
513 | return changes; | ||
498 | } | 514 | } |
499 | 515 | ||
500 | /* | 516 | /* |
@@ -1239,20 +1255,108 @@ static const struct file_operations proc_vmstat_file_operations = { | |||
1239 | #ifdef CONFIG_SMP | 1255 | #ifdef CONFIG_SMP |
1240 | static DEFINE_PER_CPU(struct delayed_work, vmstat_work); | 1256 | static DEFINE_PER_CPU(struct delayed_work, vmstat_work); |
1241 | int sysctl_stat_interval __read_mostly = HZ; | 1257 | int sysctl_stat_interval __read_mostly = HZ; |
1258 | static cpumask_var_t cpu_stat_off; | ||
1242 | 1259 | ||
1243 | static void vmstat_update(struct work_struct *w) | 1260 | static void vmstat_update(struct work_struct *w) |
1244 | { | 1261 | { |
1245 | refresh_cpu_vm_stats(); | 1262 | if (refresh_cpu_vm_stats()) |
1246 | schedule_delayed_work(this_cpu_ptr(&vmstat_work), | 1263 | /* |
1264 | * Counters were updated so we expect more updates | ||
1265 | * to occur in the future. Keep on running the | ||
1266 | * update worker thread. | ||
1267 | */ | ||
1268 | schedule_delayed_work(this_cpu_ptr(&vmstat_work), | ||
1269 | round_jiffies_relative(sysctl_stat_interval)); | ||
1270 | else { | ||
1271 | /* | ||
1272 | * We did not update any counters so the app may be in | ||
1273 | * a mode where it does not cause counter updates. | ||
1274 | * We may be uselessly running vmstat_update. | ||
1275 | * Defer the checking for differentials to the | ||
1276 | * shepherd thread on a different processor. | ||
1277 | */ | ||
1278 | int r; | ||
1279 | /* | ||
1280 | * Shepherd work thread does not race since it never | ||
1281 | * changes the bit if it's zero but the cpu | ||
1282 | * online / offline code may race if | ||
1283 | * worker threads are still allowed during | ||
1284 | * shutdown / startup. | ||
1285 | */ | ||
1286 | r = cpumask_test_and_set_cpu(smp_processor_id(), | ||
1287 | cpu_stat_off); | ||
1288 | VM_BUG_ON(r); | ||
1289 | } | ||
1290 | } | ||
1291 | |||
1292 | /* | ||
1293 | * Check if the diffs for a certain cpu indicate that | ||
1294 | * an update is needed. | ||
1295 | */ | ||
1296 | static bool need_update(int cpu) | ||
1297 | { | ||
1298 | struct zone *zone; | ||
1299 | |||
1300 | for_each_populated_zone(zone) { | ||
1301 | struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu); | ||
1302 | |||
1303 | BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1); | ||
1304 | /* | ||
1305 | * The fast way of checking if there are any vmstat diffs. | ||
1306 | * This works because the diffs are byte sized items. | ||
1307 | */ | ||
1308 | if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) | ||
1309 | return true; | ||
1310 | |||
1311 | } | ||
1312 | return false; | ||
1313 | } | ||
1314 | |||
1315 | |||
1316 | /* | ||
1317 | * Shepherd worker thread that checks the | ||
1318 | * differentials of processors that have their worker | ||
1319 | * threads for vm statistics updates disabled because of | ||
1320 | * inactivity. | ||
1321 | */ | ||
1322 | static void vmstat_shepherd(struct work_struct *w); | ||
1323 | |||
1324 | static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd); | ||
1325 | |||
1326 | static void vmstat_shepherd(struct work_struct *w) | ||
1327 | { | ||
1328 | int cpu; | ||
1329 | |||
1330 | get_online_cpus(); | ||
1331 | /* Check processors whose vmstat worker threads have been disabled */ | ||
1332 | for_each_cpu(cpu, cpu_stat_off) | ||
1333 | if (need_update(cpu) && | ||
1334 | cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) | ||
1335 | |||
1336 | schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu), | ||
1337 | __round_jiffies_relative(sysctl_stat_interval, cpu)); | ||
1338 | |||
1339 | put_online_cpus(); | ||
1340 | |||
1341 | schedule_delayed_work(&shepherd, | ||
1247 | round_jiffies_relative(sysctl_stat_interval)); | 1342 | round_jiffies_relative(sysctl_stat_interval)); |
1343 | |||
1248 | } | 1344 | } |
1249 | 1345 | ||
1250 | static void start_cpu_timer(int cpu) | 1346 | static void __init start_shepherd_timer(void) |
1251 | { | 1347 | { |
1252 | struct delayed_work *work = &per_cpu(vmstat_work, cpu); | 1348 | int cpu; |
1349 | |||
1350 | for_each_possible_cpu(cpu) | ||
1351 | INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), | ||
1352 | vmstat_update); | ||
1353 | |||
1354 | if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL)) | ||
1355 | BUG(); | ||
1356 | cpumask_copy(cpu_stat_off, cpu_online_mask); | ||
1253 | 1357 | ||
1254 | INIT_DEFERRABLE_WORK(work, vmstat_update); | 1358 | schedule_delayed_work(&shepherd, |
1255 | schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu)); | 1359 | round_jiffies_relative(sysctl_stat_interval)); |
1256 | } | 1360 | } |
1257 | 1361 | ||
1258 | static void vmstat_cpu_dead(int node) | 1362 | static void vmstat_cpu_dead(int node) |
@@ -1283,17 +1387,17 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb, | |||
1283 | case CPU_ONLINE: | 1387 | case CPU_ONLINE: |
1284 | case CPU_ONLINE_FROZEN: | 1388 | case CPU_ONLINE_FROZEN: |
1285 | refresh_zone_stat_thresholds(); | 1389 | refresh_zone_stat_thresholds(); |
1286 | start_cpu_timer(cpu); | ||
1287 | node_set_state(cpu_to_node(cpu), N_CPU); | 1390 | node_set_state(cpu_to_node(cpu), N_CPU); |
1391 | cpumask_set_cpu(cpu, cpu_stat_off); | ||
1288 | break; | 1392 | break; |
1289 | case CPU_DOWN_PREPARE: | 1393 | case CPU_DOWN_PREPARE: |
1290 | case CPU_DOWN_PREPARE_FROZEN: | 1394 | case CPU_DOWN_PREPARE_FROZEN: |
1291 | cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); | 1395 | cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); |
1292 | per_cpu(vmstat_work, cpu).work.func = NULL; | 1396 | cpumask_clear_cpu(cpu, cpu_stat_off); |
1293 | break; | 1397 | break; |
1294 | case CPU_DOWN_FAILED: | 1398 | case CPU_DOWN_FAILED: |
1295 | case CPU_DOWN_FAILED_FROZEN: | 1399 | case CPU_DOWN_FAILED_FROZEN: |
1296 | start_cpu_timer(cpu); | 1400 | cpumask_set_cpu(cpu, cpu_stat_off); |
1297 | break; | 1401 | break; |
1298 | case CPU_DEAD: | 1402 | case CPU_DEAD: |
1299 | case CPU_DEAD_FROZEN: | 1403 | case CPU_DEAD_FROZEN: |
@@ -1313,15 +1417,10 @@ static struct notifier_block vmstat_notifier = | |||
1313 | static int __init setup_vmstat(void) | 1417 | static int __init setup_vmstat(void) |
1314 | { | 1418 | { |
1315 | #ifdef CONFIG_SMP | 1419 | #ifdef CONFIG_SMP |
1316 | int cpu; | ||
1317 | |||
1318 | cpu_notifier_register_begin(); | 1420 | cpu_notifier_register_begin(); |
1319 | __register_cpu_notifier(&vmstat_notifier); | 1421 | __register_cpu_notifier(&vmstat_notifier); |
1320 | 1422 | ||
1321 | for_each_online_cpu(cpu) { | 1423 | start_shepherd_timer(); |
1322 | start_cpu_timer(cpu); | ||
1323 | node_set_state(cpu_to_node(cpu), N_CPU); | ||
1324 | } | ||
1325 | cpu_notifier_register_done(); | 1424 | cpu_notifier_register_done(); |
1326 | #endif | 1425 | #endif |
1327 | #ifdef CONFIG_PROC_FS | 1426 | #ifdef CONFIG_PROC_FS |