path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  472
1 file changed, 13 insertions, 459 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60f2feddbe5d..3e792a583f3b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -455,7 +455,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 
         kernel_map_pages(page, 1 << order, 0);
         local_irq_save(flags);
-        __mod_page_state(pgfree, 1 << order);
+        __count_vm_events(PGFREE, 1 << order);
         free_one_page(page_zone(page), page, order);
         local_irq_restore(flags);
 }
@@ -708,27 +708,6 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
-{
-#ifdef CONFIG_NUMA
-        pg_data_t *pg = z->zone_pgdat;
-        pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
-        struct per_cpu_pageset *p;
-
-        p = zone_pcp(z, cpu);
-        if (pg == orig) {
-                p->numa_hit++;
-        } else {
-                p->numa_miss++;
-                zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
-        }
-        if (pg == NODE_DATA(numa_node_id()))
-                p->local_node++;
-        else
-                p->other_node++;
-#endif
-}
-
 /*
  * Free a 0-order page
  */
@@ -749,7 +728,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 
         pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
         local_irq_save(flags);
-        __inc_page_state(pgfree);
+        __count_vm_event(PGFREE);
         list_add(&page->lru, &pcp->list);
         pcp->count++;
         if (pcp->count >= pcp->high) {
@@ -825,8 +804,8 @@ again:
                 goto failed;
         }
 
-        __mod_page_state_zone(zone, pgalloc, 1 << order);
-        zone_statistics(zonelist, zone, cpu);
+        __count_zone_vm_events(PGALLOC, zone, 1 << order);
+        zone_statistics(zonelist, zone);
         local_irq_restore(flags);
         put_cpu();
 
@@ -1230,141 +1209,6 @@ static void show_node(struct zone *zone)
 #define show_node(zone) do { } while (0)
 #endif
 
-/*
- * Accumulate the page_state information across all CPUs.
- * The result is unavoidably approximate - it can change
- * during and after execution of this function.
- */
-static DEFINE_PER_CPU(struct page_state, page_states) = {0};
-
-atomic_t nr_pagecache = ATOMIC_INIT(0);
-EXPORT_SYMBOL(nr_pagecache);
-#ifdef CONFIG_SMP
-DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
-#endif
-
-static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
-{
-        unsigned cpu;
-
-        memset(ret, 0, nr * sizeof(unsigned long));
-        cpus_and(*cpumask, *cpumask, cpu_online_map);
-
-        for_each_cpu_mask(cpu, *cpumask) {
-                unsigned long *in;
-                unsigned long *out;
-                unsigned off;
-                unsigned next_cpu;
-
-                in = (unsigned long *)&per_cpu(page_states, cpu);
-
-                next_cpu = next_cpu(cpu, *cpumask);
-                if (likely(next_cpu < NR_CPUS))
-                        prefetch(&per_cpu(page_states, next_cpu));
-
-                out = (unsigned long *)ret;
-                for (off = 0; off < nr; off++)
-                        *out++ += *in++;
-        }
-}
-
-void get_page_state_node(struct page_state *ret, int node)
-{
-        int nr;
-        cpumask_t mask = node_to_cpumask(node);
-
-        nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
-        nr /= sizeof(unsigned long);
-
-        __get_page_state(ret, nr+1, &mask);
-}
-
-void get_page_state(struct page_state *ret)
-{
-        int nr;
-        cpumask_t mask = CPU_MASK_ALL;
-
-        nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
-        nr /= sizeof(unsigned long);
-
-        __get_page_state(ret, nr + 1, &mask);
-}
-
-void get_full_page_state(struct page_state *ret)
-{
-        cpumask_t mask = CPU_MASK_ALL;
-
-        __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
-}
-
-unsigned long read_page_state_offset(unsigned long offset)
-{
-        unsigned long ret = 0;
-        int cpu;
-
-        for_each_online_cpu(cpu) {
-                unsigned long in;
-
-                in = (unsigned long)&per_cpu(page_states, cpu) + offset;
-                ret += *((unsigned long *)in);
-        }
-        return ret;
-}
-
-void __mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-        void *ptr;
-
-        ptr = &__get_cpu_var(page_states);
-        *(unsigned long *)(ptr + offset) += delta;
-}
-EXPORT_SYMBOL(__mod_page_state_offset);
-
-void mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
-        unsigned long flags;
-        void *ptr;
-
-        local_irq_save(flags);
-        ptr = &__get_cpu_var(page_states);
-        *(unsigned long *)(ptr + offset) += delta;
-        local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_page_state_offset);
-
-void __get_zone_counts(unsigned long *active, unsigned long *inactive,
-                        unsigned long *free, struct pglist_data *pgdat)
-{
-        struct zone *zones = pgdat->node_zones;
-        int i;
-
-        *active = 0;
-        *inactive = 0;
-        *free = 0;
-        for (i = 0; i < MAX_NR_ZONES; i++) {
-                *active += zones[i].nr_active;
-                *inactive += zones[i].nr_inactive;
-                *free += zones[i].free_pages;
-        }
-}
-
-void get_zone_counts(unsigned long *active,
-                unsigned long *inactive, unsigned long *free)
-{
-        struct pglist_data *pgdat;
-
-        *active = 0;
-        *inactive = 0;
-        *free = 0;
-        for_each_online_pgdat(pgdat) {
-                unsigned long l, m, n;
-                __get_zone_counts(&l, &m, &n, pgdat);
-                *active += l;
-                *inactive += m;
-                *free += n;
-        }
-}
-
 void si_meminfo(struct sysinfo *val)
 {
         val->totalram = totalram_pages;
@@ -1405,7 +1249,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
  */
 void show_free_areas(void)
 {
-        struct page_state ps;
         int cpu, temperature;
         unsigned long active;
         unsigned long inactive;
@@ -1437,7 +1280,6 @@ void show_free_areas(void)
                 }
         }
 
-        get_page_state(&ps);
         get_zone_counts(&active, &inactive, &free);
 
         printk("Free pages: %11ukB (%ukB HighMem)\n",
@@ -1448,13 +1290,13 @@ void show_free_areas(void)
                 "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
                 active,
                 inactive,
-                ps.nr_dirty,
-                ps.nr_writeback,
-                ps.nr_unstable,
+                global_page_state(NR_FILE_DIRTY),
+                global_page_state(NR_WRITEBACK),
+                global_page_state(NR_UNSTABLE_NFS),
                 nr_free_pages(),
-                ps.nr_slab,
-                ps.nr_mapped,
-                ps.nr_page_table_pages);
+                global_page_state(NR_SLAB),
+                global_page_state(NR_FILE_MAPPED),
+                global_page_state(NR_PAGETABLE));
 
         for_each_zone(zone) {
                 int i;
@@ -2179,6 +2021,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
                 zone->nr_scan_inactive = 0;
                 zone->nr_active = 0;
                 zone->nr_inactive = 0;
+                zap_zone_vm_stats(zone);
                 atomic_set(&zone->reclaim_in_progress, 0);
                 if (!size)
                         continue;
@@ -2252,307 +2095,18 @@ void __init free_area_init(unsigned long *zones_size)
                 __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
 
-#ifdef CONFIG_PROC_FS
-
-#include <linux/seq_file.h>
-
-static void *frag_start(struct seq_file *m, loff_t *pos)
-{
-        pg_data_t *pgdat;
-        loff_t node = *pos;
-        for (pgdat = first_online_pgdat();
-             pgdat && node;
-             pgdat = next_online_pgdat(pgdat))
-                --node;
-
-        return pgdat;
-}
-
-static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
-{
-        pg_data_t *pgdat = (pg_data_t *)arg;
-
-        (*pos)++;
-        return next_online_pgdat(pgdat);
-}
-
-static void frag_stop(struct seq_file *m, void *arg)
-{
-}
-
-/*
- * This walks the free areas for each zone.
- */
-static int frag_show(struct seq_file *m, void *arg)
-{
-        pg_data_t *pgdat = (pg_data_t *)arg;
-        struct zone *zone;
-        struct zone *node_zones = pgdat->node_zones;
-        unsigned long flags;
-        int order;
-
-        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-                if (!populated_zone(zone))
-                        continue;
-
-                spin_lock_irqsave(&zone->lock, flags);
-                seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
-                for (order = 0; order < MAX_ORDER; ++order)
-                        seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
-                spin_unlock_irqrestore(&zone->lock, flags);
-                seq_putc(m, '\n');
-        }
-        return 0;
-}
-
-struct seq_operations fragmentation_op = {
-        .start  = frag_start,
-        .next   = frag_next,
-        .stop   = frag_stop,
-        .show   = frag_show,
-};
-
-/*
- * Output information about zones in @pgdat.
- */
-static int zoneinfo_show(struct seq_file *m, void *arg)
-{
-        pg_data_t *pgdat = arg;
-        struct zone *zone;
-        struct zone *node_zones = pgdat->node_zones;
-        unsigned long flags;
-
-        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
-                int i;
-
-                if (!populated_zone(zone))
-                        continue;
-
-                spin_lock_irqsave(&zone->lock, flags);
-                seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
-                seq_printf(m,
-                           "\n pages free %lu"
-                           "\n min %lu"
-                           "\n low %lu"
-                           "\n high %lu"
-                           "\n active %lu"
-                           "\n inactive %lu"
-                           "\n scanned %lu (a: %lu i: %lu)"
-                           "\n spanned %lu"
-                           "\n present %lu",
-                           zone->free_pages,
-                           zone->pages_min,
-                           zone->pages_low,
-                           zone->pages_high,
-                           zone->nr_active,
-                           zone->nr_inactive,
-                           zone->pages_scanned,
-                           zone->nr_scan_active, zone->nr_scan_inactive,
-                           zone->spanned_pages,
-                           zone->present_pages);
-                seq_printf(m,
-                           "\n protection: (%lu",
-                           zone->lowmem_reserve[0]);
-                for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
-                        seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
-                seq_printf(m,
-                           ")"
-                           "\n pagesets");
-                for_each_online_cpu(i) {
-                        struct per_cpu_pageset *pageset;
-                        int j;
-
-                        pageset = zone_pcp(zone, i);
-                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-                                if (pageset->pcp[j].count)
-                                        break;
-                        }
-                        if (j == ARRAY_SIZE(pageset->pcp))
-                                continue;
-                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-                                seq_printf(m,
-                                           "\n cpu: %i pcp: %i"
-                                           "\n count: %i"
-                                           "\n high: %i"
-                                           "\n batch: %i",
-                                           i, j,
-                                           pageset->pcp[j].count,
-                                           pageset->pcp[j].high,
-                                           pageset->pcp[j].batch);
-                        }
-#ifdef CONFIG_NUMA
-                        seq_printf(m,
-                                   "\n numa_hit: %lu"
-                                   "\n numa_miss: %lu"
-                                   "\n numa_foreign: %lu"
-                                   "\n interleave_hit: %lu"
-                                   "\n local_node: %lu"
-                                   "\n other_node: %lu",
-                                   pageset->numa_hit,
-                                   pageset->numa_miss,
-                                   pageset->numa_foreign,
-                                   pageset->interleave_hit,
-                                   pageset->local_node,
-                                   pageset->other_node);
-#endif
-                }
-                seq_printf(m,
-                           "\n all_unreclaimable: %u"
-                           "\n prev_priority: %i"
-                           "\n temp_priority: %i"
-                           "\n start_pfn: %lu",
-                           zone->all_unreclaimable,
-                           zone->prev_priority,
-                           zone->temp_priority,
-                           zone->zone_start_pfn);
-                spin_unlock_irqrestore(&zone->lock, flags);
-                seq_putc(m, '\n');
-        }
-        return 0;
-}
-
-struct seq_operations zoneinfo_op = {
-        .start  = frag_start, /* iterate over all zones. The same as in
-                               * fragmentation. */
-        .next   = frag_next,
-        .stop   = frag_stop,
-        .show   = zoneinfo_show,
-};
-
-static char *vmstat_text[] = {
-        "nr_dirty",
-        "nr_writeback",
-        "nr_unstable",
-        "nr_page_table_pages",
-        "nr_mapped",
-        "nr_slab",
-
-        "pgpgin",
-        "pgpgout",
-        "pswpin",
-        "pswpout",
-
-        "pgalloc_high",
-        "pgalloc_normal",
-        "pgalloc_dma32",
-        "pgalloc_dma",
-
-        "pgfree",
-        "pgactivate",
-        "pgdeactivate",
-
-        "pgfault",
-        "pgmajfault",
-
-        "pgrefill_high",
-        "pgrefill_normal",
-        "pgrefill_dma32",
-        "pgrefill_dma",
-
-        "pgsteal_high",
-        "pgsteal_normal",
-        "pgsteal_dma32",
-        "pgsteal_dma",
-
-        "pgscan_kswapd_high",
-        "pgscan_kswapd_normal",
-        "pgscan_kswapd_dma32",
-        "pgscan_kswapd_dma",
-
-        "pgscan_direct_high",
-        "pgscan_direct_normal",
-        "pgscan_direct_dma32",
-        "pgscan_direct_dma",
-
-        "pginodesteal",
-        "slabs_scanned",
-        "kswapd_steal",
-        "kswapd_inodesteal",
-        "pageoutrun",
-        "allocstall",
-
-        "pgrotated",
-        "nr_bounce",
-};
-
-static void *vmstat_start(struct seq_file *m, loff_t *pos)
-{
-        struct page_state *ps;
-
-        if (*pos >= ARRAY_SIZE(vmstat_text))
-                return NULL;
-
-        ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-        m->private = ps;
-        if (!ps)
-                return ERR_PTR(-ENOMEM);
-        get_full_page_state(ps);
-        ps->pgpgin /= 2;        /* sectors -> kbytes */
-        ps->pgpgout /= 2;
-        return (unsigned long *)ps + *pos;
-}
-
-static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
-{
-        (*pos)++;
-        if (*pos >= ARRAY_SIZE(vmstat_text))
-                return NULL;
-        return (unsigned long *)m->private + *pos;
-}
-
-static int vmstat_show(struct seq_file *m, void *arg)
-{
-        unsigned long *l = arg;
-        unsigned long off = l - (unsigned long *)m->private;
-
-        seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
-        return 0;
-}
-
-static void vmstat_stop(struct seq_file *m, void *arg)
-{
-        kfree(m->private);
-        m->private = NULL;
-}
-
-struct seq_operations vmstat_op = {
-        .start  = vmstat_start,
-        .next   = vmstat_next,
-        .stop   = vmstat_stop,
-        .show   = vmstat_show,
-};
-
-#endif /* CONFIG_PROC_FS */
-
 #ifdef CONFIG_HOTPLUG_CPU
 static int page_alloc_cpu_notify(struct notifier_block *self,
                 unsigned long action, void *hcpu)
 {
         int cpu = (unsigned long)hcpu;
-        long *count;
-        unsigned long *src, *dest;
 
         if (action == CPU_DEAD) {
-                int i;
-
-                /* Drain local pagecache count. */
-                count = &per_cpu(nr_pagecache_local, cpu);
-                atomic_add(*count, &nr_pagecache);
-                *count = 0;
                 local_irq_disable();
                 __drain_pages(cpu);
-
-                /* Add dead cpu's page_states to our own. */
-                dest = (unsigned long *)&__get_cpu_var(page_states);
-                src = (unsigned long *)&per_cpu(page_states, cpu);
-
-                for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
-                                i++) {
-                        dest[i] += src[i];
-                        src[i] = 0;
-                }
-
+                vm_events_fold_cpu(cpu);
                 local_irq_enable();
+                refresh_cpu_vm_stats(cpu);
         }
         return NOTIFY_OK;
 }