author		Christoph Lameter <clameter@sgi.com>	2006-06-30 04:55:32 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-06-30 14:25:34 -0400
commit		f6ac2354d791195ca40822b84d73d48a4e8b7f2b (patch)
tree		5f600175cf3591eac3d32bb8cebfd45d0aabf804 /mm
parent		672b2714ae57af16fe7d760dc4e0918a7a6cb0fa (diff)
[PATCH] zoned vm counters: create vmstat.c/.h from page_alloc.c/.h
NOTE: ZVCs are *not* the lightweight event counters.  ZVCs are reliable whereas event counters do not need to be.

Zone based VM statistics are necessary to determine what the state of memory in one zone is.  In a NUMA system this can be helpful for local reclaim and other memory optimizations that may be able to shift VM load in order to get more balanced memory use.

It is also useful to know how the computing load affects the memory allocations on various zones.  This patchset allows the retrieval of that data from userspace.

The patchset introduces a framework for counters that is a cross between the existing page_stats --which are simply global counters split per cpu-- and the approach of deferred incremental updates implemented for nr_pagecache.

Small per cpu 8 bit counters are added to struct zone.  If a counter exceeds certain thresholds then the counters are accumulated in an array of atomic_long in the zone and in a global array that sums up all zone values.  The small 8 bit counters sit next to the per cpu page pointers and so will be hot in the cpu cache when pages are allocated and freed.

Access to VM counter information for a zone and for the whole machine is then possible by simply indexing an array (thanks to Nick Piggin for pointing out that approach).  Access to the total number of pages of various types no longer requires summing up all per cpu counters.

Benefits of this patchset right now:

- Ability for UP and SMP configurations to determine how memory is balanced between the DMA, NORMAL and HIGHMEM zones.

- Loops over all processors are avoided in writeback and reclaim paths.  We can avoid caching the writeback information because the needed information is directly accessible.

- Special handling for nr_pagecache removed.

- zone_reclaim_interval vanishes since VM stats can now determine when it is worth doing local reclaim.

- Fast inline per node page state determination.

- Accurate counters in /sys/devices/system/node/node*/meminfo.  The current counters simply count which processor allocated a page somewhere and guesstimate based on that, so they were not useful for showing the actual distribution of page use on a specific zone.

- The swap_prefetch patch requires per node statistics in order to figure out when processors of a node can prefetch.  This patch provides some of the needed numbers.

- Detailed VM counters available in more /proc and /sys status files.

References to earlier discussions:
V1 http://marc.theaimsgroup.com/?l=linux-kernel&m=113511649910826&w=2
V2 http://marc.theaimsgroup.com/?l=linux-kernel&m=114980851924230&w=2
V3 http://marc.theaimsgroup.com/?l=linux-kernel&m=115014697910351&w=2
V4 http://marc.theaimsgroup.com/?l=linux-kernel&m=115024767318740&w=2

Performance tests with AIM7 did not show any regressions; it even seems to be a tad faster.  Tested on ia64/NUMA.  Builds fine on i386 SMP / UP.  Includes fixes for s390/arm/uml arch code.

This patch:

Move counter code from page_alloc.c/page-flags.h to vmstat.c/h.  Create vmstat.c/vmstat.h by separating the counter code and the proc functions.  Move the vmstat_text array before zoneinfo_show.

[akpm@osdl.org: s390 build fix]
[akpm@osdl.org: HOTPLUG_CPU build fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
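To make the deferred-update scheme described above concrete, here is a minimal userspace sketch in plain C11 (not kernel code): each CPU keeps a small signed 8 bit delta, and only when that delta crosses a threshold is it folded into the shared atomic zone and global totals, so reading a total becomes a single load.  All names here (zvc_add, zvc_fold, ZVC_THRESHOLD, a fixed NR_CPUS of 4, and so on) are illustrative assumptions, not interfaces introduced by this patch or later patches in the series.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS       4
#define ZVC_THRESHOLD 32	/* fold the per-cpu delta once |delta| exceeds this */

/* Shared totals that are expensive to touch: one per zone plus a global sum. */
static atomic_long zone_pages;
static atomic_long global_pages;

/* Cheap per-cpu state: a small signed delta kept next to other hot data. */
static int8_t cpu_delta[NR_CPUS];

/* Fold a cpu's accumulated delta into the shared atomic counters. */
static void zvc_fold(int cpu)
{
	long d = cpu_delta[cpu];

	cpu_delta[cpu] = 0;
	atomic_fetch_add(&zone_pages, d);
	atomic_fetch_add(&global_pages, d);
}

/* Account one event (a page allocated or freed) on this cpu. */
static void zvc_add(int cpu, int delta)
{
	int d = cpu_delta[cpu] + delta;

	if (d > ZVC_THRESHOLD || d < -ZVC_THRESHOLD) {
		cpu_delta[cpu] = (int8_t)d;	/* still fits: at most threshold + 1 */
		zvc_fold(cpu);			/* rare slow path: hit the atomics */
	} else {
		cpu_delta[cpu] = (int8_t)d;	/* common fast path: one byte store */
	}
}

/* Reading the machine-wide total is a single load, no loop over all cpus. */
static long zvc_read_global(void)
{
	return atomic_load(&global_pages);
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		zvc_add(i % NR_CPUS, (i % 3) ? 1 : -1);

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		zvc_fold(cpu);		/* drain leftovers so the total is exact */

	printf("global_pages = %ld\n", zvc_read_global());
	return 0;
}

In this sketch the shared totals can lag the true count by at most NR_CPUS * ZVC_THRESHOLD events between folds, which is the same accuracy-for-speed trade the description above makes for statistics that only need to be approximately current.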
Diffstat (limited to 'mm')
 mm/Makefile     |   2
 mm/page_alloc.c | 407
 mm/vmstat.c     | 417
 3 files changed, 418 insertions(+), 408 deletions(-)
diff --git a/mm/Makefile b/mm/Makefile
index 0b8f73f2ed16..9dd824c11eeb 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -10,7 +10,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
-			   prio_tree.o util.o mmzone.o $(mmu-y)
+			   prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 084a2de7e52a..87dc1297fe39 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1231,141 +1231,6 @@ static void show_node(struct zone *zone)
 #define show_node(zone)	do { } while (0)
 #endif
 
1234/*
1235 * Accumulate the page_state information across all CPUs.
1236 * The result is unavoidably approximate - it can change
1237 * during and after execution of this function.
1238 */
1239static DEFINE_PER_CPU(struct page_state, page_states) = {0};
1240
1241atomic_t nr_pagecache = ATOMIC_INIT(0);
1242EXPORT_SYMBOL(nr_pagecache);
1243#ifdef CONFIG_SMP
1244DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
1245#endif
1246
1247static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
1248{
1249 unsigned cpu;
1250
1251 memset(ret, 0, nr * sizeof(unsigned long));
1252 cpus_and(*cpumask, *cpumask, cpu_online_map);
1253
1254 for_each_cpu_mask(cpu, *cpumask) {
1255 unsigned long *in;
1256 unsigned long *out;
1257 unsigned off;
1258 unsigned next_cpu;
1259
1260 in = (unsigned long *)&per_cpu(page_states, cpu);
1261
1262 next_cpu = next_cpu(cpu, *cpumask);
1263 if (likely(next_cpu < NR_CPUS))
1264 prefetch(&per_cpu(page_states, next_cpu));
1265
1266 out = (unsigned long *)ret;
1267 for (off = 0; off < nr; off++)
1268 *out++ += *in++;
1269 }
1270}
1271
1272void get_page_state_node(struct page_state *ret, int node)
1273{
1274 int nr;
1275 cpumask_t mask = node_to_cpumask(node);
1276
1277 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
1278 nr /= sizeof(unsigned long);
1279
1280 __get_page_state(ret, nr+1, &mask);
1281}
1282
1283void get_page_state(struct page_state *ret)
1284{
1285 int nr;
1286 cpumask_t mask = CPU_MASK_ALL;
1287
1288 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
1289 nr /= sizeof(unsigned long);
1290
1291 __get_page_state(ret, nr + 1, &mask);
1292}
1293
1294void get_full_page_state(struct page_state *ret)
1295{
1296 cpumask_t mask = CPU_MASK_ALL;
1297
1298 __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
1299}
1300
1301unsigned long read_page_state_offset(unsigned long offset)
1302{
1303 unsigned long ret = 0;
1304 int cpu;
1305
1306 for_each_online_cpu(cpu) {
1307 unsigned long in;
1308
1309 in = (unsigned long)&per_cpu(page_states, cpu) + offset;
1310 ret += *((unsigned long *)in);
1311 }
1312 return ret;
1313}
1314
1315void __mod_page_state_offset(unsigned long offset, unsigned long delta)
1316{
1317 void *ptr;
1318
1319 ptr = &__get_cpu_var(page_states);
1320 *(unsigned long *)(ptr + offset) += delta;
1321}
1322EXPORT_SYMBOL(__mod_page_state_offset);
1323
1324void mod_page_state_offset(unsigned long offset, unsigned long delta)
1325{
1326 unsigned long flags;
1327 void *ptr;
1328
1329 local_irq_save(flags);
1330 ptr = &__get_cpu_var(page_states);
1331 *(unsigned long *)(ptr + offset) += delta;
1332 local_irq_restore(flags);
1333}
1334EXPORT_SYMBOL(mod_page_state_offset);
1335
1336void __get_zone_counts(unsigned long *active, unsigned long *inactive,
1337 unsigned long *free, struct pglist_data *pgdat)
1338{
1339 struct zone *zones = pgdat->node_zones;
1340 int i;
1341
1342 *active = 0;
1343 *inactive = 0;
1344 *free = 0;
1345 for (i = 0; i < MAX_NR_ZONES; i++) {
1346 *active += zones[i].nr_active;
1347 *inactive += zones[i].nr_inactive;
1348 *free += zones[i].free_pages;
1349 }
1350}
1351
1352void get_zone_counts(unsigned long *active,
1353 unsigned long *inactive, unsigned long *free)
1354{
1355 struct pglist_data *pgdat;
1356
1357 *active = 0;
1358 *inactive = 0;
1359 *free = 0;
1360 for_each_online_pgdat(pgdat) {
1361 unsigned long l, m, n;
1362 __get_zone_counts(&l, &m, &n, pgdat);
1363 *active += l;
1364 *inactive += m;
1365 *free += n;
1366 }
1367}
1368
 void si_meminfo(struct sysinfo *val)
 {
 	val->totalram = totalram_pages;
@@ -2253,278 +2118,6 @@ void __init free_area_init(unsigned long *zones_size)
 		__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
 
2256#ifdef CONFIG_PROC_FS
2257
2258#include <linux/seq_file.h>
2259
2260static void *frag_start(struct seq_file *m, loff_t *pos)
2261{
2262 pg_data_t *pgdat;
2263 loff_t node = *pos;
2264 for (pgdat = first_online_pgdat();
2265 pgdat && node;
2266 pgdat = next_online_pgdat(pgdat))
2267 --node;
2268
2269 return pgdat;
2270}
2271
2272static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
2273{
2274 pg_data_t *pgdat = (pg_data_t *)arg;
2275
2276 (*pos)++;
2277 return next_online_pgdat(pgdat);
2278}
2279
2280static void frag_stop(struct seq_file *m, void *arg)
2281{
2282}
2283
2284/*
2285 * This walks the free areas for each zone.
2286 */
2287static int frag_show(struct seq_file *m, void *arg)
2288{
2289 pg_data_t *pgdat = (pg_data_t *)arg;
2290 struct zone *zone;
2291 struct zone *node_zones = pgdat->node_zones;
2292 unsigned long flags;
2293 int order;
2294
2295 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
2296 if (!populated_zone(zone))
2297 continue;
2298
2299 spin_lock_irqsave(&zone->lock, flags);
2300 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
2301 for (order = 0; order < MAX_ORDER; ++order)
2302 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
2303 spin_unlock_irqrestore(&zone->lock, flags);
2304 seq_putc(m, '\n');
2305 }
2306 return 0;
2307}
2308
2309struct seq_operations fragmentation_op = {
2310 .start = frag_start,
2311 .next = frag_next,
2312 .stop = frag_stop,
2313 .show = frag_show,
2314};
2315
2316/*
2317 * Output information about zones in @pgdat.
2318 */
2319static int zoneinfo_show(struct seq_file *m, void *arg)
2320{
2321 pg_data_t *pgdat = arg;
2322 struct zone *zone;
2323 struct zone *node_zones = pgdat->node_zones;
2324 unsigned long flags;
2325
2326 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
2327 int i;
2328
2329 if (!populated_zone(zone))
2330 continue;
2331
2332 spin_lock_irqsave(&zone->lock, flags);
2333 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
2334 seq_printf(m,
2335 "\n pages free %lu"
2336 "\n min %lu"
2337 "\n low %lu"
2338 "\n high %lu"
2339 "\n active %lu"
2340 "\n inactive %lu"
2341 "\n scanned %lu (a: %lu i: %lu)"
2342 "\n spanned %lu"
2343 "\n present %lu",
2344 zone->free_pages,
2345 zone->pages_min,
2346 zone->pages_low,
2347 zone->pages_high,
2348 zone->nr_active,
2349 zone->nr_inactive,
2350 zone->pages_scanned,
2351 zone->nr_scan_active, zone->nr_scan_inactive,
2352 zone->spanned_pages,
2353 zone->present_pages);
2354 seq_printf(m,
2355 "\n protection: (%lu",
2356 zone->lowmem_reserve[0]);
2357 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
2358 seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
2359 seq_printf(m,
2360 ")"
2361 "\n pagesets");
2362 for_each_online_cpu(i) {
2363 struct per_cpu_pageset *pageset;
2364 int j;
2365
2366 pageset = zone_pcp(zone, i);
2367 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
2368 if (pageset->pcp[j].count)
2369 break;
2370 }
2371 if (j == ARRAY_SIZE(pageset->pcp))
2372 continue;
2373 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
2374 seq_printf(m,
2375 "\n cpu: %i pcp: %i"
2376 "\n count: %i"
2377 "\n high: %i"
2378 "\n batch: %i",
2379 i, j,
2380 pageset->pcp[j].count,
2381 pageset->pcp[j].high,
2382 pageset->pcp[j].batch);
2383 }
2384#ifdef CONFIG_NUMA
2385 seq_printf(m,
2386 "\n numa_hit: %lu"
2387 "\n numa_miss: %lu"
2388 "\n numa_foreign: %lu"
2389 "\n interleave_hit: %lu"
2390 "\n local_node: %lu"
2391 "\n other_node: %lu",
2392 pageset->numa_hit,
2393 pageset->numa_miss,
2394 pageset->numa_foreign,
2395 pageset->interleave_hit,
2396 pageset->local_node,
2397 pageset->other_node);
2398#endif
2399 }
2400 seq_printf(m,
2401 "\n all_unreclaimable: %u"
2402 "\n prev_priority: %i"
2403 "\n temp_priority: %i"
2404 "\n start_pfn: %lu",
2405 zone->all_unreclaimable,
2406 zone->prev_priority,
2407 zone->temp_priority,
2408 zone->zone_start_pfn);
2409 spin_unlock_irqrestore(&zone->lock, flags);
2410 seq_putc(m, '\n');
2411 }
2412 return 0;
2413}
2414
2415struct seq_operations zoneinfo_op = {
2416 .start = frag_start, /* iterate over all zones. The same as in
2417 * fragmentation. */
2418 .next = frag_next,
2419 .stop = frag_stop,
2420 .show = zoneinfo_show,
2421};
2422
2423static char *vmstat_text[] = {
2424 "nr_dirty",
2425 "nr_writeback",
2426 "nr_unstable",
2427 "nr_page_table_pages",
2428 "nr_mapped",
2429 "nr_slab",
2430
2431 "pgpgin",
2432 "pgpgout",
2433 "pswpin",
2434 "pswpout",
2435
2436 "pgalloc_high",
2437 "pgalloc_normal",
2438 "pgalloc_dma32",
2439 "pgalloc_dma",
2440
2441 "pgfree",
2442 "pgactivate",
2443 "pgdeactivate",
2444
2445 "pgfault",
2446 "pgmajfault",
2447
2448 "pgrefill_high",
2449 "pgrefill_normal",
2450 "pgrefill_dma32",
2451 "pgrefill_dma",
2452
2453 "pgsteal_high",
2454 "pgsteal_normal",
2455 "pgsteal_dma32",
2456 "pgsteal_dma",
2457
2458 "pgscan_kswapd_high",
2459 "pgscan_kswapd_normal",
2460 "pgscan_kswapd_dma32",
2461 "pgscan_kswapd_dma",
2462
2463 "pgscan_direct_high",
2464 "pgscan_direct_normal",
2465 "pgscan_direct_dma32",
2466 "pgscan_direct_dma",
2467
2468 "pginodesteal",
2469 "slabs_scanned",
2470 "kswapd_steal",
2471 "kswapd_inodesteal",
2472 "pageoutrun",
2473 "allocstall",
2474
2475 "pgrotated",
2476 "nr_bounce",
2477};
2478
2479static void *vmstat_start(struct seq_file *m, loff_t *pos)
2480{
2481 struct page_state *ps;
2482
2483 if (*pos >= ARRAY_SIZE(vmstat_text))
2484 return NULL;
2485
2486 ps = kmalloc(sizeof(*ps), GFP_KERNEL);
2487 m->private = ps;
2488 if (!ps)
2489 return ERR_PTR(-ENOMEM);
2490 get_full_page_state(ps);
2491 ps->pgpgin /= 2; /* sectors -> kbytes */
2492 ps->pgpgout /= 2;
2493 return (unsigned long *)ps + *pos;
2494}
2495
2496static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
2497{
2498 (*pos)++;
2499 if (*pos >= ARRAY_SIZE(vmstat_text))
2500 return NULL;
2501 return (unsigned long *)m->private + *pos;
2502}
2503
2504static int vmstat_show(struct seq_file *m, void *arg)
2505{
2506 unsigned long *l = arg;
2507 unsigned long off = l - (unsigned long *)m->private;
2508
2509 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
2510 return 0;
2511}
2512
2513static void vmstat_stop(struct seq_file *m, void *arg)
2514{
2515 kfree(m->private);
2516 m->private = NULL;
2517}
2518
2519struct seq_operations vmstat_op = {
2520 .start = vmstat_start,
2521 .next = vmstat_next,
2522 .stop = vmstat_stop,
2523 .show = vmstat_show,
2524};
2525
2526#endif /* CONFIG_PROC_FS */
2527
 #ifdef CONFIG_HOTPLUG_CPU
 static int page_alloc_cpu_notify(struct notifier_block *self,
 		unsigned long action, void *hcpu)
diff --git a/mm/vmstat.c b/mm/vmstat.c
new file mode 100644
index 000000000000..ad456202ff1a
--- /dev/null
+++ b/mm/vmstat.c
@@ -0,0 +1,417 @@
1/*
2 * linux/mm/vmstat.c
3 *
4 * Manages VM statistics
5 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
6 */
7
8#include <linux/config.h>
9#include <linux/mm.h>
10
11/*
12 * Accumulate the page_state information across all CPUs.
13 * The result is unavoidably approximate - it can change
14 * during and after execution of this function.
15 */
16DEFINE_PER_CPU(struct page_state, page_states) = {0};
17
18atomic_t nr_pagecache = ATOMIC_INIT(0);
19EXPORT_SYMBOL(nr_pagecache);
20#ifdef CONFIG_SMP
21DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
22#endif
23
24static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
25{
26 unsigned cpu;
27
28 memset(ret, 0, nr * sizeof(unsigned long));
29 cpus_and(*cpumask, *cpumask, cpu_online_map);
30
31 for_each_cpu_mask(cpu, *cpumask) {
32 unsigned long *in;
33 unsigned long *out;
34 unsigned off;
35 unsigned next_cpu;
36
37 in = (unsigned long *)&per_cpu(page_states, cpu);
38
39 next_cpu = next_cpu(cpu, *cpumask);
40 if (likely(next_cpu < NR_CPUS))
41 prefetch(&per_cpu(page_states, next_cpu));
42
43 out = (unsigned long *)ret;
44 for (off = 0; off < nr; off++)
45 *out++ += *in++;
46 }
47}
48
49void get_page_state_node(struct page_state *ret, int node)
50{
51 int nr;
52 cpumask_t mask = node_to_cpumask(node);
53
54 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
55 nr /= sizeof(unsigned long);
56
57 __get_page_state(ret, nr+1, &mask);
58}
59
60void get_page_state(struct page_state *ret)
61{
62 int nr;
63 cpumask_t mask = CPU_MASK_ALL;
64
65 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
66 nr /= sizeof(unsigned long);
67
68 __get_page_state(ret, nr + 1, &mask);
69}
70
71void get_full_page_state(struct page_state *ret)
72{
73 cpumask_t mask = CPU_MASK_ALL;
74
75 __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
76}
77
78unsigned long read_page_state_offset(unsigned long offset)
79{
80 unsigned long ret = 0;
81 int cpu;
82
83 for_each_online_cpu(cpu) {
84 unsigned long in;
85
86 in = (unsigned long)&per_cpu(page_states, cpu) + offset;
87 ret += *((unsigned long *)in);
88 }
89 return ret;
90}
91
92void __mod_page_state_offset(unsigned long offset, unsigned long delta)
93{
94 void *ptr;
95
96 ptr = &__get_cpu_var(page_states);
97 *(unsigned long *)(ptr + offset) += delta;
98}
99EXPORT_SYMBOL(__mod_page_state_offset);
100
101void mod_page_state_offset(unsigned long offset, unsigned long delta)
102{
103 unsigned long flags;
104 void *ptr;
105
106 local_irq_save(flags);
107 ptr = &__get_cpu_var(page_states);
108 *(unsigned long *)(ptr + offset) += delta;
109 local_irq_restore(flags);
110}
111EXPORT_SYMBOL(mod_page_state_offset);
112
113void __get_zone_counts(unsigned long *active, unsigned long *inactive,
114 unsigned long *free, struct pglist_data *pgdat)
115{
116 struct zone *zones = pgdat->node_zones;
117 int i;
118
119 *active = 0;
120 *inactive = 0;
121 *free = 0;
122 for (i = 0; i < MAX_NR_ZONES; i++) {
123 *active += zones[i].nr_active;
124 *inactive += zones[i].nr_inactive;
125 *free += zones[i].free_pages;
126 }
127}
128
129void get_zone_counts(unsigned long *active,
130 unsigned long *inactive, unsigned long *free)
131{
132 struct pglist_data *pgdat;
133
134 *active = 0;
135 *inactive = 0;
136 *free = 0;
137 for_each_online_pgdat(pgdat) {
138 unsigned long l, m, n;
139 __get_zone_counts(&l, &m, &n, pgdat);
140 *active += l;
141 *inactive += m;
142 *free += n;
143 }
144}
145
146#ifdef CONFIG_PROC_FS
147
148#include <linux/seq_file.h>
149
150static void *frag_start(struct seq_file *m, loff_t *pos)
151{
152 pg_data_t *pgdat;
153 loff_t node = *pos;
154 for (pgdat = first_online_pgdat();
155 pgdat && node;
156 pgdat = next_online_pgdat(pgdat))
157 --node;
158
159 return pgdat;
160}
161
162static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
163{
164 pg_data_t *pgdat = (pg_data_t *)arg;
165
166 (*pos)++;
167 return next_online_pgdat(pgdat);
168}
169
170static void frag_stop(struct seq_file *m, void *arg)
171{
172}
173
174/*
175 * This walks the free areas for each zone.
176 */
177static int frag_show(struct seq_file *m, void *arg)
178{
179 pg_data_t *pgdat = (pg_data_t *)arg;
180 struct zone *zone;
181 struct zone *node_zones = pgdat->node_zones;
182 unsigned long flags;
183 int order;
184
185 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
186 if (!populated_zone(zone))
187 continue;
188
189 spin_lock_irqsave(&zone->lock, flags);
190 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
191 for (order = 0; order < MAX_ORDER; ++order)
192 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
193 spin_unlock_irqrestore(&zone->lock, flags);
194 seq_putc(m, '\n');
195 }
196 return 0;
197}
198
199struct seq_operations fragmentation_op = {
200 .start = frag_start,
201 .next = frag_next,
202 .stop = frag_stop,
203 .show = frag_show,
204};
205
206static char *vmstat_text[] = {
207 "nr_dirty",
208 "nr_writeback",
209 "nr_unstable",
210 "nr_page_table_pages",
211 "nr_mapped",
212 "nr_slab",
213
214 "pgpgin",
215 "pgpgout",
216 "pswpin",
217 "pswpout",
218
219 "pgalloc_high",
220 "pgalloc_normal",
221 "pgalloc_dma32",
222 "pgalloc_dma",
223
224 "pgfree",
225 "pgactivate",
226 "pgdeactivate",
227
228 "pgfault",
229 "pgmajfault",
230
231 "pgrefill_high",
232 "pgrefill_normal",
233 "pgrefill_dma32",
234 "pgrefill_dma",
235
236 "pgsteal_high",
237 "pgsteal_normal",
238 "pgsteal_dma32",
239 "pgsteal_dma",
240
241 "pgscan_kswapd_high",
242 "pgscan_kswapd_normal",
243 "pgscan_kswapd_dma32",
244 "pgscan_kswapd_dma",
245
246 "pgscan_direct_high",
247 "pgscan_direct_normal",
248 "pgscan_direct_dma32",
249 "pgscan_direct_dma",
250
251 "pginodesteal",
252 "slabs_scanned",
253 "kswapd_steal",
254 "kswapd_inodesteal",
255 "pageoutrun",
256 "allocstall",
257
258 "pgrotated",
259 "nr_bounce",
260};
261
262/*
263 * Output information about zones in @pgdat.
264 */
265static int zoneinfo_show(struct seq_file *m, void *arg)
266{
267 pg_data_t *pgdat = arg;
268 struct zone *zone;
269 struct zone *node_zones = pgdat->node_zones;
270 unsigned long flags;
271
272 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
273 int i;
274
275 if (!populated_zone(zone))
276 continue;
277
278 spin_lock_irqsave(&zone->lock, flags);
279 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
280 seq_printf(m,
281 "\n pages free %lu"
282 "\n min %lu"
283 "\n low %lu"
284 "\n high %lu"
285 "\n active %lu"
286 "\n inactive %lu"
287 "\n scanned %lu (a: %lu i: %lu)"
288 "\n spanned %lu"
289 "\n present %lu",
290 zone->free_pages,
291 zone->pages_min,
292 zone->pages_low,
293 zone->pages_high,
294 zone->nr_active,
295 zone->nr_inactive,
296 zone->pages_scanned,
297 zone->nr_scan_active, zone->nr_scan_inactive,
298 zone->spanned_pages,
299 zone->present_pages);
300 seq_printf(m,
301 "\n protection: (%lu",
302 zone->lowmem_reserve[0]);
303 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
304 seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
305 seq_printf(m,
306 ")"
307 "\n pagesets");
308 for_each_online_cpu(i) {
309 struct per_cpu_pageset *pageset;
310 int j;
311
312 pageset = zone_pcp(zone, i);
313 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
314 if (pageset->pcp[j].count)
315 break;
316 }
317 if (j == ARRAY_SIZE(pageset->pcp))
318 continue;
319 for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
320 seq_printf(m,
321 "\n cpu: %i pcp: %i"
322 "\n count: %i"
323 "\n high: %i"
324 "\n batch: %i",
325 i, j,
326 pageset->pcp[j].count,
327 pageset->pcp[j].high,
328 pageset->pcp[j].batch);
329 }
330#ifdef CONFIG_NUMA
331 seq_printf(m,
332 "\n numa_hit: %lu"
333 "\n numa_miss: %lu"
334 "\n numa_foreign: %lu"
335 "\n interleave_hit: %lu"
336 "\n local_node: %lu"
337 "\n other_node: %lu",
338 pageset->numa_hit,
339 pageset->numa_miss,
340 pageset->numa_foreign,
341 pageset->interleave_hit,
342 pageset->local_node,
343 pageset->other_node);
344#endif
345 }
346 seq_printf(m,
347 "\n all_unreclaimable: %u"
348 "\n prev_priority: %i"
349 "\n temp_priority: %i"
350 "\n start_pfn: %lu",
351 zone->all_unreclaimable,
352 zone->prev_priority,
353 zone->temp_priority,
354 zone->zone_start_pfn);
355 spin_unlock_irqrestore(&zone->lock, flags);
356 seq_putc(m, '\n');
357 }
358 return 0;
359}
360
361struct seq_operations zoneinfo_op = {
362 .start = frag_start, /* iterate over all zones. The same as in
363 * fragmentation. */
364 .next = frag_next,
365 .stop = frag_stop,
366 .show = zoneinfo_show,
367};
368
369static void *vmstat_start(struct seq_file *m, loff_t *pos)
370{
371 struct page_state *ps;
372
373 if (*pos >= ARRAY_SIZE(vmstat_text))
374 return NULL;
375
376 ps = kmalloc(sizeof(*ps), GFP_KERNEL);
377 m->private = ps;
378 if (!ps)
379 return ERR_PTR(-ENOMEM);
380 get_full_page_state(ps);
381 ps->pgpgin /= 2; /* sectors -> kbytes */
382 ps->pgpgout /= 2;
383 return (unsigned long *)ps + *pos;
384}
385
386static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
387{
388 (*pos)++;
389 if (*pos >= ARRAY_SIZE(vmstat_text))
390 return NULL;
391 return (unsigned long *)m->private + *pos;
392}
393
394static int vmstat_show(struct seq_file *m, void *arg)
395{
396 unsigned long *l = arg;
397 unsigned long off = l - (unsigned long *)m->private;
398
399 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
400 return 0;
401}
402
403static void vmstat_stop(struct seq_file *m, void *arg)
404{
405 kfree(m->private);
406 m->private = NULL;
407}
408
409struct seq_operations vmstat_op = {
410 .start = vmstat_start,
411 .next = vmstat_next,
412 .stop = vmstat_stop,
413 .show = vmstat_show,
414};
415
416#endif /* CONFIG_PROC_FS */
417