aboutsummaryrefslogtreecommitdiffstats
path: root/tools/vm
diff options
context:
space:
mode:
authorDave Young <dyoung@redhat.com>2012-03-28 17:42:55 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 20:14:37 -0400
commitc6dd897f3bfc54a44942d742d6dfa842e33d88e0 (patch)
tree4da10b4ce42b5f3b93b90578e962a1bddbfb13f1 /tools/vm
parentcab6b0560080c6da5107c5d7dbba6372f7b288ab (diff)
mm: move page-types.c from Documentation to tools/vm
tools/ is the better place for vm tools which are used by many people. Moving them to tools also make them open to more users instead of hide in Documentation folder. This patch moves page-types.c to tools/vm/page-types.c. Also add a Makefile in tools/vm and fix two coding style problems: a) change const arrary to 'const char * const', b) change a space to tab for indent. Signed-off-by: Dave Young <dyoung@redhat.com> Acked-by: Wu Fengguang <fengguang.wu@intel.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'tools/vm')
-rw-r--r--tools/vm/Makefile11
-rw-r--r--tools/vm/page-types.c1102
2 files changed, 1113 insertions, 0 deletions
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
new file mode 100644
index 000000000000..3823d4b1fa75
--- /dev/null
+++ b/tools/vm/Makefile
@@ -0,0 +1,11 @@
1# Makefile for vm tools
2
3CC = $(CROSS_COMPILE)gcc
4CFLAGS = -Wall -Wextra
5
6all: page-types
7%: %.c
8 $(CC) $(CFLAGS) -o $@ $^
9
10clean:
11 $(RM) page-types
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
new file mode 100644
index 000000000000..7dab7b25b5c6
--- /dev/null
+++ b/tools/vm/page-types.c
@@ -0,0 +1,1102 @@
1/*
2 * page-types: Tool for querying page flags
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should find a copy of v2 of the GNU General Public License somewhere on
14 * your Linux system; if not, write to the Free Software Foundation, Inc., 59
15 * Temple Place, Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2009 Intel corporation
18 *
19 * Authors: Wu Fengguang <fengguang.wu@intel.com>
20 */
21
22#define _LARGEFILE64_SOURCE
23#include <stdio.h>
24#include <stdlib.h>
25#include <unistd.h>
26#include <stdint.h>
27#include <stdarg.h>
28#include <string.h>
29#include <getopt.h>
30#include <limits.h>
31#include <assert.h>
32#include <sys/types.h>
33#include <sys/errno.h>
34#include <sys/fcntl.h>
35#include <sys/mount.h>
36#include <sys/statfs.h>
37#include "../../include/linux/magic.h"
38
39
40#ifndef MAX_PATH
41# define MAX_PATH 256
42#endif
43
44#ifndef STR
45# define _STR(x) #x
46# define STR(x) _STR(x)
47#endif
48
49/*
50 * pagemap kernel ABI bits
51 */
52
53#define PM_ENTRY_BYTES sizeof(uint64_t)
54#define PM_STATUS_BITS 3
55#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
56#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
57#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
58#define PM_PSHIFT_BITS 6
59#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
60#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
61#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
62#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
63#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
64
65#define PM_PRESENT PM_STATUS(4LL)
66#define PM_SWAP PM_STATUS(2LL)
67
68
69/*
70 * kernel page flags
71 */
72
73#define KPF_BYTES 8
74#define PROC_KPAGEFLAGS "/proc/kpageflags"
75
76/* copied from kpageflags_read() */
77#define KPF_LOCKED 0
78#define KPF_ERROR 1
79#define KPF_REFERENCED 2
80#define KPF_UPTODATE 3
81#define KPF_DIRTY 4
82#define KPF_LRU 5
83#define KPF_ACTIVE 6
84#define KPF_SLAB 7
85#define KPF_WRITEBACK 8
86#define KPF_RECLAIM 9
87#define KPF_BUDDY 10
88
89/* [11-20] new additions in 2.6.31 */
90#define KPF_MMAP 11
91#define KPF_ANON 12
92#define KPF_SWAPCACHE 13
93#define KPF_SWAPBACKED 14
94#define KPF_COMPOUND_HEAD 15
95#define KPF_COMPOUND_TAIL 16
96#define KPF_HUGE 17
97#define KPF_UNEVICTABLE 18
98#define KPF_HWPOISON 19
99#define KPF_NOPAGE 20
100#define KPF_KSM 21
101#define KPF_THP 22
102
103/* [32-] kernel hacking assistances */
104#define KPF_RESERVED 32
105#define KPF_MLOCKED 33
106#define KPF_MAPPEDTODISK 34
107#define KPF_PRIVATE 35
108#define KPF_PRIVATE_2 36
109#define KPF_OWNER_PRIVATE 37
110#define KPF_ARCH 38
111#define KPF_UNCACHED 39
112
113/* [48-] take some arbitrary free slots for expanding overloaded flags
114 * not part of kernel API
115 */
116#define KPF_READAHEAD 48
117#define KPF_SLOB_FREE 49
118#define KPF_SLUB_FROZEN 50
119#define KPF_SLUB_DEBUG 51
120
121#define KPF_ALL_BITS ((uint64_t)~0ULL)
122#define KPF_HACKERS_BITS (0xffffULL << 32)
123#define KPF_OVERLOADED_BITS (0xffffULL << 48)
124#define BIT(name) (1ULL << KPF_##name)
125#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL))
126
127static const char * const page_flag_names[] = {
128 [KPF_LOCKED] = "L:locked",
129 [KPF_ERROR] = "E:error",
130 [KPF_REFERENCED] = "R:referenced",
131 [KPF_UPTODATE] = "U:uptodate",
132 [KPF_DIRTY] = "D:dirty",
133 [KPF_LRU] = "l:lru",
134 [KPF_ACTIVE] = "A:active",
135 [KPF_SLAB] = "S:slab",
136 [KPF_WRITEBACK] = "W:writeback",
137 [KPF_RECLAIM] = "I:reclaim",
138 [KPF_BUDDY] = "B:buddy",
139
140 [KPF_MMAP] = "M:mmap",
141 [KPF_ANON] = "a:anonymous",
142 [KPF_SWAPCACHE] = "s:swapcache",
143 [KPF_SWAPBACKED] = "b:swapbacked",
144 [KPF_COMPOUND_HEAD] = "H:compound_head",
145 [KPF_COMPOUND_TAIL] = "T:compound_tail",
146 [KPF_HUGE] = "G:huge",
147 [KPF_UNEVICTABLE] = "u:unevictable",
148 [KPF_HWPOISON] = "X:hwpoison",
149 [KPF_NOPAGE] = "n:nopage",
150 [KPF_KSM] = "x:ksm",
151 [KPF_THP] = "t:thp",
152
153 [KPF_RESERVED] = "r:reserved",
154 [KPF_MLOCKED] = "m:mlocked",
155 [KPF_MAPPEDTODISK] = "d:mappedtodisk",
156 [KPF_PRIVATE] = "P:private",
157 [KPF_PRIVATE_2] = "p:private_2",
158 [KPF_OWNER_PRIVATE] = "O:owner_private",
159 [KPF_ARCH] = "h:arch",
160 [KPF_UNCACHED] = "c:uncached",
161
162 [KPF_READAHEAD] = "I:readahead",
163 [KPF_SLOB_FREE] = "P:slob_free",
164 [KPF_SLUB_FROZEN] = "A:slub_frozen",
165 [KPF_SLUB_DEBUG] = "E:slub_debug",
166};
167
168
169static const char * const debugfs_known_mountpoints[] = {
170 "/sys/kernel/debug",
171 "/debug",
172 0,
173};
174
175/*
176 * data structures
177 */
178
179static int opt_raw; /* for kernel developers */
180static int opt_list; /* list pages (in ranges) */
181static int opt_no_summary; /* don't show summary */
182static pid_t opt_pid; /* process to walk */
183
184#define MAX_ADDR_RANGES 1024
185static int nr_addr_ranges;
186static unsigned long opt_offset[MAX_ADDR_RANGES];
187static unsigned long opt_size[MAX_ADDR_RANGES];
188
189#define MAX_VMAS 10240
190static int nr_vmas;
191static unsigned long pg_start[MAX_VMAS];
192static unsigned long pg_end[MAX_VMAS];
193
194#define MAX_BIT_FILTERS 64
195static int nr_bit_filters;
196static uint64_t opt_mask[MAX_BIT_FILTERS];
197static uint64_t opt_bits[MAX_BIT_FILTERS];
198
199static int page_size;
200
201static int pagemap_fd;
202static int kpageflags_fd;
203
204static int opt_hwpoison;
205static int opt_unpoison;
206
207static char hwpoison_debug_fs[MAX_PATH+1];
208static int hwpoison_inject_fd;
209static int hwpoison_forget_fd;
210
211#define HASH_SHIFT 13
212#define HASH_SIZE (1 << HASH_SHIFT)
213#define HASH_MASK (HASH_SIZE - 1)
214#define HASH_KEY(flags) (flags & HASH_MASK)
215
216static unsigned long total_pages;
217static unsigned long nr_pages[HASH_SIZE];
218static uint64_t page_flags[HASH_SIZE];
219
220
221/*
222 * helper functions
223 */
224
225#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
226
227#define min_t(type, x, y) ({ \
228 type __min1 = (x); \
229 type __min2 = (y); \
230 __min1 < __min2 ? __min1 : __min2; })
231
232#define max_t(type, x, y) ({ \
233 type __max1 = (x); \
234 type __max2 = (y); \
235 __max1 > __max2 ? __max1 : __max2; })
236
237static unsigned long pages2mb(unsigned long pages)
238{
239 return (pages * page_size) >> 20;
240}
241
242static void fatal(const char *x, ...)
243{
244 va_list ap;
245
246 va_start(ap, x);
247 vfprintf(stderr, x, ap);
248 va_end(ap);
249 exit(EXIT_FAILURE);
250}
251
252static int checked_open(const char *pathname, int flags)
253{
254 int fd = open(pathname, flags);
255
256 if (fd < 0) {
257 perror(pathname);
258 exit(EXIT_FAILURE);
259 }
260
261 return fd;
262}
263
264/*
265 * pagemap/kpageflags routines
266 */
267
268static unsigned long do_u64_read(int fd, char *name,
269 uint64_t *buf,
270 unsigned long index,
271 unsigned long count)
272{
273 long bytes;
274
275 if (index > ULONG_MAX / 8)
276 fatal("index overflow: %lu\n", index);
277
278 if (lseek(fd, index * 8, SEEK_SET) < 0) {
279 perror(name);
280 exit(EXIT_FAILURE);
281 }
282
283 bytes = read(fd, buf, count * 8);
284 if (bytes < 0) {
285 perror(name);
286 exit(EXIT_FAILURE);
287 }
288 if (bytes % 8)
289 fatal("partial read: %lu bytes\n", bytes);
290
291 return bytes / 8;
292}
293
294static unsigned long kpageflags_read(uint64_t *buf,
295 unsigned long index,
296 unsigned long pages)
297{
298 return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
299}
300
301static unsigned long pagemap_read(uint64_t *buf,
302 unsigned long index,
303 unsigned long pages)
304{
305 return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
306}
307
308static unsigned long pagemap_pfn(uint64_t val)
309{
310 unsigned long pfn;
311
312 if (val & PM_PRESENT)
313 pfn = PM_PFRAME(val);
314 else
315 pfn = 0;
316
317 return pfn;
318}
319
320
321/*
322 * page flag names
323 */
324
325static char *page_flag_name(uint64_t flags)
326{
327 static char buf[65];
328 int present;
329 int i, j;
330
331 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
332 present = (flags >> i) & 1;
333 if (!page_flag_names[i]) {
334 if (present)
335 fatal("unknown flag bit %d\n", i);
336 continue;
337 }
338 buf[j++] = present ? page_flag_names[i][0] : '_';
339 }
340
341 return buf;
342}
343
344static char *page_flag_longname(uint64_t flags)
345{
346 static char buf[1024];
347 int i, n;
348
349 for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
350 if (!page_flag_names[i])
351 continue;
352 if ((flags >> i) & 1)
353 n += snprintf(buf + n, sizeof(buf) - n, "%s,",
354 page_flag_names[i] + 2);
355 }
356 if (n)
357 n--;
358 buf[n] = '\0';
359
360 return buf;
361}
362
363
364/*
365 * page list and summary
366 */
367
368static void show_page_range(unsigned long voffset,
369 unsigned long offset, uint64_t flags)
370{
371 static uint64_t flags0;
372 static unsigned long voff;
373 static unsigned long index;
374 static unsigned long count;
375
376 if (flags == flags0 && offset == index + count &&
377 (!opt_pid || voffset == voff + count)) {
378 count++;
379 return;
380 }
381
382 if (count) {
383 if (opt_pid)
384 printf("%lx\t", voff);
385 printf("%lx\t%lx\t%s\n",
386 index, count, page_flag_name(flags0));
387 }
388
389 flags0 = flags;
390 index = offset;
391 voff = voffset;
392 count = 1;
393}
394
395static void show_page(unsigned long voffset,
396 unsigned long offset, uint64_t flags)
397{
398 if (opt_pid)
399 printf("%lx\t", voffset);
400 printf("%lx\t%s\n", offset, page_flag_name(flags));
401}
402
403static void show_summary(void)
404{
405 int i;
406
407 printf(" flags\tpage-count MB"
408 " symbolic-flags\t\t\tlong-symbolic-flags\n");
409
410 for (i = 0; i < ARRAY_SIZE(nr_pages); i++) {
411 if (nr_pages[i])
412 printf("0x%016llx\t%10lu %8lu %s\t%s\n",
413 (unsigned long long)page_flags[i],
414 nr_pages[i],
415 pages2mb(nr_pages[i]),
416 page_flag_name(page_flags[i]),
417 page_flag_longname(page_flags[i]));
418 }
419
420 printf(" total\t%10lu %8lu\n",
421 total_pages, pages2mb(total_pages));
422}
423
424
425/*
426 * page flag filters
427 */
428
429static int bit_mask_ok(uint64_t flags)
430{
431 int i;
432
433 for (i = 0; i < nr_bit_filters; i++) {
434 if (opt_bits[i] == KPF_ALL_BITS) {
435 if ((flags & opt_mask[i]) == 0)
436 return 0;
437 } else {
438 if ((flags & opt_mask[i]) != opt_bits[i])
439 return 0;
440 }
441 }
442
443 return 1;
444}
445
446static uint64_t expand_overloaded_flags(uint64_t flags)
447{
448 /* SLOB/SLUB overload several page flags */
449 if (flags & BIT(SLAB)) {
450 if (flags & BIT(PRIVATE))
451 flags ^= BIT(PRIVATE) | BIT(SLOB_FREE);
452 if (flags & BIT(ACTIVE))
453 flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN);
454 if (flags & BIT(ERROR))
455 flags ^= BIT(ERROR) | BIT(SLUB_DEBUG);
456 }
457
458 /* PG_reclaim is overloaded as PG_readahead in the read path */
459 if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
460 flags ^= BIT(RECLAIM) | BIT(READAHEAD);
461
462 return flags;
463}
464
465static uint64_t well_known_flags(uint64_t flags)
466{
467 /* hide flags intended only for kernel hacker */
468 flags &= ~KPF_HACKERS_BITS;
469
470 /* hide non-hugeTLB compound pages */
471 if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE)))
472 flags &= ~BITS_COMPOUND;
473
474 return flags;
475}
476
477static uint64_t kpageflags_flags(uint64_t flags)
478{
479 flags = expand_overloaded_flags(flags);
480
481 if (!opt_raw)
482 flags = well_known_flags(flags);
483
484 return flags;
485}
486
487/* verify that a mountpoint is actually a debugfs instance */
488static int debugfs_valid_mountpoint(const char *debugfs)
489{
490 struct statfs st_fs;
491
492 if (statfs(debugfs, &st_fs) < 0)
493 return -ENOENT;
494 else if (st_fs.f_type != (long) DEBUGFS_MAGIC)
495 return -ENOENT;
496
497 return 0;
498}
499
500/* find the path to the mounted debugfs */
501static const char *debugfs_find_mountpoint(void)
502{
503 const char **ptr;
504 char type[100];
505 FILE *fp;
506
507 ptr = debugfs_known_mountpoints;
508 while (*ptr) {
509 if (debugfs_valid_mountpoint(*ptr) == 0) {
510 strcpy(hwpoison_debug_fs, *ptr);
511 return hwpoison_debug_fs;
512 }
513 ptr++;
514 }
515
516 /* give up and parse /proc/mounts */
517 fp = fopen("/proc/mounts", "r");
518 if (fp == NULL)
519 perror("Can't open /proc/mounts for read");
520
521 while (fscanf(fp, "%*s %"
522 STR(MAX_PATH)
523 "s %99s %*s %*d %*d\n",
524 hwpoison_debug_fs, type) == 2) {
525 if (strcmp(type, "debugfs") == 0)
526 break;
527 }
528 fclose(fp);
529
530 if (strcmp(type, "debugfs") != 0)
531 return NULL;
532
533 return hwpoison_debug_fs;
534}
535
536/* mount the debugfs somewhere if it's not mounted */
537
538static void debugfs_mount(void)
539{
540 const char **ptr;
541
542 /* see if it's already mounted */
543 if (debugfs_find_mountpoint())
544 return;
545
546 ptr = debugfs_known_mountpoints;
547 while (*ptr) {
548 if (mount(NULL, *ptr, "debugfs", 0, NULL) == 0) {
549 /* save the mountpoint */
550 strcpy(hwpoison_debug_fs, *ptr);
551 break;
552 }
553 ptr++;
554 }
555
556 if (*ptr == NULL) {
557 perror("mount debugfs");
558 exit(EXIT_FAILURE);
559 }
560}
561
562/*
563 * page actions
564 */
565
566static void prepare_hwpoison_fd(void)
567{
568 char buf[MAX_PATH + 1];
569
570 debugfs_mount();
571
572 if (opt_hwpoison && !hwpoison_inject_fd) {
573 snprintf(buf, MAX_PATH, "%s/hwpoison/corrupt-pfn",
574 hwpoison_debug_fs);
575 hwpoison_inject_fd = checked_open(buf, O_WRONLY);
576 }
577
578 if (opt_unpoison && !hwpoison_forget_fd) {
579 snprintf(buf, MAX_PATH, "%s/hwpoison/unpoison-pfn",
580 hwpoison_debug_fs);
581 hwpoison_forget_fd = checked_open(buf, O_WRONLY);
582 }
583}
584
585static int hwpoison_page(unsigned long offset)
586{
587 char buf[100];
588 int len;
589
590 len = sprintf(buf, "0x%lx\n", offset);
591 len = write(hwpoison_inject_fd, buf, len);
592 if (len < 0) {
593 perror("hwpoison inject");
594 return len;
595 }
596 return 0;
597}
598
599static int unpoison_page(unsigned long offset)
600{
601 char buf[100];
602 int len;
603
604 len = sprintf(buf, "0x%lx\n", offset);
605 len = write(hwpoison_forget_fd, buf, len);
606 if (len < 0) {
607 perror("hwpoison forget");
608 return len;
609 }
610 return 0;
611}
612
613/*
614 * page frame walker
615 */
616
617static int hash_slot(uint64_t flags)
618{
619 int k = HASH_KEY(flags);
620 int i;
621
622 /* Explicitly reserve slot 0 for flags 0: the following logic
623 * cannot distinguish an unoccupied slot from slot (flags==0).
624 */
625 if (flags == 0)
626 return 0;
627
628 /* search through the remaining (HASH_SIZE-1) slots */
629 for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) {
630 if (!k || k >= ARRAY_SIZE(page_flags))
631 k = 1;
632 if (page_flags[k] == 0) {
633 page_flags[k] = flags;
634 return k;
635 }
636 if (page_flags[k] == flags)
637 return k;
638 }
639
640 fatal("hash table full: bump up HASH_SHIFT?\n");
641 exit(EXIT_FAILURE);
642}
643
644static void add_page(unsigned long voffset,
645 unsigned long offset, uint64_t flags)
646{
647 flags = kpageflags_flags(flags);
648
649 if (!bit_mask_ok(flags))
650 return;
651
652 if (opt_hwpoison)
653 hwpoison_page(offset);
654 if (opt_unpoison)
655 unpoison_page(offset);
656
657 if (opt_list == 1)
658 show_page_range(voffset, offset, flags);
659 else if (opt_list == 2)
660 show_page(voffset, offset, flags);
661
662 nr_pages[hash_slot(flags)]++;
663 total_pages++;
664}
665
666#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */
667static void walk_pfn(unsigned long voffset,
668 unsigned long index,
669 unsigned long count)
670{
671 uint64_t buf[KPAGEFLAGS_BATCH];
672 unsigned long batch;
673 long pages;
674 unsigned long i;
675
676 while (count) {
677 batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH);
678 pages = kpageflags_read(buf, index, batch);
679 if (pages == 0)
680 break;
681
682 for (i = 0; i < pages; i++)
683 add_page(voffset + i, index + i, buf[i]);
684
685 index += pages;
686 count -= pages;
687 }
688}
689
690#define PAGEMAP_BATCH (64 << 10)
691static void walk_vma(unsigned long index, unsigned long count)
692{
693 uint64_t buf[PAGEMAP_BATCH];
694 unsigned long batch;
695 unsigned long pages;
696 unsigned long pfn;
697 unsigned long i;
698
699 while (count) {
700 batch = min_t(unsigned long, count, PAGEMAP_BATCH);
701 pages = pagemap_read(buf, index, batch);
702 if (pages == 0)
703 break;
704
705 for (i = 0; i < pages; i++) {
706 pfn = pagemap_pfn(buf[i]);
707 if (pfn)
708 walk_pfn(index + i, pfn, 1);
709 }
710
711 index += pages;
712 count -= pages;
713 }
714}
715
716static void walk_task(unsigned long index, unsigned long count)
717{
718 const unsigned long end = index + count;
719 unsigned long start;
720 int i = 0;
721
722 while (index < end) {
723
724 while (pg_end[i] <= index)
725 if (++i >= nr_vmas)
726 return;
727 if (pg_start[i] >= end)
728 return;
729
730 start = max_t(unsigned long, pg_start[i], index);
731 index = min_t(unsigned long, pg_end[i], end);
732
733 assert(start < index);
734 walk_vma(start, index - start);
735 }
736}
737
738static void add_addr_range(unsigned long offset, unsigned long size)
739{
740 if (nr_addr_ranges >= MAX_ADDR_RANGES)
741 fatal("too many addr ranges\n");
742
743 opt_offset[nr_addr_ranges] = offset;
744 opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
745 nr_addr_ranges++;
746}
747
748static void walk_addr_ranges(void)
749{
750 int i;
751
752 kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
753
754 if (!nr_addr_ranges)
755 add_addr_range(0, ULONG_MAX);
756
757 for (i = 0; i < nr_addr_ranges; i++)
758 if (!opt_pid)
759 walk_pfn(0, opt_offset[i], opt_size[i]);
760 else
761 walk_task(opt_offset[i], opt_size[i]);
762
763 close(kpageflags_fd);
764}
765
766
767/*
768 * user interface
769 */
770
771static const char *page_flag_type(uint64_t flag)
772{
773 if (flag & KPF_HACKERS_BITS)
774 return "(r)";
775 if (flag & KPF_OVERLOADED_BITS)
776 return "(o)";
777 return " ";
778}
779
780static void usage(void)
781{
782 int i, j;
783
784 printf(
785"page-types [options]\n"
786" -r|--raw Raw mode, for kernel developers\n"
787" -d|--describe flags Describe flags\n"
788" -a|--addr addr-spec Walk a range of pages\n"
789" -b|--bits bits-spec Walk pages with specified bits\n"
790" -p|--pid pid Walk process address space\n"
791#if 0 /* planned features */
792" -f|--file filename Walk file address space\n"
793#endif
794" -l|--list Show page details in ranges\n"
795" -L|--list-each Show page details one by one\n"
796" -N|--no-summary Don't show summary info\n"
797" -X|--hwpoison hwpoison pages\n"
798" -x|--unpoison unpoison pages\n"
799" -h|--help Show this usage message\n"
800"flags:\n"
801" 0x10 bitfield format, e.g.\n"
802" anon bit-name, e.g.\n"
803" 0x10,anon comma-separated list, e.g.\n"
804"addr-spec:\n"
805" N one page at offset N (unit: pages)\n"
806" N+M pages range from N to N+M-1\n"
807" N,M pages range from N to M-1\n"
808" N, pages range from N to end\n"
809" ,M pages range from 0 to M-1\n"
810"bits-spec:\n"
811" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
812" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
813" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n"
814" =bit1,bit2 flags == (bit1|bit2)\n"
815"bit-names:\n"
816 );
817
818 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
819 if (!page_flag_names[i])
820 continue;
821 printf("%16s%s", page_flag_names[i] + 2,
822 page_flag_type(1ULL << i));
823 if (++j > 3) {
824 j = 0;
825 putchar('\n');
826 }
827 }
828 printf("\n "
829 "(r) raw mode bits (o) overloaded bits\n");
830}
831
832static unsigned long long parse_number(const char *str)
833{
834 unsigned long long n;
835
836 n = strtoll(str, NULL, 0);
837
838 if (n == 0 && str[0] != '0')
839 fatal("invalid name or number: %s\n", str);
840
841 return n;
842}
843
844static void parse_pid(const char *str)
845{
846 FILE *file;
847 char buf[5000];
848
849 opt_pid = parse_number(str);
850
851 sprintf(buf, "/proc/%d/pagemap", opt_pid);
852 pagemap_fd = checked_open(buf, O_RDONLY);
853
854 sprintf(buf, "/proc/%d/maps", opt_pid);
855 file = fopen(buf, "r");
856 if (!file) {
857 perror(buf);
858 exit(EXIT_FAILURE);
859 }
860
861 while (fgets(buf, sizeof(buf), file) != NULL) {
862 unsigned long vm_start;
863 unsigned long vm_end;
864 unsigned long long pgoff;
865 int major, minor;
866 char r, w, x, s;
867 unsigned long ino;
868 int n;
869
870 n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
871 &vm_start,
872 &vm_end,
873 &r, &w, &x, &s,
874 &pgoff,
875 &major, &minor,
876 &ino);
877 if (n < 10) {
878 fprintf(stderr, "unexpected line: %s\n", buf);
879 continue;
880 }
881 pg_start[nr_vmas] = vm_start / page_size;
882 pg_end[nr_vmas] = vm_end / page_size;
883 if (++nr_vmas >= MAX_VMAS) {
884 fprintf(stderr, "too many VMAs\n");
885 break;
886 }
887 }
888 fclose(file);
889}
890
891static void parse_file(const char *name)
892{
893}
894
895static void parse_addr_range(const char *optarg)
896{
897 unsigned long offset;
898 unsigned long size;
899 char *p;
900
901 p = strchr(optarg, ',');
902 if (!p)
903 p = strchr(optarg, '+');
904
905 if (p == optarg) {
906 offset = 0;
907 size = parse_number(p + 1);
908 } else if (p) {
909 offset = parse_number(optarg);
910 if (p[1] == '\0')
911 size = ULONG_MAX;
912 else {
913 size = parse_number(p + 1);
914 if (*p == ',') {
915 if (size < offset)
916 fatal("invalid range: %lu,%lu\n",
917 offset, size);
918 size -= offset;
919 }
920 }
921 } else {
922 offset = parse_number(optarg);
923 size = 1;
924 }
925
926 add_addr_range(offset, size);
927}
928
929static void add_bits_filter(uint64_t mask, uint64_t bits)
930{
931 if (nr_bit_filters >= MAX_BIT_FILTERS)
932 fatal("too much bit filters\n");
933
934 opt_mask[nr_bit_filters] = mask;
935 opt_bits[nr_bit_filters] = bits;
936 nr_bit_filters++;
937}
938
939static uint64_t parse_flag_name(const char *str, int len)
940{
941 int i;
942
943 if (!*str || !len)
944 return 0;
945
946 if (len <= 8 && !strncmp(str, "compound", len))
947 return BITS_COMPOUND;
948
949 for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) {
950 if (!page_flag_names[i])
951 continue;
952 if (!strncmp(str, page_flag_names[i] + 2, len))
953 return 1ULL << i;
954 }
955
956 return parse_number(str);
957}
958
959static uint64_t parse_flag_names(const char *str, int all)
960{
961 const char *p = str;
962 uint64_t flags = 0;
963
964 while (1) {
965 if (*p == ',' || *p == '=' || *p == '\0') {
966 if ((*str != '~') || (*str == '~' && all && *++str))
967 flags |= parse_flag_name(str, p - str);
968 if (*p != ',')
969 break;
970 str = p + 1;
971 }
972 p++;
973 }
974
975 return flags;
976}
977
978static void parse_bits_mask(const char *optarg)
979{
980 uint64_t mask;
981 uint64_t bits;
982 const char *p;
983
984 p = strchr(optarg, '=');
985 if (p == optarg) {
986 mask = KPF_ALL_BITS;
987 bits = parse_flag_names(p + 1, 0);
988 } else if (p) {
989 mask = parse_flag_names(optarg, 0);
990 bits = parse_flag_names(p + 1, 0);
991 } else if (strchr(optarg, '~')) {
992 mask = parse_flag_names(optarg, 1);
993 bits = parse_flag_names(optarg, 0);
994 } else {
995 mask = parse_flag_names(optarg, 0);
996 bits = KPF_ALL_BITS;
997 }
998
999 add_bits_filter(mask, bits);
1000}
1001
1002static void describe_flags(const char *optarg)
1003{
1004 uint64_t flags = parse_flag_names(optarg, 0);
1005
1006 printf("0x%016llx\t%s\t%s\n",
1007 (unsigned long long)flags,
1008 page_flag_name(flags),
1009 page_flag_longname(flags));
1010}
1011
1012static const struct option opts[] = {
1013 { "raw" , 0, NULL, 'r' },
1014 { "pid" , 1, NULL, 'p' },
1015 { "file" , 1, NULL, 'f' },
1016 { "addr" , 1, NULL, 'a' },
1017 { "bits" , 1, NULL, 'b' },
1018 { "describe" , 1, NULL, 'd' },
1019 { "list" , 0, NULL, 'l' },
1020 { "list-each" , 0, NULL, 'L' },
1021 { "no-summary", 0, NULL, 'N' },
1022 { "hwpoison" , 0, NULL, 'X' },
1023 { "unpoison" , 0, NULL, 'x' },
1024 { "help" , 0, NULL, 'h' },
1025 { NULL , 0, NULL, 0 }
1026};
1027
1028int main(int argc, char *argv[])
1029{
1030 int c;
1031
1032 page_size = getpagesize();
1033
1034 while ((c = getopt_long(argc, argv,
1035 "rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) {
1036 switch (c) {
1037 case 'r':
1038 opt_raw = 1;
1039 break;
1040 case 'p':
1041 parse_pid(optarg);
1042 break;
1043 case 'f':
1044 parse_file(optarg);
1045 break;
1046 case 'a':
1047 parse_addr_range(optarg);
1048 break;
1049 case 'b':
1050 parse_bits_mask(optarg);
1051 break;
1052 case 'd':
1053 describe_flags(optarg);
1054 exit(0);
1055 case 'l':
1056 opt_list = 1;
1057 break;
1058 case 'L':
1059 opt_list = 2;
1060 break;
1061 case 'N':
1062 opt_no_summary = 1;
1063 break;
1064 case 'X':
1065 opt_hwpoison = 1;
1066 prepare_hwpoison_fd();
1067 break;
1068 case 'x':
1069 opt_unpoison = 1;
1070 prepare_hwpoison_fd();
1071 break;
1072 case 'h':
1073 usage();
1074 exit(0);
1075 default:
1076 usage();
1077 exit(1);
1078 }
1079 }
1080
1081 if (opt_list && opt_pid)
1082 printf("voffset\t");
1083 if (opt_list == 1)
1084 printf("offset\tlen\tflags\n");
1085 if (opt_list == 2)
1086 printf("offset\tflags\n");
1087
1088 walk_addr_ranges();
1089
1090 if (opt_list == 1)
1091 show_page_range(0, 0, 0); /* drain the buffer */
1092
1093 if (opt_no_summary)
1094 return 0;
1095
1096 if (opt_list)
1097 printf("\n\n");
1098
1099 show_summary();
1100
1101 return 0;
1102}