diff options
| -rw-r--r-- | Documentation/vm/page-types.c | 200 |
1 files changed, 180 insertions, 20 deletions
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 3eda8ea00852..fa1a30d9e9d5 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> | 5 | * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #define _LARGEFILE64_SOURCE | ||
| 8 | #include <stdio.h> | 9 | #include <stdio.h> |
| 9 | #include <stdlib.h> | 10 | #include <stdlib.h> |
| 10 | #include <unistd.h> | 11 | #include <unistd.h> |
| @@ -13,12 +14,33 @@ | |||
| 13 | #include <string.h> | 14 | #include <string.h> |
| 14 | #include <getopt.h> | 15 | #include <getopt.h> |
| 15 | #include <limits.h> | 16 | #include <limits.h> |
| 17 | #include <assert.h> | ||
| 16 | #include <sys/types.h> | 18 | #include <sys/types.h> |
| 17 | #include <sys/errno.h> | 19 | #include <sys/errno.h> |
| 18 | #include <sys/fcntl.h> | 20 | #include <sys/fcntl.h> |
| 19 | 21 | ||
| 20 | 22 | ||
/*
 * pagemap kernel ABI bits
 *
 * Field layout of one 64-bit pagemap entry, as encoded by the masks below:
 *   bits 61-63  status field (PM_PRESENT, PM_SWAP)
 *   bits 55-60  PM_PSHIFT field
 *   bits  0-54  PM_PFRAME field
 *
 * Every mask is built from unsigned 64-bit operands: the status field
 * reaches bit 63, and shifting a signed long long into the sign bit is
 * undefined behaviour.
 */

#define PM_ENTRY_BYTES		sizeof(uint64_t)
#define PM_STATUS_BITS		3
#define PM_STATUS_OFFSET	(64 - PM_STATUS_BITS)
#define PM_STATUS_MASK		(((1ULL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr)		(((uint64_t)(nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS		6
#define PM_PSHIFT_OFFSET	(PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK		(((1ULL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
/* uint64_t rather than the kernel-only u64, which userspace does not define */
#define PM_PSHIFT(x)		(((uint64_t)(x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK		((1ULL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x)		((x) & PM_PFRAME_MASK)

#define PM_PRESENT		PM_STATUS(4ULL)
#define PM_SWAP			PM_STATUS(2ULL)
| 41 | |||
| 42 | |||
| 43 | /* | ||
| 22 | * kernel page flags | 44 | * kernel page flags |
| 23 | */ | 45 | */ |
| 24 | 46 | ||
| @@ -126,6 +148,14 @@ static int nr_addr_ranges; | |||
| 126 | static unsigned long opt_offset[MAX_ADDR_RANGES]; | 148 | static unsigned long opt_offset[MAX_ADDR_RANGES]; |
| 127 | static unsigned long opt_size[MAX_ADDR_RANGES]; | 149 | static unsigned long opt_size[MAX_ADDR_RANGES]; |
| 128 | 150 | ||
| 151 | #define MAX_VMAS 10240 | ||
| 152 | static int nr_vmas; | ||
| 153 | static unsigned long pg_start[MAX_VMAS]; | ||
| 154 | static unsigned long pg_end[MAX_VMAS]; | ||
| 155 | static unsigned long voffset; | ||
| 156 | |||
| 157 | static int pagemap_fd; | ||
| 158 | |||
| 129 | #define MAX_BIT_FILTERS 64 | 159 | #define MAX_BIT_FILTERS 64 |
| 130 | static int nr_bit_filters; | 160 | static int nr_bit_filters; |
| 131 | static uint64_t opt_mask[MAX_BIT_FILTERS]; | 161 | static uint64_t opt_mask[MAX_BIT_FILTERS]; |
| @@ -135,7 +165,6 @@ static int page_size; | |||
| 135 | 165 | ||
| 136 | #define PAGES_BATCH (64 << 10) /* 64k pages */ | 166 | #define PAGES_BATCH (64 << 10) /* 64k pages */ |
| 137 | static int kpageflags_fd; | 167 | static int kpageflags_fd; |
| 138 | static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; | ||
| 139 | 168 | ||
| 140 | #define HASH_SHIFT 13 | 169 | #define HASH_SHIFT 13 |
| 141 | #define HASH_SIZE (1 << HASH_SHIFT) | 170 | #define HASH_SIZE (1 << HASH_SHIFT) |
| @@ -158,6 +187,11 @@ static uint64_t page_flags[HASH_SIZE]; | |||
| 158 | type __min2 = (y); \ | 187 | type __min2 = (y); \ |
| 159 | __min1 < __min2 ? __min1 : __min2; }) | 188 | __min1 < __min2 ? __min1 : __min2; }) |
| 160 | 189 | ||
/*
 * max_t - larger of two values after converting both to @type.
 * Each argument is evaluated exactly once.
 */
#define max_t(type, x, y) ({				\
	type max_t_lhs_ = (x);				\
	type max_t_rhs_ = (y);				\
	max_t_rhs_ < max_t_lhs_ ? max_t_lhs_ : max_t_rhs_; })
| 194 | |||
| 161 | static unsigned long pages2mb(unsigned long pages) | 195 | static unsigned long pages2mb(unsigned long pages) |
| 162 | { | 196 | { |
| 163 | return (pages * page_size) >> 20; | 197 | return (pages * page_size) >> 20; |
| @@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags) | |||
| 224 | static void show_page_range(unsigned long offset, uint64_t flags) | 258 | static void show_page_range(unsigned long offset, uint64_t flags) |
| 225 | { | 259 | { |
| 226 | static uint64_t flags0; | 260 | static uint64_t flags0; |
| 261 | static unsigned long voff; | ||
| 227 | static unsigned long index; | 262 | static unsigned long index; |
| 228 | static unsigned long count; | 263 | static unsigned long count; |
| 229 | 264 | ||
| 230 | if (flags == flags0 && offset == index + count) { | 265 | if (flags == flags0 && offset == index + count && |
| 266 | (!opt_pid || voffset == voff + count)) { | ||
| 231 | count++; | 267 | count++; |
| 232 | return; | 268 | return; |
| 233 | } | 269 | } |
| 234 | 270 | ||
| 235 | if (count) | 271 | if (count) { |
| 236 | printf("%lu\t%lu\t%s\n", | 272 | if (opt_pid) |
| 273 | printf("%lx\t", voff); | ||
| 274 | printf("%lx\t%lx\t%s\n", | ||
| 237 | index, count, page_flag_name(flags0)); | 275 | index, count, page_flag_name(flags0)); |
| 276 | } | ||
| 238 | 277 | ||
| 239 | flags0 = flags; | 278 | flags0 = flags; |
| 240 | index = offset; | 279 | index = offset; |
| 280 | voff = voffset; | ||
| 241 | count = 1; | 281 | count = 1; |
| 242 | } | 282 | } |
| 243 | 283 | ||
| 244 | static void show_page(unsigned long offset, uint64_t flags) | 284 | static void show_page(unsigned long offset, uint64_t flags) |
| 245 | { | 285 | { |
| 246 | printf("%lu\t%s\n", offset, page_flag_name(flags)); | 286 | if (opt_pid) |
| 287 | printf("%lx\t", voffset); | ||
| 288 | printf("%lx\t%s\n", offset, page_flag_name(flags)); | ||
| 247 | } | 289 | } |
| 248 | 290 | ||
| 249 | static void show_summary(void) | 291 | static void show_summary(void) |
| @@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count) | |||
| 383 | lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); | 425 | lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); |
| 384 | 426 | ||
| 385 | while (count) { | 427 | while (count) { |
| 428 | uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; | ||
| 429 | |||
| 386 | batch = min_t(unsigned long, count, PAGES_BATCH); | 430 | batch = min_t(unsigned long, count, PAGES_BATCH); |
| 387 | n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); | 431 | n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); |
| 388 | if (n == 0) | 432 | if (n == 0) |
| @@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count) | |||
| 404 | } | 448 | } |
| 405 | } | 449 | } |
| 406 | 450 | ||
| 451 | |||
| 452 | #define PAGEMAP_BATCH 4096 | ||
| 453 | static unsigned long task_pfn(unsigned long pgoff) | ||
| 454 | { | ||
| 455 | static uint64_t buf[PAGEMAP_BATCH]; | ||
| 456 | static unsigned long start; | ||
| 457 | static long count; | ||
| 458 | uint64_t pfn; | ||
| 459 | |||
| 460 | if (pgoff < start || pgoff >= start + count) { | ||
| 461 | if (lseek64(pagemap_fd, | ||
| 462 | (uint64_t)pgoff * PM_ENTRY_BYTES, | ||
| 463 | SEEK_SET) < 0) { | ||
| 464 | perror("pagemap seek"); | ||
| 465 | exit(EXIT_FAILURE); | ||
| 466 | } | ||
| 467 | count = read(pagemap_fd, buf, sizeof(buf)); | ||
| 468 | if (count == 0) | ||
| 469 | return 0; | ||
| 470 | if (count < 0) { | ||
| 471 | perror("pagemap read"); | ||
| 472 | exit(EXIT_FAILURE); | ||
| 473 | } | ||
| 474 | if (count % PM_ENTRY_BYTES) { | ||
| 475 | fatal("pagemap read not aligned.\n"); | ||
| 476 | exit(EXIT_FAILURE); | ||
| 477 | } | ||
| 478 | count /= PM_ENTRY_BYTES; | ||
| 479 | start = pgoff; | ||
| 480 | } | ||
| 481 | |||
| 482 | pfn = buf[pgoff - start]; | ||
| 483 | if (pfn & PM_PRESENT) | ||
| 484 | pfn = PM_PFRAME(pfn); | ||
| 485 | else | ||
| 486 | pfn = 0; | ||
| 487 | |||
| 488 | return pfn; | ||
| 489 | } | ||
| 490 | |||
| 491 | static void walk_task(unsigned long index, unsigned long count) | ||
| 492 | { | ||
| 493 | int i = 0; | ||
| 494 | const unsigned long end = index + count; | ||
| 495 | |||
| 496 | while (index < end) { | ||
| 497 | |||
| 498 | while (pg_end[i] <= index) | ||
| 499 | if (++i >= nr_vmas) | ||
| 500 | return; | ||
| 501 | if (pg_start[i] >= end) | ||
| 502 | return; | ||
| 503 | |||
| 504 | voffset = max_t(unsigned long, pg_start[i], index); | ||
| 505 | index = min_t(unsigned long, pg_end[i], end); | ||
| 506 | |||
| 507 | assert(voffset < index); | ||
| 508 | for (; voffset < index; voffset++) { | ||
| 509 | unsigned long pfn = task_pfn(voffset); | ||
| 510 | if (pfn) | ||
| 511 | walk_pfn(pfn, 1); | ||
| 512 | } | ||
| 513 | } | ||
| 514 | } | ||
| 515 | |||
| 516 | static void add_addr_range(unsigned long offset, unsigned long size) | ||
| 517 | { | ||
| 518 | if (nr_addr_ranges >= MAX_ADDR_RANGES) | ||
| 519 | fatal("too many addr ranges\n"); | ||
| 520 | |||
| 521 | opt_offset[nr_addr_ranges] = offset; | ||
| 522 | opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); | ||
| 523 | nr_addr_ranges++; | ||
| 524 | } | ||
| 525 | |||
| 407 | static void walk_addr_ranges(void) | 526 | static void walk_addr_ranges(void) |
| 408 | { | 527 | { |
| 409 | int i; | 528 | int i; |
| @@ -415,10 +534,13 @@ static void walk_addr_ranges(void) | |||
| 415 | } | 534 | } |
| 416 | 535 | ||
| 417 | if (!nr_addr_ranges) | 536 | if (!nr_addr_ranges) |
| 418 | walk_pfn(0, ULONG_MAX); | 537 | add_addr_range(0, ULONG_MAX); |
| 419 | 538 | ||
| 420 | for (i = 0; i < nr_addr_ranges; i++) | 539 | for (i = 0; i < nr_addr_ranges; i++) |
| 421 | walk_pfn(opt_offset[i], opt_size[i]); | 540 | if (!opt_pid) |
| 541 | walk_pfn(opt_offset[i], opt_size[i]); | ||
| 542 | else | ||
| 543 | walk_task(opt_offset[i], opt_size[i]); | ||
| 422 | 544 | ||
| 423 | close(kpageflags_fd); | 545 | close(kpageflags_fd); |
| 424 | } | 546 | } |
| @@ -446,8 +568,8 @@ static void usage(void) | |||
| 446 | " -r|--raw Raw mode, for kernel developers\n" | 568 | " -r|--raw Raw mode, for kernel developers\n" |
| 447 | " -a|--addr addr-spec Walk a range of pages\n" | 569 | " -a|--addr addr-spec Walk a range of pages\n" |
| 448 | " -b|--bits bits-spec Walk pages with specified bits\n" | 570 | " -b|--bits bits-spec Walk pages with specified bits\n" |
| 449 | #if 0 /* planned features */ | ||
| 450 | " -p|--pid pid Walk process address space\n" | 571 | " -p|--pid pid Walk process address space\n" |
| 572 | #if 0 /* planned features */ | ||
| 451 | " -f|--file filename Walk file address space\n" | 573 | " -f|--file filename Walk file address space\n" |
| 452 | #endif | 574 | #endif |
| 453 | " -l|--list Show page details in ranges\n" | 575 | " -l|--list Show page details in ranges\n" |
| @@ -459,7 +581,7 @@ static void usage(void) | |||
| 459 | " N+M pages range from N to N+M-1\n" | 581 | " N+M pages range from N to N+M-1\n" |
| 460 | " N,M pages range from N to M-1\n" | 582 | " N,M pages range from N to M-1\n" |
| 461 | " N, pages range from N to end\n" | 583 | " N, pages range from N to end\n" |
| 462 | " ,M pages range from 0 to M\n" | 584 | " ,M pages range from 0 to M-1\n" |
| 463 | "bits-spec:\n" | 585 | "bits-spec:\n" |
| 464 | " bit1,bit2 (flags & (bit1|bit2)) != 0\n" | 586 | " bit1,bit2 (flags & (bit1|bit2)) != 0\n" |
| 465 | " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" | 587 | " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" |
| @@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str) | |||
| 496 | 618 | ||
| 497 | static void parse_pid(const char *str) | 619 | static void parse_pid(const char *str) |
| 498 | { | 620 | { |
| 621 | FILE *file; | ||
| 622 | char buf[5000]; | ||
| 623 | |||
| 499 | opt_pid = parse_number(str); | 624 | opt_pid = parse_number(str); |
| 500 | } | ||
| 501 | 625 | ||
| 502 | static void parse_file(const char *name) | 626 | sprintf(buf, "/proc/%d/pagemap", opt_pid); |
| 503 | { | 627 | pagemap_fd = open(buf, O_RDONLY); |
| 628 | if (pagemap_fd < 0) { | ||
| 629 | perror(buf); | ||
| 630 | exit(EXIT_FAILURE); | ||
| 631 | } | ||
| 632 | |||
| 633 | sprintf(buf, "/proc/%d/maps", opt_pid); | ||
| 634 | file = fopen(buf, "r"); | ||
| 635 | if (!file) { | ||
| 636 | perror(buf); | ||
| 637 | exit(EXIT_FAILURE); | ||
| 638 | } | ||
| 639 | |||
| 640 | while (fgets(buf, sizeof(buf), file) != NULL) { | ||
| 641 | unsigned long vm_start; | ||
| 642 | unsigned long vm_end; | ||
| 643 | unsigned long long pgoff; | ||
| 644 | int major, minor; | ||
| 645 | char r, w, x, s; | ||
| 646 | unsigned long ino; | ||
| 647 | int n; | ||
| 648 | |||
| 649 | n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", | ||
| 650 | &vm_start, | ||
| 651 | &vm_end, | ||
| 652 | &r, &w, &x, &s, | ||
| 653 | &pgoff, | ||
| 654 | &major, &minor, | ||
| 655 | &ino); | ||
| 656 | if (n < 10) { | ||
| 657 | fprintf(stderr, "unexpected line: %s\n", buf); | ||
| 658 | continue; | ||
| 659 | } | ||
| 660 | pg_start[nr_vmas] = vm_start / page_size; | ||
| 661 | pg_end[nr_vmas] = vm_end / page_size; | ||
| 662 | if (++nr_vmas >= MAX_VMAS) { | ||
| 663 | fprintf(stderr, "too many VMAs\n"); | ||
| 664 | break; | ||
| 665 | } | ||
| 666 | } | ||
| 667 | fclose(file); | ||
| 504 | } | 668 | } |
| 505 | 669 | ||
/*
 * Stub for the file-name option: the name is accepted and ignored.
 */
static void parse_file(const char *name)
{
	(void)name;	/* intentionally unused */
}
| 515 | 673 | ||
| 516 | static void parse_addr_range(const char *optarg) | 674 | static void parse_addr_range(const char *optarg) |
| @@ -676,8 +834,10 @@ int main(int argc, char *argv[]) | |||
| 676 | } | 834 | } |
| 677 | } | 835 | } |
| 678 | 836 | ||
| 837 | if (opt_list && opt_pid) | ||
| 838 | printf("voffset\t"); | ||
| 679 | if (opt_list == 1) | 839 | if (opt_list == 1) |
| 680 | printf("offset\tcount\tflags\n"); | 840 | printf("offset\tlen\tflags\n"); |
| 681 | if (opt_list == 2) | 841 | if (opt_list == 2) |
| 682 | printf("offset\tflags\n"); | 842 | printf("offset\tflags\n"); |
| 683 | 843 | ||
