aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorKonstantin Khlebnikov <koct9i@gmail.com>2014-04-07 18:37:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 19:35:52 -0400
commit65a6a4105f84f961fb219f5acaf05203f7114cf9 (patch)
tree924e49701188c9cf3c143ad39f99f4a982097a01 /tools
parent9164550ecd15253d72b5fe3b4baa9505c4b6fa1f (diff)
tools/vm/page-types.c: page-cache sniffing feature
After this patch 'page-types' can walk over a file's mappings and analyze populated page cache pages mostly without disturbing its state. It maps a chunk of the file, marks the VMA as MADV_RANDOM to turn off readahead, pokes the VMA via mincore() to determine cached pages, triggers a page fault only for them, and finally gathers information via pagemap/kpageflags. Before unmapping, it marks the VMA as MADV_SEQUENTIAL so that reference bits are ignored.

usage: page-types -f <path>

If <path> is a directory it will analyse all files in all subdirectories. Symlinks and mount points are not followed. Hardlinks aren't handled; they'll be dumped as many times as they are found. A recursive walk brings all dentries into the dcache and populates the page cache of block devices, aka 'Buffers'.

It is probably worth adding an ioctl for dumping a file's page cache as an array of PFNs, as a replacement for this hackish juggling with mmap/madvise/mincore/pagemap. Also, the recursive walk could be replaced with dumping cached inodes via some ioctl or debugfs interface, followed by opening them via open_by_handle_at; this would fix hardlink handling and the unneeded population of dcache and buffers.

This interface might be used as a data source for constructing readahead plans and for background optimizations of actively used files.
collateral changes:
+ fix 64-bit LFS: define _FILE_OFFSET_BITS instead of _LARGEFILE64_SOURCE
+ replace lseek + read with single pread
+ make show_page_range() reusable after flush

usage example:

~/src/linux/tools/vm$ sudo ./page-types -L -f page-types
foffset offset  flags
page-types      Inode: 2229277  Size: 89065 (22 pages)
Modify: Tue Feb 25 12:00:59 2014 (162 seconds ago)
Access: Tue Feb 25 12:01:00 2014 (161 seconds ago)
0       3cbf3b  __RU_lA____M________________________
1       38946a  __RU_lA____M________________________
2       1a3cec  __RU_lA____M________________________
3       1a8321  __RU_lA____M________________________
4       3af7cc  __RU_lA____M________________________
5       1ed532  __RU_lA_____________________________
6       2e436a  __RU_lA_____________________________
7       29a35e  ___U_lA_____________________________
8       2de86e  ___U_lA_____________________________
9       3bdfb4  ___U_lA_____________________________
10      3cd8a3  ___U_lA_____________________________
11      2afa50  ___U_lA_____________________________
12      2534c2  ___U_lA_____________________________
13      1b7a40  ___U_lA_____________________________
14      17b0be  ___U_lA_____________________________
15      392b0c  ___U_lA_____________________________
16      3ba46a  __RU_lA_____________________________
17      397dc8  ___U_lA_____________________________
18      1f2a36  ___U_lA_____________________________
19      21fd30  __RU_lA_____________________________
20      2c35ba  __RU_l______________________________
21      20f181  __RU_l______________________________

             flags      page-count       MB  symbolic-flags                        long-symbolic-flags
0x000000000000002c               2        0  __RU_l______________________________  referenced,uptodate,lru
0x0000000000000068              11        0  ___U_lA_____________________________  uptodate,lru,active
0x000000000000006c               4        0  __RU_lA_____________________________  referenced,uptodate,lru,active
0x000000000000086c               5        0  __RU_lA____M________________________  referenced,uptodate,lru,active,mmap
             total              22        0

~/src/linux/tools/vm$ sudo ./page-types -f /
             flags      page-count       MB  symbolic-flags                        long-symbolic-flags
0x0000000000000028           21761       85  ___U_l______________________________  uptodate,lru
0x000000000000002c          127279      497  __RU_l______________________________  referenced,uptodate,lru
0x0000000000000068           74160      289  ___U_lA_____________________________  uptodate,lru,active
0x000000000000006c           84469      329  __RU_lA_____________________________  referenced,uptodate,lru,active
0x000000000000007c               1        0  __RUDlA_____________________________  referenced,uptodate,dirty,lru,active
0x0000000000000228             370        1  ___U_l___I__________________________  uptodate,lru,reclaim
0x0000000000000828              49        0  ___U_l_____M________________________  uptodate,lru,mmap
0x000000000000082c             126        0  __RU_l_____M________________________  referenced,uptodate,lru,mmap
0x0000000000000868             137        0  ___U_lA____M________________________  uptodate,lru,active,mmap
0x000000000000086c           12890       50  __RU_lA____M________________________  referenced,uptodate,lru,active,mmap
             total          321242     1254

Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/vm/page-types.c170
1 files changed, 152 insertions, 18 deletions
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index f9be24d9efac..05654f5e48d5 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -19,7 +19,8 @@
19 * Authors: Wu Fengguang <fengguang.wu@intel.com> 19 * Authors: Wu Fengguang <fengguang.wu@intel.com>
20 */ 20 */
21 21
22#define _LARGEFILE64_SOURCE 22#define _FILE_OFFSET_BITS 64
23#define _GNU_SOURCE
23#include <stdio.h> 24#include <stdio.h>
24#include <stdlib.h> 25#include <stdlib.h>
25#include <unistd.h> 26#include <unistd.h>
@@ -29,11 +30,14 @@
29#include <getopt.h> 30#include <getopt.h>
30#include <limits.h> 31#include <limits.h>
31#include <assert.h> 32#include <assert.h>
33#include <ftw.h>
34#include <time.h>
32#include <sys/types.h> 35#include <sys/types.h>
33#include <sys/errno.h> 36#include <sys/errno.h>
34#include <sys/fcntl.h> 37#include <sys/fcntl.h>
35#include <sys/mount.h> 38#include <sys/mount.h>
36#include <sys/statfs.h> 39#include <sys/statfs.h>
40#include <sys/mman.h>
37#include "../../include/uapi/linux/magic.h" 41#include "../../include/uapi/linux/magic.h"
38#include "../../include/uapi/linux/kernel-page-flags.h" 42#include "../../include/uapi/linux/kernel-page-flags.h"
39#include <api/fs/debugfs.h> 43#include <api/fs/debugfs.h>
@@ -158,6 +162,7 @@ static int opt_raw; /* for kernel developers */
158static int opt_list; /* list pages (in ranges) */ 162static int opt_list; /* list pages (in ranges) */
159static int opt_no_summary; /* don't show summary */ 163static int opt_no_summary; /* don't show summary */
160static pid_t opt_pid; /* process to walk */ 164static pid_t opt_pid; /* process to walk */
165const char * opt_file;
161 166
162#define MAX_ADDR_RANGES 1024 167#define MAX_ADDR_RANGES 1024
163static int nr_addr_ranges; 168static int nr_addr_ranges;
@@ -253,12 +258,7 @@ static unsigned long do_u64_read(int fd, char *name,
253 if (index > ULONG_MAX / 8) 258 if (index > ULONG_MAX / 8)
254 fatal("index overflow: %lu\n", index); 259 fatal("index overflow: %lu\n", index);
255 260
256 if (lseek(fd, index * 8, SEEK_SET) < 0) { 261 bytes = pread(fd, buf, count * 8, (off_t)index * 8);
257 perror(name);
258 exit(EXIT_FAILURE);
259 }
260
261 bytes = read(fd, buf, count * 8);
262 if (bytes < 0) { 262 if (bytes < 0) {
263 perror(name); 263 perror(name);
264 exit(EXIT_FAILURE); 264 exit(EXIT_FAILURE);
@@ -343,8 +343,8 @@ static char *page_flag_longname(uint64_t flags)
343 * page list and summary 343 * page list and summary
344 */ 344 */
345 345
346static void show_page_range(unsigned long voffset, 346static void show_page_range(unsigned long voffset, unsigned long offset,
347 unsigned long offset, uint64_t flags) 347 unsigned long size, uint64_t flags)
348{ 348{
349 static uint64_t flags0; 349 static uint64_t flags0;
350 static unsigned long voff; 350 static unsigned long voff;
@@ -352,14 +352,16 @@ static void show_page_range(unsigned long voffset,
352 static unsigned long count; 352 static unsigned long count;
353 353
354 if (flags == flags0 && offset == index + count && 354 if (flags == flags0 && offset == index + count &&
355 (!opt_pid || voffset == voff + count)) { 355 size && voffset == voff + count) {
356 count++; 356 count += size;
357 return; 357 return;
358 } 358 }
359 359
360 if (count) { 360 if (count) {
361 if (opt_pid) 361 if (opt_pid)
362 printf("%lx\t", voff); 362 printf("%lx\t", voff);
363 if (opt_file)
364 printf("%lu\t", voff);
363 printf("%lx\t%lx\t%s\n", 365 printf("%lx\t%lx\t%s\n",
364 index, count, page_flag_name(flags0)); 366 index, count, page_flag_name(flags0));
365 } 367 }
@@ -367,7 +369,12 @@ static void show_page_range(unsigned long voffset,
367 flags0 = flags; 369 flags0 = flags;
368 index = offset; 370 index = offset;
369 voff = voffset; 371 voff = voffset;
370 count = 1; 372 count = size;
373}
374
375static void flush_page_range(void)
376{
377 show_page_range(0, 0, 0, 0);
371} 378}
372 379
373static void show_page(unsigned long voffset, 380static void show_page(unsigned long voffset,
@@ -375,6 +382,8 @@ static void show_page(unsigned long voffset,
375{ 382{
376 if (opt_pid) 383 if (opt_pid)
377 printf("%lx\t", voffset); 384 printf("%lx\t", voffset);
385 if (opt_file)
386 printf("%lu\t", voffset);
378 printf("%lx\t%s\n", offset, page_flag_name(flags)); 387 printf("%lx\t%s\n", offset, page_flag_name(flags));
379} 388}
380 389
@@ -565,7 +574,7 @@ static void add_page(unsigned long voffset,
565 unpoison_page(offset); 574 unpoison_page(offset);
566 575
567 if (opt_list == 1) 576 if (opt_list == 1)
568 show_page_range(voffset, offset, flags); 577 show_page_range(voffset, offset, 1, flags);
569 else if (opt_list == 2) 578 else if (opt_list == 2)
570 show_page(voffset, offset, flags); 579 show_page(voffset, offset, flags);
571 580
@@ -667,7 +676,7 @@ static void walk_addr_ranges(void)
667 676
668 for (i = 0; i < nr_addr_ranges; i++) 677 for (i = 0; i < nr_addr_ranges; i++)
669 if (!opt_pid) 678 if (!opt_pid)
670 walk_pfn(0, opt_offset[i], opt_size[i], 0); 679 walk_pfn(opt_offset[i], opt_offset[i], opt_size[i], 0);
671 else 680 else
672 walk_task(opt_offset[i], opt_size[i]); 681 walk_task(opt_offset[i], opt_size[i]);
673 682
@@ -699,9 +708,7 @@ static void usage(void)
699" -a|--addr addr-spec Walk a range of pages\n" 708" -a|--addr addr-spec Walk a range of pages\n"
700" -b|--bits bits-spec Walk pages with specified bits\n" 709" -b|--bits bits-spec Walk pages with specified bits\n"
701" -p|--pid pid Walk process address space\n" 710" -p|--pid pid Walk process address space\n"
702#if 0 /* planned features */
703" -f|--file filename Walk file address space\n" 711" -f|--file filename Walk file address space\n"
704#endif
705" -l|--list Show page details in ranges\n" 712" -l|--list Show page details in ranges\n"
706" -L|--list-each Show page details one by one\n" 713" -L|--list-each Show page details one by one\n"
707" -N|--no-summary Don't show summary info\n" 714" -N|--no-summary Don't show summary info\n"
@@ -799,8 +806,130 @@ static void parse_pid(const char *str)
799 fclose(file); 806 fclose(file);
800} 807}
801 808
809static void show_file(const char *name, const struct stat *st)
810{
811 unsigned long long size = st->st_size;
812 char atime[64], mtime[64];
813 long now = time(NULL);
814
815 printf("%s\tInode: %u\tSize: %llu (%llu pages)\n",
816 name, (unsigned)st->st_ino,
817 size, (size + page_size - 1) / page_size);
818
819 strftime(atime, sizeof(atime), "%c", localtime(&st->st_atime));
820 strftime(mtime, sizeof(mtime), "%c", localtime(&st->st_mtime));
821
822 printf("Modify: %s (%ld seconds ago)\nAccess: %s (%ld seconds ago)\n",
823 mtime, now - st->st_mtime,
824 atime, now - st->st_atime);
825}
826
827static void walk_file(const char *name, const struct stat *st)
828{
829 uint8_t vec[PAGEMAP_BATCH];
830 uint64_t buf[PAGEMAP_BATCH], flags;
831 unsigned long nr_pages, pfn, i;
832 int fd;
833 off_t off;
834 ssize_t len;
835 void *ptr;
836 int first = 1;
837
838 fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW);
839
840 for (off = 0; off < st->st_size; off += len) {
841 nr_pages = (st->st_size - off + page_size - 1) / page_size;
842 if (nr_pages > PAGEMAP_BATCH)
843 nr_pages = PAGEMAP_BATCH;
844 len = nr_pages * page_size;
845
846 ptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
847 if (ptr == MAP_FAILED)
848 fatal("mmap failed: %s", name);
849
850 /* determine cached pages */
851 if (mincore(ptr, len, vec))
852 fatal("mincore failed: %s", name);
853
854 /* turn off readahead */
855 if (madvise(ptr, len, MADV_RANDOM))
856 fatal("madvice failed: %s", name);
857
858 /* populate ptes */
859 for (i = 0; i < nr_pages ; i++) {
860 if (vec[i] & 1)
861 (void)*(volatile int *)(ptr + i * page_size);
862 }
863
864 /* turn off harvesting reference bits */
865 if (madvise(ptr, len, MADV_SEQUENTIAL))
866 fatal("madvice failed: %s", name);
867
868 if (pagemap_read(buf, (unsigned long)ptr / page_size,
869 nr_pages) != nr_pages)
870 fatal("cannot read pagemap");
871
872 munmap(ptr, len);
873
874 for (i = 0; i < nr_pages; i++) {
875 pfn = pagemap_pfn(buf[i]);
876 if (!pfn)
877 continue;
878 if (!kpageflags_read(&flags, pfn, 1))
879 continue;
880 if (first && opt_list) {
881 first = 0;
882 flush_page_range();
883 show_file(name, st);
884 }
885 add_page(off / page_size + i, pfn, flags, buf[i]);
886 }
887 }
888
889 close(fd);
890}
891
892int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f)
893{
894 (void)f;
895 switch (type) {
896 case FTW_F:
897 if (S_ISREG(st->st_mode))
898 walk_file(name, st);
899 break;
900 case FTW_DNR:
901 fprintf(stderr, "cannot read dir: %s\n", name);
902 break;
903 }
904 return 0;
905}
906
907static void walk_page_cache(void)
908{
909 struct stat st;
910
911 kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
912 pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
913
914 if (stat(opt_file, &st))
915 fatal("stat failed: %s\n", opt_file);
916
917 if (S_ISREG(st.st_mode)) {
918 walk_file(opt_file, &st);
919 } else if (S_ISDIR(st.st_mode)) {
920 /* do not follow symlinks and mountpoints */
921 if (nftw(opt_file, walk_tree, 64, FTW_MOUNT | FTW_PHYS) < 0)
922 fatal("nftw failed: %s\n", opt_file);
923 } else
924 fatal("unhandled file type: %s\n", opt_file);
925
926 close(kpageflags_fd);
927 close(pagemap_fd);
928}
929
802static void parse_file(const char *name) 930static void parse_file(const char *name)
803{ 931{
932 opt_file = name;
804} 933}
805 934
806static void parse_addr_range(const char *optarg) 935static void parse_addr_range(const char *optarg)
@@ -991,15 +1120,20 @@ int main(int argc, char *argv[])
991 1120
992 if (opt_list && opt_pid) 1121 if (opt_list && opt_pid)
993 printf("voffset\t"); 1122 printf("voffset\t");
1123 if (opt_list && opt_file)
1124 printf("foffset\t");
994 if (opt_list == 1) 1125 if (opt_list == 1)
995 printf("offset\tlen\tflags\n"); 1126 printf("offset\tlen\tflags\n");
996 if (opt_list == 2) 1127 if (opt_list == 2)
997 printf("offset\tflags\n"); 1128 printf("offset\tflags\n");
998 1129
999 walk_addr_ranges(); 1130 if (opt_file)
1131 walk_page_cache();
1132 else
1133 walk_addr_ranges();
1000 1134
1001 if (opt_list == 1) 1135 if (opt_list == 1)
1002 show_page_range(0, 0, 0); /* drain the buffer */ 1136 flush_page_range();
1003 1137
1004 if (opt_no_summary) 1138 if (opt_no_summary)
1005 return 0; 1139 return 0;