diff options
author | Christoph Lameter <clameter@engr.sgi.com> | 2005-09-03 18:54:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@evo.osdl.org> | 2005-09-05 03:05:43 -0400 |
commit | 6e21c8f145f5052c1c2fb4a4b41bee01c848159b (patch) | |
tree | 0b956cfbd67636c19be79fc0cbe0a5ed89fb6b9a | |
parent | 839b9685e80592809d6dfdd865986cd1b5ddc2fb (diff) |
[PATCH] /proc/<pid>/numa_maps to show on which nodes pages reside
This patch was recently discussed on linux-mm:
http://marc.theaimsgroup.com/?t=112085728500002&r=1&w=2
I inherited a large code base from Ray for page migration. There was a
small patch in there that I find to be very useful since it allows the
display of the locality of the pages in use by a process. I reworked that
patch and came up with a /proc/<pid>/numa_maps that gives more information
about the vma's of a process. numa_maps is indexed by the start address
found in /proc/<pid>/maps. F.e. with this patch you can see the page use
of the "getty" process:
margin:/proc/12008 # cat maps
00000000-00004000 r--p 00000000 00:00 0
2000000000000000-200000000002c000 r-xp 00000000 08:04 516 /lib/ld-2.3.3.so
2000000000038000-2000000000040000 rw-p 00028000 08:04 516 /lib/ld-2.3.3.so
2000000000040000-2000000000044000 rw-p 2000000000040000 00:00 0
2000000000058000-2000000000260000 r-xp 00000000 08:04 54707842 /lib/tls/libc.so.6.1
2000000000260000-2000000000268000 ---p 00208000 08:04 54707842 /lib/tls/libc.so.6.1
2000000000268000-2000000000274000 rw-p 00200000 08:04 54707842 /lib/tls/libc.so.6.1
2000000000274000-2000000000280000 rw-p 2000000000274000 00:00 0
2000000000280000-20000000002b4000 r--p 00000000 08:04 9126923 /usr/lib/locale/en_US.utf8/LC_CTYPE
2000000000300000-2000000000308000 r--s 00000000 08:04 60071467 /usr/lib/gconv/gconv-modules.cache
2000000000318000-2000000000328000 rw-p 2000000000318000 00:00 0
4000000000000000-4000000000008000 r-xp 00000000 08:04 29576399 /sbin/mingetty
6000000000004000-6000000000008000 rw-p 00004000 08:04 29576399 /sbin/mingetty
6000000000008000-600000000002c000 rw-p 6000000000008000 00:00 0 [heap]
60000fff7fffc000-60000fff80000000 rw-p 60000fff7fffc000 00:00 0
60000ffffff44000-60000ffffff98000 rw-p 60000ffffff44000 00:00 0 [stack]
a000000000000000-a000000000020000 ---p 00000000 00:00 0 [vdso]
cat numa_maps
2000000000000000 default MaxRef=43 Pages=11 Mapped=11 N0=4 N1=3 N2=2 N3=2
2000000000038000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2
2000000000040000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
2000000000058000 default MaxRef=43 Pages=61 Mapped=61 N0=14 N1=15 N2=16 N3=16
2000000000268000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2
2000000000274000 default MaxRef=1 Pages=3 Mapped=3 Anon=3 N0=3
2000000000280000 default MaxRef=8 Pages=3 Mapped=3 N0=3
2000000000300000 default MaxRef=8 Pages=2 Mapped=2 N0=2
2000000000318000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N2=1
4000000000000000 default MaxRef=6 Pages=2 Mapped=2 N1=2
6000000000004000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
6000000000008000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
60000fff7fffc000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
60000ffffff44000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1
getty uses ld.so. The first vma is the code segment which is used by 43
other processes and the pages are evenly distributed over the 4 nodes.
The second vma is the process specific data portion for ld.so. This is
only one page.
The display format is:
<startaddress> Links to information in /proc/<pid>/maps
<memory policy> This can be "default", "interleave={<nodes>}", "prefer=<node>" or "bind={<zones>}"
MaxRef= <maximum reference to a page in this vma>
Pages= <Nr of pages in use>
Mapped= <Nr of pages with nonzero mapcount>
Anon= <nr of anonymous pages>
Nx= <Nr of pages on Node x>
The content of the proc-file is self-evident. If this would be tied into
the sparsemem system then the contents of this file would not be too
useful.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/proc/base.c | 35 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 132 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 3 | ||||
-rw-r--r-- | mm/mempolicy.c | 12 |
4 files changed, 176 insertions, 6 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 491f2d9f89ac..b796bf90a0b1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -65,6 +65,7 @@ enum pid_directory_inos { | |||
65 | PROC_TGID_STAT, | 65 | PROC_TGID_STAT, |
66 | PROC_TGID_STATM, | 66 | PROC_TGID_STATM, |
67 | PROC_TGID_MAPS, | 67 | PROC_TGID_MAPS, |
68 | PROC_TGID_NUMA_MAPS, | ||
68 | PROC_TGID_MOUNTS, | 69 | PROC_TGID_MOUNTS, |
69 | PROC_TGID_WCHAN, | 70 | PROC_TGID_WCHAN, |
70 | #ifdef CONFIG_SCHEDSTATS | 71 | #ifdef CONFIG_SCHEDSTATS |
@@ -102,6 +103,7 @@ enum pid_directory_inos { | |||
102 | PROC_TID_STAT, | 103 | PROC_TID_STAT, |
103 | PROC_TID_STATM, | 104 | PROC_TID_STATM, |
104 | PROC_TID_MAPS, | 105 | PROC_TID_MAPS, |
106 | PROC_TID_NUMA_MAPS, | ||
105 | PROC_TID_MOUNTS, | 107 | PROC_TID_MOUNTS, |
106 | PROC_TID_WCHAN, | 108 | PROC_TID_WCHAN, |
107 | #ifdef CONFIG_SCHEDSTATS | 109 | #ifdef CONFIG_SCHEDSTATS |
@@ -144,6 +146,9 @@ static struct pid_entry tgid_base_stuff[] = { | |||
144 | E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), | 146 | E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), |
145 | E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), | 147 | E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), |
146 | E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), | 148 | E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), |
149 | #ifdef CONFIG_NUMA | ||
150 | E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), | ||
151 | #endif | ||
147 | E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), | 152 | E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), |
148 | #ifdef CONFIG_SECCOMP | 153 | #ifdef CONFIG_SECCOMP |
149 | E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), | 154 | E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), |
@@ -180,6 +185,9 @@ static struct pid_entry tid_base_stuff[] = { | |||
180 | E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), | 185 | E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), |
181 | E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), | 186 | E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), |
182 | E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), | 187 | E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), |
188 | #ifdef CONFIG_NUMA | ||
189 | E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), | ||
190 | #endif | ||
183 | E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), | 191 | E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), |
184 | #ifdef CONFIG_SECCOMP | 192 | #ifdef CONFIG_SECCOMP |
185 | E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), | 193 | E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), |
@@ -515,6 +523,27 @@ static struct file_operations proc_maps_operations = { | |||
515 | .release = seq_release, | 523 | .release = seq_release, |
516 | }; | 524 | }; |
517 | 525 | ||
526 | #ifdef CONFIG_NUMA | ||
527 | extern struct seq_operations proc_pid_numa_maps_op; | ||
528 | static int numa_maps_open(struct inode *inode, struct file *file) | ||
529 | { | ||
530 | struct task_struct *task = proc_task(inode); | ||
531 | int ret = seq_open(file, &proc_pid_numa_maps_op); | ||
532 | if (!ret) { | ||
533 | struct seq_file *m = file->private_data; | ||
534 | m->private = task; | ||
535 | } | ||
536 | return ret; | ||
537 | } | ||
538 | |||
539 | static struct file_operations proc_numa_maps_operations = { | ||
540 | .open = numa_maps_open, | ||
541 | .read = seq_read, | ||
542 | .llseek = seq_lseek, | ||
543 | .release = seq_release, | ||
544 | }; | ||
545 | #endif | ||
546 | |||
518 | extern struct seq_operations mounts_op; | 547 | extern struct seq_operations mounts_op; |
519 | static int mounts_open(struct inode *inode, struct file *file) | 548 | static int mounts_open(struct inode *inode, struct file *file) |
520 | { | 549 | { |
@@ -1524,6 +1553,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1524 | case PROC_TGID_MAPS: | 1553 | case PROC_TGID_MAPS: |
1525 | inode->i_fop = &proc_maps_operations; | 1554 | inode->i_fop = &proc_maps_operations; |
1526 | break; | 1555 | break; |
1556 | #ifdef CONFIG_NUMA | ||
1557 | case PROC_TID_NUMA_MAPS: | ||
1558 | case PROC_TGID_NUMA_MAPS: | ||
1559 | inode->i_fop = &proc_numa_maps_operations; | ||
1560 | break; | ||
1561 | #endif | ||
1527 | case PROC_TID_MEM: | 1562 | case PROC_TID_MEM: |
1528 | case PROC_TGID_MEM: | 1563 | case PROC_TGID_MEM: |
1529 | inode->i_op = &proc_mem_inode_operations; | 1564 | inode->i_op = &proc_mem_inode_operations; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 28b4a0253a92..64e84cadfa3c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -2,6 +2,8 @@ | |||
2 | #include <linux/hugetlb.h> | 2 | #include <linux/hugetlb.h> |
3 | #include <linux/mount.h> | 3 | #include <linux/mount.h> |
4 | #include <linux/seq_file.h> | 4 | #include <linux/seq_file.h> |
5 | #include <linux/pagemap.h> | ||
6 | #include <linux/mempolicy.h> | ||
5 | #include <asm/elf.h> | 7 | #include <asm/elf.h> |
6 | #include <asm/uaccess.h> | 8 | #include <asm/uaccess.h> |
7 | #include "internal.h" | 9 | #include "internal.h" |
@@ -233,3 +235,133 @@ struct seq_operations proc_pid_maps_op = { | |||
233 | .stop = m_stop, | 235 | .stop = m_stop, |
234 | .show = show_map | 236 | .show = show_map |
235 | }; | 237 | }; |
238 | |||
239 | #ifdef CONFIG_NUMA | ||
240 | |||
241 | struct numa_maps { | ||
242 | unsigned long pages; | ||
243 | unsigned long anon; | ||
244 | unsigned long mapped; | ||
245 | unsigned long mapcount_max; | ||
246 | unsigned long node[MAX_NUMNODES]; | ||
247 | }; | ||
248 | |||
249 | /* | ||
250 | * Calculate numa node maps for a vma | ||
251 | */ | ||
252 | static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma) | ||
253 | { | ||
254 | struct page *page; | ||
255 | unsigned long vaddr; | ||
256 | struct mm_struct *mm = vma->vm_mm; | ||
257 | int i; | ||
258 | struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); | ||
259 | |||
260 | if (!md) | ||
261 | return NULL; | ||
262 | md->pages = 0; | ||
263 | md->anon = 0; | ||
264 | md->mapped = 0; | ||
265 | md->mapcount_max = 0; | ||
266 | for_each_node(i) | ||
267 | md->node[i] =0; | ||
268 | |||
269 | spin_lock(&mm->page_table_lock); | ||
270 | for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { | ||
271 | page = follow_page(mm, vaddr, 0); | ||
272 | if (page) { | ||
273 | int count = page_mapcount(page); | ||
274 | |||
275 | if (count) | ||
276 | md->mapped++; | ||
277 | if (count > md->mapcount_max) | ||
278 | md->mapcount_max = count; | ||
279 | md->pages++; | ||
280 | if (PageAnon(page)) | ||
281 | md->anon++; | ||
282 | md->node[page_to_nid(page)]++; | ||
283 | } | ||
284 | } | ||
285 | spin_unlock(&mm->page_table_lock); | ||
286 | return md; | ||
287 | } | ||
288 | |||
289 | static int show_numa_map(struct seq_file *m, void *v) | ||
290 | { | ||
291 | struct task_struct *task = m->private; | ||
292 | struct vm_area_struct *vma = v; | ||
293 | struct mempolicy *pol; | ||
294 | struct numa_maps *md; | ||
295 | struct zone **z; | ||
296 | int n; | ||
297 | int first; | ||
298 | |||
299 | if (!vma->vm_mm) | ||
300 | return 0; | ||
301 | |||
302 | md = get_numa_maps(vma); | ||
303 | if (!md) | ||
304 | return 0; | ||
305 | |||
306 | seq_printf(m, "%08lx", vma->vm_start); | ||
307 | pol = get_vma_policy(task, vma, vma->vm_start); | ||
308 | /* Print policy */ | ||
309 | switch (pol->policy) { | ||
310 | case MPOL_PREFERRED: | ||
311 | seq_printf(m, " prefer=%d", pol->v.preferred_node); | ||
312 | break; | ||
313 | case MPOL_BIND: | ||
314 | seq_printf(m, " bind={"); | ||
315 | first = 1; | ||
316 | for (z = pol->v.zonelist->zones; *z; z++) { | ||
317 | |||
318 | if (!first) | ||
319 | seq_putc(m, ','); | ||
320 | else | ||
321 | first = 0; | ||
322 | seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id, | ||
323 | (*z)->name); | ||
324 | } | ||
325 | seq_putc(m, '}'); | ||
326 | break; | ||
327 | case MPOL_INTERLEAVE: | ||
328 | seq_printf(m, " interleave={"); | ||
329 | first = 1; | ||
330 | for_each_node(n) { | ||
331 | if (test_bit(n, pol->v.nodes)) { | ||
332 | if (!first) | ||
333 | seq_putc(m,','); | ||
334 | else | ||
335 | first = 0; | ||
336 | seq_printf(m, "%d",n); | ||
337 | } | ||
338 | } | ||
339 | seq_putc(m, '}'); | ||
340 | break; | ||
341 | default: | ||
342 | seq_printf(m," default"); | ||
343 | break; | ||
344 | } | ||
345 | seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu", | ||
346 | md->mapcount_max, md->pages, md->mapped); | ||
347 | if (md->anon) | ||
348 | seq_printf(m," Anon=%lu",md->anon); | ||
349 | |||
350 | for_each_online_node(n) { | ||
351 | if (md->node[n]) | ||
352 | seq_printf(m, " N%d=%lu", n, md->node[n]); | ||
353 | } | ||
354 | seq_putc(m, '\n'); | ||
355 | kfree(md); | ||
356 | if (m->count < m->size) /* vma is copied successfully */ | ||
357 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; | ||
358 | return 0; | ||
359 | } | ||
360 | |||
361 | struct seq_operations proc_pid_numa_maps_op = { | ||
362 | .start = m_start, | ||
363 | .next = m_next, | ||
364 | .stop = m_stop, | ||
365 | .show = show_numa_map | ||
366 | }; | ||
367 | #endif | ||
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 8480aef10e62..94a46f38c532 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -150,6 +150,9 @@ void mpol_free_shared_policy(struct shared_policy *p); | |||
150 | struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, | 150 | struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, |
151 | unsigned long idx); | 151 | unsigned long idx); |
152 | 152 | ||
153 | struct mempolicy *get_vma_policy(struct task_struct *task, | ||
154 | struct vm_area_struct *vma, unsigned long addr); | ||
155 | |||
153 | extern void numa_default_policy(void); | 156 | extern void numa_default_policy(void); |
154 | extern void numa_policy_init(void); | 157 | extern void numa_policy_init(void); |
155 | 158 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b4eababc8198..13492d66b7c8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -664,10 +664,10 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, | |||
664 | #endif | 664 | #endif |
665 | 665 | ||
666 | /* Return effective policy for a VMA */ | 666 | /* Return effective policy for a VMA */ |
667 | static struct mempolicy * | 667 | struct mempolicy * |
668 | get_vma_policy(struct vm_area_struct *vma, unsigned long addr) | 668 | get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) |
669 | { | 669 | { |
670 | struct mempolicy *pol = current->mempolicy; | 670 | struct mempolicy *pol = task->mempolicy; |
671 | 671 | ||
672 | if (vma) { | 672 | if (vma) { |
673 | if (vma->vm_ops && vma->vm_ops->get_policy) | 673 | if (vma->vm_ops && vma->vm_ops->get_policy) |
@@ -786,7 +786,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or | |||
786 | struct page * | 786 | struct page * |
787 | alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) | 787 | alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) |
788 | { | 788 | { |
789 | struct mempolicy *pol = get_vma_policy(vma, addr); | 789 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
790 | 790 | ||
791 | cpuset_update_current_mems_allowed(); | 791 | cpuset_update_current_mems_allowed(); |
792 | 792 | ||
@@ -908,7 +908,7 @@ void __mpol_free(struct mempolicy *p) | |||
908 | /* Find first node suitable for an allocation */ | 908 | /* Find first node suitable for an allocation */ |
909 | int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) | 909 | int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) |
910 | { | 910 | { |
911 | struct mempolicy *pol = get_vma_policy(vma, addr); | 911 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
912 | 912 | ||
913 | switch (pol->policy) { | 913 | switch (pol->policy) { |
914 | case MPOL_DEFAULT: | 914 | case MPOL_DEFAULT: |
@@ -928,7 +928,7 @@ int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) | |||
928 | /* Find secondary valid nodes for an allocation */ | 928 | /* Find secondary valid nodes for an allocation */ |
929 | int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr) | 929 | int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr) |
930 | { | 930 | { |
931 | struct mempolicy *pol = get_vma_policy(vma, addr); | 931 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
932 | 932 | ||
933 | switch (pol->policy) { | 933 | switch (pol->policy) { |
934 | case MPOL_PREFERRED: | 934 | case MPOL_PREFERRED: |