aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@engr.sgi.com>2005-09-03 18:54:45 -0400
committerLinus Torvalds <torvalds@evo.osdl.org>2005-09-05 03:05:43 -0400
commit6e21c8f145f5052c1c2fb4a4b41bee01c848159b (patch)
tree0b956cfbd67636c19be79fc0cbe0a5ed89fb6b9a
parent839b9685e80592809d6dfdd865986cd1b5ddc2fb (diff)
[PATCH] /proc/<pid>/numa_maps to show on which nodes pages reside
This patch was recently discussed on linux-mm: http://marc.theaimsgroup.com/?t=112085728500002&r=1&w=2 I inherited a large code base from Ray for page migration. There was a small patch in there that I find to be very useful since it allows the display of the locality of the pages in use by a process. I reworked that patch and came up with a /proc/<pid>/numa_maps that gives more information about the vma's of a process. numa_maps is indexed by the start address found in /proc/<pid>/maps. For example, with this patch you can see the page use of the "getty" process: margin:/proc/12008 # cat maps 00000000-00004000 r--p 00000000 00:00 0 2000000000000000-200000000002c000 r-xp 00000000 08:04 516 /lib/ld-2.3.3.so 2000000000038000-2000000000040000 rw-p 00028000 08:04 516 /lib/ld-2.3.3.so 2000000000040000-2000000000044000 rw-p 2000000000040000 00:00 0 2000000000058000-2000000000260000 r-xp 00000000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000260000-2000000000268000 ---p 00208000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000268000-2000000000274000 rw-p 00200000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000274000-2000000000280000 rw-p 2000000000274000 00:00 0 2000000000280000-20000000002b4000 r--p 00000000 08:04 9126923 /usr/lib/locale/en_US.utf8/LC_CTYPE 2000000000300000-2000000000308000 r--s 00000000 08:04 60071467 /usr/lib/gconv/gconv-modules.cache 2000000000318000-2000000000328000 rw-p 2000000000318000 00:00 0 4000000000000000-4000000000008000 r-xp 00000000 08:04 29576399 /sbin/mingetty 6000000000004000-6000000000008000 rw-p 00004000 08:04 29576399 /sbin/mingetty 6000000000008000-600000000002c000 rw-p 6000000000008000 00:00 0 [heap] 60000fff7fffc000-60000fff80000000 rw-p 60000fff7fffc000 00:00 0 60000ffffff44000-60000ffffff98000 rw-p 60000ffffff44000 00:00 0 [stack] a000000000000000-a000000000020000 ---p 00000000 00:00 0 [vdso] cat numa_maps 2000000000000000 default MaxRef=43 Pages=11 Mapped=11 N0=4 N1=3 N2=2 N3=2 2000000000038000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2 
2000000000040000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 2000000000058000 default MaxRef=43 Pages=61 Mapped=61 N0=14 N1=15 N2=16 N3=16 2000000000268000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2 2000000000274000 default MaxRef=1 Pages=3 Mapped=3 Anon=3 N0=3 2000000000280000 default MaxRef=8 Pages=3 Mapped=3 N0=3 2000000000300000 default MaxRef=8 Pages=2 Mapped=2 N0=2 2000000000318000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N2=1 4000000000000000 default MaxRef=6 Pages=2 Mapped=2 N1=2 6000000000004000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 6000000000008000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 60000fff7fffc000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 60000ffffff44000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 getty uses ld.so. The first vma is the code segment which is used by 43 other processes and the pages are evenly distributed over the 4 nodes. The second vma is the process specific data portion for ld.so. This is only one page. The display format is: <startaddress> Links to information in /proc/<pid>/maps <memory policy> This can be "default", "interleave={}", "prefer=<node>" or "bind={<zones>}" MaxRef= <maximum mapcount of any page in this vma> Pages= <Nr of pages in use> Mapped= <Nr of pages with mapcount > 0> Anon= <nr of anonymous pages> Nx= <Nr of pages on Node x> The content of the proc-file is self-evident. If this were tied into the sparsemem system then the contents of this file would not be too useful. Signed-off-by: Christoph Lameter <clameter@sgi.com> Cc: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/proc/base.c35
-rw-r--r--fs/proc/task_mmu.c132
-rw-r--r--include/linux/mempolicy.h3
-rw-r--r--mm/mempolicy.c12
4 files changed, 176 insertions, 6 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 491f2d9f89ac..b796bf90a0b1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -65,6 +65,7 @@ enum pid_directory_inos {
65 PROC_TGID_STAT, 65 PROC_TGID_STAT,
66 PROC_TGID_STATM, 66 PROC_TGID_STATM,
67 PROC_TGID_MAPS, 67 PROC_TGID_MAPS,
68 PROC_TGID_NUMA_MAPS,
68 PROC_TGID_MOUNTS, 69 PROC_TGID_MOUNTS,
69 PROC_TGID_WCHAN, 70 PROC_TGID_WCHAN,
70#ifdef CONFIG_SCHEDSTATS 71#ifdef CONFIG_SCHEDSTATS
@@ -102,6 +103,7 @@ enum pid_directory_inos {
102 PROC_TID_STAT, 103 PROC_TID_STAT,
103 PROC_TID_STATM, 104 PROC_TID_STATM,
104 PROC_TID_MAPS, 105 PROC_TID_MAPS,
106 PROC_TID_NUMA_MAPS,
105 PROC_TID_MOUNTS, 107 PROC_TID_MOUNTS,
106 PROC_TID_WCHAN, 108 PROC_TID_WCHAN,
107#ifdef CONFIG_SCHEDSTATS 109#ifdef CONFIG_SCHEDSTATS
@@ -144,6 +146,9 @@ static struct pid_entry tgid_base_stuff[] = {
144 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 146 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO),
145 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 147 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
146 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 148 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
149#ifdef CONFIG_NUMA
150 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
151#endif
147 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 152 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
148#ifdef CONFIG_SECCOMP 153#ifdef CONFIG_SECCOMP
149 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 154 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -180,6 +185,9 @@ static struct pid_entry tid_base_stuff[] = {
180 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 185 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO),
181 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 186 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
182 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 187 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
188#ifdef CONFIG_NUMA
189 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
190#endif
183 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 191 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
184#ifdef CONFIG_SECCOMP 192#ifdef CONFIG_SECCOMP
185 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 193 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -515,6 +523,27 @@ static struct file_operations proc_maps_operations = {
515 .release = seq_release, 523 .release = seq_release,
516}; 524};
517 525
526#ifdef CONFIG_NUMA
527extern struct seq_operations proc_pid_numa_maps_op;
/*
 * Open handler for the numa_maps proc file.
 *
 * Sets up a seq_file driven by proc_pid_numa_maps_op and, when seq_open()
 * succeeds, stores the task obtained from proc_task(inode) in the seq_file
 * private pointer, where show_numa_map() reads it back as m->private.
 * Returns the seq_open() result unchanged.
 */
528static int numa_maps_open(struct inode *inode, struct file *file)
529{
530 struct task_struct *task = proc_task(inode);
531 int ret = seq_open(file, &proc_pid_numa_maps_op);
532 if (!ret) {
/* seq_open() left the new seq_file in file->private_data */
533 struct seq_file *m = file->private_data;
/* NOTE(review): no reference on task is visibly taken here - confirm lifetime */
534 m->private = task;
535 }
536 return ret;
537}
538
/*
 * File operations for the numa_maps proc entry: numa_maps_open() for open,
 * the generic seq_file helpers for read, llseek and release.
 */
539static struct file_operations proc_numa_maps_operations = {
540 .open = numa_maps_open,
541 .read = seq_read,
542 .llseek = seq_lseek,
543 .release = seq_release,
544};
545#endif
546
518extern struct seq_operations mounts_op; 547extern struct seq_operations mounts_op;
519static int mounts_open(struct inode *inode, struct file *file) 548static int mounts_open(struct inode *inode, struct file *file)
520{ 549{
@@ -1524,6 +1553,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1524 case PROC_TGID_MAPS: 1553 case PROC_TGID_MAPS:
1525 inode->i_fop = &proc_maps_operations; 1554 inode->i_fop = &proc_maps_operations;
1526 break; 1555 break;
1556#ifdef CONFIG_NUMA
1557 case PROC_TID_NUMA_MAPS:
1558 case PROC_TGID_NUMA_MAPS:
1559 inode->i_fop = &proc_numa_maps_operations;
1560 break;
1561#endif
1527 case PROC_TID_MEM: 1562 case PROC_TID_MEM:
1528 case PROC_TGID_MEM: 1563 case PROC_TGID_MEM:
1529 inode->i_op = &proc_mem_inode_operations; 1564 inode->i_op = &proc_mem_inode_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 28b4a0253a92..64e84cadfa3c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2,6 +2,8 @@
2#include <linux/hugetlb.h> 2#include <linux/hugetlb.h>
3#include <linux/mount.h> 3#include <linux/mount.h>
4#include <linux/seq_file.h> 4#include <linux/seq_file.h>
5#include <linux/pagemap.h>
6#include <linux/mempolicy.h>
5#include <asm/elf.h> 7#include <asm/elf.h>
6#include <asm/uaccess.h> 8#include <asm/uaccess.h>
7#include "internal.h" 9#include "internal.h"
@@ -233,3 +235,133 @@ struct seq_operations proc_pid_maps_op = {
233 .stop = m_stop, 235 .stop = m_stop,
234 .show = show_map 236 .show = show_map
235}; 237};
238
239#ifdef CONFIG_NUMA
240
/*
 * Per-vma page statistics, filled in by get_numa_maps() and printed by
 * show_numa_map().
 */
241struct numa_maps {
242 unsigned long pages; /* pages returned by follow_page() */
243 unsigned long anon; /* pages for which PageAnon() is true */
244 unsigned long mapped; /* pages with a non-zero page_mapcount() */
245 unsigned long mapcount_max; /* highest page_mapcount() seen */
246 unsigned long node[MAX_NUMNODES]; /* page count per node, indexed by page_to_nid() */
247};
248
249/*
250 * Calculate numa node maps for a vma
 *
 * Allocates a struct numa_maps with kmalloc(GFP_KERNEL), clears its
 * counters, then steps through the vma one PAGE_SIZE at a time while
 * holding mm->page_table_lock, tallying every page follow_page() returns.
 *
 * Returns the filled-in structure, or NULL if the allocation failed.
 * The structure is returned still allocated; the caller releases it
 * (show_numa_map() does so with kfree()).
251 */
252static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
253{
254 struct page *page;
255 unsigned long vaddr;
256 struct mm_struct *mm = vma->vm_mm;
257 int i;
258 struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
259
260 if (!md)
261 return NULL;
262 md->pages = 0;
263 md->anon = 0;
264 md->mapped = 0;
265 md->mapcount_max = 0;
/*
 * NOTE(review): md comes from kmalloc(), and only the node[] slots that
 * for_each_node() visits are cleared here.
 */
266 for_each_node(i)
267 md->node[i] =0;
268
269 spin_lock(&mm->page_table_lock);
270 for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
/* addresses for which follow_page() yields no page are skipped */
271 page = follow_page(mm, vaddr, 0);
272 if (page) {
273 int count = page_mapcount(page);
274
275 if (count)
276 md->mapped++;
/* NOTE(review): int compared against unsigned long mapcount_max */
277 if (count > md->mapcount_max)
278 md->mapcount_max = count;
279 md->pages++;
280 if (PageAnon(page))
281 md->anon++;
282 md->node[page_to_nid(page)]++;
283 }
284 }
285 spin_unlock(&mm->page_table_lock);
286 return md;
287}
288
/*
 * seq_file show callback for numa_maps: emits one text line for the vma
 * passed in v.
 *
 * The line holds the vma start address, the memory policy returned by
 * get_vma_policy() for the task stored in m->private, the MaxRef, Pages
 * and Mapped counters, Anon (only when non-zero), and one N<node>=<count>
 * field for each online node that holds at least one page.
 *
 * Always returns 0. A vma without vm_mm, or one for which get_numa_maps()
 * returns NULL, produces no output.
 */
289static int show_numa_map(struct seq_file *m, void *v)
290{
291 struct task_struct *task = m->private;
292 struct vm_area_struct *vma = v;
293 struct mempolicy *pol;
294 struct numa_maps *md;
295 struct zone **z;
296 int n;
297 int first;
298
299 if (!vma->vm_mm)
300 return 0;
301
302 md = get_numa_maps(vma);
/* NOTE(review): allocation failure is reported as success with no output */
303 if (!md)
304 return 0;
305
306 seq_printf(m, "%08lx", vma->vm_start);
307 pol = get_vma_policy(task, vma, vma->vm_start);
308 /* Print policy */
309 switch (pol->policy) {
310 case MPOL_PREFERRED:
311 seq_printf(m, " prefer=%d", pol->v.preferred_node);
312 break;
313 case MPOL_BIND:
/* comma-separated node-id/zone-name pairs; the zone array ends at a NULL entry */
314 seq_printf(m, " bind={");
315 first = 1;
316 for (z = pol->v.zonelist->zones; *z; z++) {
317
318 if (!first)
319 seq_putc(m, ',');
320 else
321 first = 0;
322 seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
323 (*z)->name);
324 }
325 seq_putc(m, '}');
326 break;
327 case MPOL_INTERLEAVE:
/* comma-separated list of the nodes whose bit is set in pol->v.nodes */
328 seq_printf(m, " interleave={");
329 first = 1;
330 for_each_node(n) {
331 if (test_bit(n, pol->v.nodes)) {
332 if (!first)
333 seq_putc(m,',');
334 else
335 first = 0;
336 seq_printf(m, "%d",n);
337 }
338 }
339 seq_putc(m, '}');
340 break;
/* every other policy value is reported as default */
341 default:
342 seq_printf(m," default");
343 break;
344 }
345 seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
346 md->mapcount_max, md->pages, md->mapped);
347 if (md->anon)
348 seq_printf(m," Anon=%lu",md->anon);
349
/* nodes with a zero count are omitted from the line */
350 for_each_online_node(n) {
351 if (md->node[n])
352 seq_printf(m, " N%d=%lu", n, md->node[n]);
353 }
354 seq_putc(m, '\n');
355 kfree(md);
/*
 * Record the start of this vma (0 for the gate vma) in m->version.
 * Presumably the start callback uses it to resume - TODO confirm in m_start.
 */
356 if (m->count < m->size) /* vma is copied successfully */
357 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
358 return 0;
359}
360
/*
 * seq_file iterator for numa_maps. Only the show callback is specific to
 * numa_maps; start, next and stop use m_start, m_next and m_stop, which
 * are defined outside this hunk.
 */
361struct seq_operations proc_pid_numa_maps_op = {
362 .start = m_start,
363 .next = m_next,
364 .stop = m_stop,
365 .show = show_numa_map
366};
367#endif
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 8480aef10e62..94a46f38c532 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -150,6 +150,9 @@ void mpol_free_shared_policy(struct shared_policy *p);
150struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, 150struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
151 unsigned long idx); 151 unsigned long idx);
152 152
153struct mempolicy *get_vma_policy(struct task_struct *task,
154 struct vm_area_struct *vma, unsigned long addr);
155
153extern void numa_default_policy(void); 156extern void numa_default_policy(void);
154extern void numa_policy_init(void); 157extern void numa_policy_init(void);
155 158
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b4eababc8198..13492d66b7c8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -664,10 +664,10 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
664#endif 664#endif
665 665
666/* Return effective policy for a VMA */ 666/* Return effective policy for a VMA */
667static struct mempolicy * 667struct mempolicy *
668get_vma_policy(struct vm_area_struct *vma, unsigned long addr) 668get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr)
669{ 669{
670 struct mempolicy *pol = current->mempolicy; 670 struct mempolicy *pol = task->mempolicy;
671 671
672 if (vma) { 672 if (vma) {
673 if (vma->vm_ops && vma->vm_ops->get_policy) 673 if (vma->vm_ops && vma->vm_ops->get_policy)
@@ -786,7 +786,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or
786struct page * 786struct page *
787alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) 787alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr)
788{ 788{
789 struct mempolicy *pol = get_vma_policy(vma, addr); 789 struct mempolicy *pol = get_vma_policy(current, vma, addr);
790 790
791 cpuset_update_current_mems_allowed(); 791 cpuset_update_current_mems_allowed();
792 792
@@ -908,7 +908,7 @@ void __mpol_free(struct mempolicy *p)
908/* Find first node suitable for an allocation */ 908/* Find first node suitable for an allocation */
909int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) 909int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
910{ 910{
911 struct mempolicy *pol = get_vma_policy(vma, addr); 911 struct mempolicy *pol = get_vma_policy(current, vma, addr);
912 912
913 switch (pol->policy) { 913 switch (pol->policy) {
914 case MPOL_DEFAULT: 914 case MPOL_DEFAULT:
@@ -928,7 +928,7 @@ int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
928/* Find secondary valid nodes for an allocation */ 928/* Find secondary valid nodes for an allocation */
929int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr) 929int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
930{ 930{
931 struct mempolicy *pol = get_vma_policy(vma, addr); 931 struct mempolicy *pol = get_vma_policy(current, vma, addr);
932 932
933 switch (pol->policy) { 933 switch (pol->policy) {
934 case MPOL_PREFERRED: 934 case MPOL_PREFERRED: