aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2009-01-08 07:04:47 -0500
committerDavid Howells <dhowells@redhat.com>2009-01-08 07:04:47 -0500
commit8feae13110d60cc6287afabc2887366b0eb226c2 (patch)
treeb3188986faab70e753e00ea8670a11ba8ec844c0 /fs
parent41836382ebb415d68d3ebc4525e78e871fe58baf (diff)
NOMMU: Make VMAs per MM as for MMU-mode linux
Make VMAs per mm_struct as for MMU-mode linux. This solves two problems: (1) In SYSV SHM where nattch for a segment does not reflect the number of shmat's (and forks) done. (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an exec'ing process when VM_EXECUTABLE is specified, regardless of the fact that a VMA might be shared and already have its vm_mm assigned to another process or a dead process. A new struct (vm_region) is introduced to track a mapped region and to remember the circumstances under which it may be shared and the vm_list_struct structure is discarded as it's no longer required. This patch makes the following additional changes: (1) Regions are now allocated with alloc_pages() rather than kmalloc() and with no recourse to __GFP_COMP, so the pages are not composite. Instead, each page has a reference on it held by the region. Anything else that is interested in such a page will have to get a reference on it to retain it. When the pages are released due to unmapping, each page is passed to put_page() and will be freed when the page usage count reaches zero. (2) Excess pages are trimmed after an allocation as the allocation must be made as a power-of-2 quantity of pages. (3) VMAs are added to the parent MM's R/B tree and mmap lists. As an MM may end up with overlapping VMAs within the tree, the VMA struct address is appended to the sort key. (4) Non-anonymous VMAs are now added to the backing inode's prio list. (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of the backing region. The VMA and region structs will be split if necessary. (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory segment instead of all the attachments at that address. Multiple shmat()'s return the same address under NOMMU-mode instead of different virtual addresses as under MMU-mode. (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode. 
(8) /proc/maps is now the global list of mapped regions, and may list bits that aren't actually mapped anywhere. (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount of RAM currently allocated by mmap to hold mappable regions that can't be mapped directly. These are copies of the backing device or file if not anonymous. These changes make NOMMU mode more similar to MMU mode. The downside is that NOMMU mode requires some extra memory to track things over NOMMU without this patch (VMAs are no longer shared, and there are now region structs). Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Mike Frysinger <vapier.adi@gmail.com> Acked-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_elf_fdpic.c27
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/meminfo.c6
-rw-r--r--fs/proc/nommu.c71
-rw-r--r--fs/proc/task_nommu.c108
5 files changed, 116 insertions, 98 deletions
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index aa5b43205e3..22baf1b1349 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1567,11 +1567,9 @@ end_coredump:
1567static int elf_fdpic_dump_segments(struct file *file, size_t *size, 1567static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1568 unsigned long *limit, unsigned long mm_flags) 1568 unsigned long *limit, unsigned long mm_flags)
1569{ 1569{
1570 struct vm_list_struct *vml; 1570 struct vm_area_struct *vma;
1571
1572 for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
1573 struct vm_area_struct *vma = vml->vma;
1574 1571
1572 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1575 if (!maydump(vma, mm_flags)) 1573 if (!maydump(vma, mm_flags))
1576 continue; 1574 continue;
1577 1575
@@ -1617,9 +1615,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1617 elf_fpxregset_t *xfpu = NULL; 1615 elf_fpxregset_t *xfpu = NULL;
1618#endif 1616#endif
1619 int thread_status_size = 0; 1617 int thread_status_size = 0;
1620#ifndef CONFIG_MMU
1621 struct vm_list_struct *vml;
1622#endif
1623 elf_addr_t *auxv; 1618 elf_addr_t *auxv;
1624 unsigned long mm_flags; 1619 unsigned long mm_flags;
1625 1620
@@ -1685,13 +1680,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1685 fill_prstatus(prstatus, current, signr); 1680 fill_prstatus(prstatus, current, signr);
1686 elf_core_copy_regs(&prstatus->pr_reg, regs); 1681 elf_core_copy_regs(&prstatus->pr_reg, regs);
1687 1682
1688#ifdef CONFIG_MMU
1689 segs = current->mm->map_count; 1683 segs = current->mm->map_count;
1690#else
1691 segs = 0;
1692 for (vml = current->mm->context.vmlist; vml; vml = vml->next)
1693 segs++;
1694#endif
1695#ifdef ELF_CORE_EXTRA_PHDRS 1684#ifdef ELF_CORE_EXTRA_PHDRS
1696 segs += ELF_CORE_EXTRA_PHDRS; 1685 segs += ELF_CORE_EXTRA_PHDRS;
1697#endif 1686#endif
@@ -1766,20 +1755,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1766 mm_flags = current->mm->flags; 1755 mm_flags = current->mm->flags;
1767 1756
1768 /* write program headers for segments dump */ 1757 /* write program headers for segments dump */
1769 for ( 1758 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1770#ifdef CONFIG_MMU
1771 vma = current->mm->mmap; vma; vma = vma->vm_next
1772#else
1773 vml = current->mm->context.vmlist; vml; vml = vml->next
1774#endif
1775 ) {
1776 struct elf_phdr phdr; 1759 struct elf_phdr phdr;
1777 size_t sz; 1760 size_t sz;
1778 1761
1779#ifndef CONFIG_MMU
1780 vma = vml->vma;
1781#endif
1782
1783 sz = vma->vm_end - vma->vm_start; 1762 sz = vma->vm_end - vma->vm_start;
1784 1763
1785 phdr.p_type = PT_LOAD; 1764 phdr.p_type = PT_LOAD;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e8aeb8b61c..cd53ff83849 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -41,8 +41,6 @@ do { \
41 (vmi)->used = 0; \ 41 (vmi)->used = 0; \
42 (vmi)->largest_chunk = 0; \ 42 (vmi)->largest_chunk = 0; \
43} while(0) 43} while(0)
44
45extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46#endif 44#endif
47 45
48extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 46extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index b1675c4e66d..43d23948384 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -74,6 +74,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
74 "LowTotal: %8lu kB\n" 74 "LowTotal: %8lu kB\n"
75 "LowFree: %8lu kB\n" 75 "LowFree: %8lu kB\n"
76#endif 76#endif
77#ifndef CONFIG_MMU
78 "MmapCopy: %8lu kB\n"
79#endif
77 "SwapTotal: %8lu kB\n" 80 "SwapTotal: %8lu kB\n"
78 "SwapFree: %8lu kB\n" 81 "SwapFree: %8lu kB\n"
79 "Dirty: %8lu kB\n" 82 "Dirty: %8lu kB\n"
@@ -116,6 +119,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
116 K(i.totalram-i.totalhigh), 119 K(i.totalram-i.totalhigh),
117 K(i.freeram-i.freehigh), 120 K(i.freeram-i.freehigh),
118#endif 121#endif
122#ifndef CONFIG_MMU
123 K((unsigned long) atomic_read(&mmap_pages_allocated)),
124#endif
119 K(i.totalswap), 125 K(i.totalswap),
120 K(i.freeswap), 126 K(i.freeswap),
121 K(global_page_state(NR_FILE_DIRTY)), 127 K(global_page_state(NR_FILE_DIRTY)),
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 3f87d263294..b446d7ad0b0 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -33,33 +33,33 @@
33#include "internal.h" 33#include "internal.h"
34 34
35/* 35/*
36 * display a single VMA to a sequenced file 36 * display a single region to a sequenced file
37 */ 37 */
38int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 38static int nommu_region_show(struct seq_file *m, struct vm_region *region)
39{ 39{
40 unsigned long ino = 0; 40 unsigned long ino = 0;
41 struct file *file; 41 struct file *file;
42 dev_t dev = 0; 42 dev_t dev = 0;
43 int flags, len; 43 int flags, len;
44 44
45 flags = vma->vm_flags; 45 flags = region->vm_flags;
46 file = vma->vm_file; 46 file = region->vm_file;
47 47
48 if (file) { 48 if (file) {
49 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 49 struct inode *inode = region->vm_file->f_path.dentry->d_inode;
50 dev = inode->i_sb->s_dev; 50 dev = inode->i_sb->s_dev;
51 ino = inode->i_ino; 51 ino = inode->i_ino;
52 } 52 }
53 53
54 seq_printf(m, 54 seq_printf(m,
55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56 vma->vm_start, 56 region->vm_start,
57 vma->vm_end, 57 region->vm_end,
58 flags & VM_READ ? 'r' : '-', 58 flags & VM_READ ? 'r' : '-',
59 flags & VM_WRITE ? 'w' : '-', 59 flags & VM_WRITE ? 'w' : '-',
60 flags & VM_EXEC ? 'x' : '-', 60 flags & VM_EXEC ? 'x' : '-',
61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, 62 ((loff_t)region->vm_pgoff) << PAGE_SHIFT,
63 MAJOR(dev), MINOR(dev), ino, &len); 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64
65 if (file) { 65 if (file) {
@@ -75,61 +75,54 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
75} 75}
76 76
77/* 77/*
78 * display a list of all the VMAs the kernel knows about 78 * display a list of all the REGIONs the kernel knows about
79 * - nommu kernals have a single flat list 79 * - nommu kernals have a single flat list
80 */ 80 */
81static int nommu_vma_list_show(struct seq_file *m, void *v) 81static int nommu_region_list_show(struct seq_file *m, void *_p)
82{ 82{
83 struct vm_area_struct *vma; 83 struct rb_node *p = _p;
84 84
85 vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); 85 return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb));
86 return nommu_vma_show(m, vma);
87} 86}
88 87
89static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) 88static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos)
90{ 89{
91 struct rb_node *_rb; 90 struct rb_node *p;
92 loff_t pos = *_pos; 91 loff_t pos = *_pos;
93 void *next = NULL;
94 92
95 down_read(&nommu_vma_sem); 93 down_read(&nommu_region_sem);
96 94
97 for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) { 95 for (p = rb_first(&nommu_region_tree); p; p = rb_next(p))
98 if (pos == 0) { 96 if (pos-- == 0)
99 next = _rb; 97 return p;
100 break; 98 return NULL;
101 }
102 pos--;
103 }
104
105 return next;
106} 99}
107 100
108static void nommu_vma_list_stop(struct seq_file *m, void *v) 101static void nommu_region_list_stop(struct seq_file *m, void *v)
109{ 102{
110 up_read(&nommu_vma_sem); 103 up_read(&nommu_region_sem);
111} 104}
112 105
113static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) 106static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
114{ 107{
115 (*pos)++; 108 (*pos)++;
116 return rb_next((struct rb_node *) v); 109 return rb_next((struct rb_node *) v);
117} 110}
118 111
119static const struct seq_operations proc_nommu_vma_list_seqop = { 112static struct seq_operations proc_nommu_region_list_seqop = {
120 .start = nommu_vma_list_start, 113 .start = nommu_region_list_start,
121 .next = nommu_vma_list_next, 114 .next = nommu_region_list_next,
122 .stop = nommu_vma_list_stop, 115 .stop = nommu_region_list_stop,
123 .show = nommu_vma_list_show 116 .show = nommu_region_list_show
124}; 117};
125 118
126static int proc_nommu_vma_list_open(struct inode *inode, struct file *file) 119static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
127{ 120{
128 return seq_open(file, &proc_nommu_vma_list_seqop); 121 return seq_open(file, &proc_nommu_region_list_seqop);
129} 122}
130 123
131static const struct file_operations proc_nommu_vma_list_operations = { 124static const struct file_operations proc_nommu_region_list_operations = {
132 .open = proc_nommu_vma_list_open, 125 .open = proc_nommu_region_list_open,
133 .read = seq_read, 126 .read = seq_read,
134 .llseek = seq_lseek, 127 .llseek = seq_lseek,
135 .release = seq_release, 128 .release = seq_release,
@@ -137,7 +130,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
137 130
138static int __init proc_nommu_init(void) 131static int __init proc_nommu_init(void)
139{ 132{
140 proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); 133 proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
141 return 0; 134 return 0;
142} 135}
143 136
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index d4a8be32b90..ca4a48d0d31 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -15,25 +15,25 @@
15 */ 15 */
16void task_mem(struct seq_file *m, struct mm_struct *mm) 16void task_mem(struct seq_file *m, struct mm_struct *mm)
17{ 17{
18 struct vm_list_struct *vml; 18 struct vm_area_struct *vma;
19 struct rb_node *p;
19 unsigned long bytes = 0, sbytes = 0, slack = 0; 20 unsigned long bytes = 0, sbytes = 0, slack = 0;
20 21
21 down_read(&mm->mmap_sem); 22 down_read(&mm->mmap_sem);
22 for (vml = mm->context.vmlist; vml; vml = vml->next) { 23 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
23 if (!vml->vma) 24 vma = rb_entry(p, struct vm_area_struct, vm_rb);
24 continue;
25 25
26 bytes += kobjsize(vml); 26 bytes += kobjsize(vma);
27 if (atomic_read(&mm->mm_count) > 1 || 27 if (atomic_read(&mm->mm_count) > 1 ||
28 atomic_read(&vml->vma->vm_usage) > 1 28 vma->vm_region ||
29 ) { 29 vma->vm_flags & VM_MAYSHARE) {
30 sbytes += kobjsize((void *) vml->vma->vm_start); 30 sbytes += kobjsize((void *) vma->vm_start);
31 sbytes += kobjsize(vml->vma); 31 if (vma->vm_region)
32 sbytes += kobjsize(vma->vm_region);
32 } else { 33 } else {
33 bytes += kobjsize((void *) vml->vma->vm_start); 34 bytes += kobjsize((void *) vma->vm_start);
34 bytes += kobjsize(vml->vma); 35 slack += kobjsize((void *) vma->vm_start) -
35 slack += kobjsize((void *) vml->vma->vm_start) - 36 (vma->vm_end - vma->vm_start);
36 (vml->vma->vm_end - vml->vma->vm_start);
37 } 37 }
38 } 38 }
39 39
@@ -70,13 +70,14 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
70 70
71unsigned long task_vsize(struct mm_struct *mm) 71unsigned long task_vsize(struct mm_struct *mm)
72{ 72{
73 struct vm_list_struct *tbp; 73 struct vm_area_struct *vma;
74 struct rb_node *p;
74 unsigned long vsize = 0; 75 unsigned long vsize = 0;
75 76
76 down_read(&mm->mmap_sem); 77 down_read(&mm->mmap_sem);
77 for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { 78 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
78 if (tbp->vma) 79 vma = rb_entry(p, struct vm_area_struct, vm_rb);
79 vsize += kobjsize((void *) tbp->vma->vm_start); 80 vsize += vma->vm_region->vm_end - vma->vm_region->vm_start;
80 } 81 }
81 up_read(&mm->mmap_sem); 82 up_read(&mm->mmap_sem);
82 return vsize; 83 return vsize;
@@ -85,16 +86,15 @@ unsigned long task_vsize(struct mm_struct *mm)
85int task_statm(struct mm_struct *mm, int *shared, int *text, 86int task_statm(struct mm_struct *mm, int *shared, int *text,
86 int *data, int *resident) 87 int *data, int *resident)
87{ 88{
88 struct vm_list_struct *tbp; 89 struct vm_area_struct *vma;
90 struct rb_node *p;
89 int size = kobjsize(mm); 91 int size = kobjsize(mm);
90 92
91 down_read(&mm->mmap_sem); 93 down_read(&mm->mmap_sem);
92 for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { 94 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
93 size += kobjsize(tbp); 95 vma = rb_entry(p, struct vm_area_struct, vm_rb);
94 if (tbp->vma) { 96 size += kobjsize(vma);
95 size += kobjsize(tbp->vma); 97 size += kobjsize((void *) vma->vm_start);
96 size += kobjsize((void *) tbp->vma->vm_start);
97 }
98 } 98 }
99 99
100 size += (*text = mm->end_code - mm->start_code); 100 size += (*text = mm->end_code - mm->start_code);
@@ -105,20 +105,62 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
105} 105}
106 106
107/* 107/*
108 * display a single VMA to a sequenced file
109 */
110static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
111{
112 unsigned long ino = 0;
113 struct file *file;
114 dev_t dev = 0;
115 int flags, len;
116
117 flags = vma->vm_flags;
118 file = vma->vm_file;
119
120 if (file) {
121 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
122 dev = inode->i_sb->s_dev;
123 ino = inode->i_ino;
124 }
125
126 seq_printf(m,
127 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
128 vma->vm_start,
129 vma->vm_end,
130 flags & VM_READ ? 'r' : '-',
131 flags & VM_WRITE ? 'w' : '-',
132 flags & VM_EXEC ? 'x' : '-',
133 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
134 vma->vm_pgoff << PAGE_SHIFT,
135 MAJOR(dev), MINOR(dev), ino, &len);
136
137 if (file) {
138 len = 25 + sizeof(void *) * 6 - len;
139 if (len < 1)
140 len = 1;
141 seq_printf(m, "%*c", len, ' ');
142 seq_path(m, &file->f_path, "");
143 }
144
145 seq_putc(m, '\n');
146 return 0;
147}
148
149/*
108 * display mapping lines for a particular process's /proc/pid/maps 150 * display mapping lines for a particular process's /proc/pid/maps
109 */ 151 */
110static int show_map(struct seq_file *m, void *_vml) 152static int show_map(struct seq_file *m, void *_p)
111{ 153{
112 struct vm_list_struct *vml = _vml; 154 struct rb_node *p = _p;
113 155
114 return nommu_vma_show(m, vml->vma); 156 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
115} 157}
116 158
117static void *m_start(struct seq_file *m, loff_t *pos) 159static void *m_start(struct seq_file *m, loff_t *pos)
118{ 160{
119 struct proc_maps_private *priv = m->private; 161 struct proc_maps_private *priv = m->private;
120 struct vm_list_struct *vml;
121 struct mm_struct *mm; 162 struct mm_struct *mm;
163 struct rb_node *p;
122 loff_t n = *pos; 164 loff_t n = *pos;
123 165
124 /* pin the task and mm whilst we play with them */ 166 /* pin the task and mm whilst we play with them */
@@ -134,9 +176,9 @@ static void *m_start(struct seq_file *m, loff_t *pos)
134 } 176 }
135 177
136 /* start from the Nth VMA */ 178 /* start from the Nth VMA */
137 for (vml = mm->context.vmlist; vml; vml = vml->next) 179 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
138 if (n-- == 0) 180 if (n-- == 0)
139 return vml; 181 return p;
140 return NULL; 182 return NULL;
141} 183}
142 184
@@ -152,12 +194,12 @@ static void m_stop(struct seq_file *m, void *_vml)
152 } 194 }
153} 195}
154 196
155static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) 197static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
156{ 198{
157 struct vm_list_struct *vml = _vml; 199 struct rb_node *p = _p;
158 200
159 (*pos)++; 201 (*pos)++;
160 return vml ? vml->next : NULL; 202 return p ? rb_next(p) : NULL;
161} 203}
162 204
163static const struct seq_operations proc_pid_maps_ops = { 205static const struct seq_operations proc_pid_maps_ops = {