author		David Howells <dhowells@redhat.com>	2009-01-08 07:04:47 -0500
committer	David Howells <dhowells@redhat.com>	2009-01-08 07:04:47 -0500
commit		8feae13110d60cc6287afabc2887366b0eb226c2 (patch)
tree		b3188986faab70e753e00ea8670a11ba8ec844c0
parent		41836382ebb415d68d3ebc4525e78e871fe58baf (diff)
NOMMU: Make VMAs per MM as for MMU-mode linux
Make VMAs per mm_struct as for MMU-mode linux.  This solves two problems:

 (1) In SYSV SHM, nattch for a segment does not reflect the number of
     shmat's (and forks) done.

 (2) In mmap(), the VMA's vm_mm is set to point to the parent mm by an
     exec'ing process when VM_EXECUTABLE is specified, regardless of the
     fact that a VMA might be shared and already have its vm_mm assigned
     to another process or a dead process.

A new struct (vm_region) is introduced to track a mapped region and to
remember the circumstances under which it may be shared, and the
vm_list_struct structure is discarded as it's no longer required.

This patch makes the following additional changes:

 (1) Regions are now allocated with alloc_pages() rather than kmalloc()
     and with no recourse to __GFP_COMP, so the pages are not composite.
     Instead, each page has a reference on it held by the region.
     Anything else that is interested in such a page will have to get a
     reference on it to retain it.  When the pages are released due to
     unmapping, each page is passed to put_page() and will be freed when
     the page usage count reaches zero.

 (2) Excess pages are trimmed after an allocation as the allocation must
     be made as a power-of-2 quantity of pages.

 (3) VMAs are added to the parent MM's R/B tree and mmap lists.  As an MM
     may end up with overlapping VMAs within the tree, the VMA struct
     address is appended to the sort key.

 (4) Non-anonymous VMAs are now added to the backing inode's prio list.

 (5) Holes may be punched in anonymous VMAs with munmap(), releasing
     parts of the backing region.  The VMA and region structs will be
     split if necessary.

 (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory
     segment instead of all the attachments at that address.  Multiple
     shmat()'s return the same address under NOMMU-mode instead of
     different virtual addresses as under MMU-mode.

 (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode.

 (8) /proc/maps is now the global list of mapped regions, and may list
     bits that aren't actually mapped anywhere.

 (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the
     amount of RAM currently allocated by mmap to hold mappable regions
     that can't be mapped directly.  These are copies of the backing
     device or file if not anonymous.

These changes make NOMMU mode more similar to MMU mode.  The downside is
that NOMMU mode requires some extra memory to track things compared with
NOMMU mode without this patch (VMAs are no longer shared, and there are
now region structs).

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Mike Frysinger <vapier.adi@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
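The power-of-2 trimming in point (2) above is easiest to see with concrete numbers. Below is a minimal userspace sketch, not kernel code: PAGE_SIZE and the five-page request are illustrative assumptions, and the shaving loop mirrors the one added to do_mmap_private() in this patch, allocating the covering power-of-2 block and then freeing the excess from the tail in progressively smaller power-of-2 chunks.

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* illustrative assumption */

int main(void)
{
	unsigned long len = 5 * PAGE_SIZE;	/* hypothetical request */
	unsigned long point = (len + PAGE_SIZE - 1) / PAGE_SIZE; /* pages wanted */
	unsigned long total;
	int order = 0;

	/* smallest order such that (1 << order) pages covers the request */
	while ((1UL << order) < point)
		order++;
	total = 1UL << order;
	printf("allocated %lu pages to satisfy a %lu-page request\n",
	       total, point);

	/* shave the excess off the tail in power-of-2 chunks, largest
	 * first, as the patched do_mmap_private() does */
	while (total > point) {
		unsigned long excess = total - point;
		int shave = 0;

		while ((1UL << (shave + 1)) <= excess)
			shave++;	/* shave = ilog2(excess) */
		total -= 1UL << shave;
		printf("free %lu page(s) at page offset %lu\n",
		       1UL << shave, total);
	}
	return 0;
}

For a 5-page request this allocates 8 pages, then frees a 2-page chunk and a 1-page chunk, which is the quantity the new MmapCopy line in /proc/meminfo accounts for.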
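The sort key from point (3) can be shown in miniature as well. This is a hedged sketch rather than the kernel's insertion code: the struct and the values are invented for illustration, but the three-level comparison (start address, then end address, then the VMA struct's own address) matches the ordering the reworked add_vma_to_mm() applies while walking the per-MM rbtree, giving two VMAs that cover an identical range a stable total order.

#include <stdio.h>
#include <stdint.h>

/* minimal stand-in for the kernel's vm_area_struct (illustrative only) */
struct vma {
	unsigned long vm_start;
	unsigned long vm_end;
};

/* sort by: start addr, end addr, then the struct's own address, so that
 * identical or overlapping VMAs still compare unequal in the rbtree */
static int vma_cmp(const struct vma *a, const struct vma *b)
{
	if (a->vm_start != b->vm_start)
		return a->vm_start < b->vm_start ? -1 : 1;
	if (a->vm_end != b->vm_end)
		return a->vm_end < b->vm_end ? -1 : 1;
	if ((uintptr_t) a != (uintptr_t) b)
		return (uintptr_t) a < (uintptr_t) b ? -1 : 1;
	return 0;
}

int main(void)
{
	struct vma x = { 0x8000, 0x9000 };
	struct vma y = { 0x8000, 0x9000 };	/* same range, distinct struct */

	printf("x vs y: %d\n", vma_cmp(&x, &y));	/* nonzero: still ordered */
	printf("x vs x: %d\n", vma_cmp(&x, &x));	/* 0: same node */
	return 0;
}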
-rw-r--r--	Documentation/nommu-mmap.txt		 18
-rw-r--r--	arch/arm/include/asm/mmu.h		  1
-rw-r--r--	arch/blackfin/include/asm/mmu.h		  1
-rw-r--r--	arch/blackfin/kernel/ptrace.c		  6
-rw-r--r--	arch/blackfin/kernel/traps.c		 11
-rw-r--r--	arch/frv/kernel/ptrace.c		 11
-rw-r--r--	arch/h8300/include/asm/mmu.h		  1
-rw-r--r--	arch/m68knommu/include/asm/mmu.h	  1
-rw-r--r--	arch/sh/include/asm/mmu.h		  1
-rw-r--r--	fs/binfmt_elf_fdpic.c			 27
-rw-r--r--	fs/proc/internal.h			  2
-rw-r--r--	fs/proc/meminfo.c			  6
-rw-r--r--	fs/proc/nommu.c				 71
-rw-r--r--	fs/proc/task_nommu.c			108
-rw-r--r--	include/asm-frv/mmu.h			  1
-rw-r--r--	include/asm-m32r/mmu.h			  1
-rw-r--r--	include/linux/mm.h			 18
-rw-r--r--	include/linux/mm_types.h		 18
-rw-r--r--	ipc/shm.c				 12
-rw-r--r--	kernel/fork.c				  4
-rw-r--r--	lib/Kconfig.debug			  7
-rw-r--r--	mm/mmap.c				 10
-rw-r--r--	mm/nommu.c				960
23 files changed, 860 insertions, 436 deletions
diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
index 7714f57caad5..02b89dcf38ac 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/nommu-mmap.txt
@@ -109,12 +109,18 @@ and it's also much more restricted in the latter case:
 FURTHER NOTES ON NO-MMU MMAP
 ============================
 
- (*) A request for a private mapping of less than a page in size may not return
-     a page-aligned buffer. This is because the kernel calls kmalloc() to
-     allocate the buffer, not get_free_page().
+ (*) A request for a private mapping of a file may return a buffer that is not
+     page-aligned.  This is because XIP may take place, and the data may not be
+     page aligned in the backing store.
 
- (*) A list of all the mappings on the system is visible through /proc/maps in
-     no-MMU mode.
+ (*) A request for an anonymous mapping will always be page aligned.  If
+     possible the size of the request should be a power of two, otherwise some
+     of the space may be wasted as the kernel must allocate a power-of-2
+     granule but will only discard the excess if appropriately configured, as
+     this has an effect on fragmentation.
+
+ (*) A list of all the private copy and anonymous mappings on the system is
+     visible through /proc/maps in no-MMU mode.
 
 (*) A list of all the mappings in use by a process is visible through
     /proc/<pid>/maps in no-MMU mode.
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 53099d4ee421..b561584d04a1 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -24,7 +24,6 @@ typedef struct {
  * modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com>
  */
 typedef struct {
-	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 } mm_context_t;
 
diff --git a/arch/blackfin/include/asm/mmu.h b/arch/blackfin/include/asm/mmu.h
index 757e43906ed4..dbfd686360e6 100644
--- a/arch/blackfin/include/asm/mmu.h
+++ b/arch/blackfin/include/asm/mmu.h
@@ -10,7 +10,6 @@ struct sram_list_struct {
 };
 
 typedef struct {
-	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 	unsigned long		stack_start;
 
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index d2d388536630..594e325b40e4 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -160,15 +160,15 @@ put_reg(struct task_struct *task, int regno, unsigned long data)
 static inline int is_user_addr_valid(struct task_struct *child,
 				     unsigned long start, unsigned long len)
 {
-	struct vm_list_struct *vml;
+	struct vm_area_struct *vma;
 	struct sram_list_struct *sraml;
 
 	/* overflow */
 	if (start + len < start)
 		return -EIO;
 
-	for (vml = child->mm->context.vmlist; vml; vml = vml->next)
-		if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end)
+	vma = find_vma(child->mm, start);
+	if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
 		return 0;
 
 	for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 17d8e4172896..5b0667da8d05 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/fs.h>
+#include <linux/rbtree.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
 #include <asm/cplb.h>
@@ -83,6 +84,7 @@ static void decode_address(char *buf, unsigned long address)
 	struct mm_struct *mm;
 	unsigned long flags, offset;
 	unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
+	struct rb_node *n;
 
 #ifdef CONFIG_KALLSYMS
 	unsigned long symsize;
@@ -128,9 +130,10 @@ static void decode_address(char *buf, unsigned long address)
 		if (!mm)
 			continue;
 
-		vml = mm->context.vmlist;
-		while (vml) {
-			struct vm_area_struct *vma = vml->vma;
+		for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
+			struct vm_area_struct *vma;
+
+			vma = rb_entry(n, struct vm_area_struct, vm_rb);
 
 			if (address >= vma->vm_start && address < vma->vm_end) {
 				char _tmpbuf[256];
@@ -176,8 +179,6 @@ static void decode_address(char *buf, unsigned long address)
 
 				goto done;
 			}
-
-			vml = vml->next;
 		}
 		if (!in_atomic)
 			mmput(mm);
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 709e9bdc6126..5e7d401d21e7 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -69,7 +69,8 @@ static inline int put_reg(struct task_struct *task, int regno,
 }
 
 /*
- * check that an address falls within the bounds of the target process's memory mappings
+ * check that an address falls within the bounds of the target process's memory
+ * mappings
  */
 static inline int is_user_addr_valid(struct task_struct *child,
 				     unsigned long start, unsigned long len)
@@ -79,11 +80,11 @@ static inline int is_user_addr_valid(struct task_struct *child,
 		return -EIO;
 	return 0;
 #else
-	struct vm_list_struct *vml;
+	struct vm_area_struct *vma;
 
-	for (vml = child->mm->context.vmlist; vml; vml = vml->next)
-		if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end)
+	vma = find_vma(child->mm, start);
+	if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
 		return 0;
 
 	return -EIO;
 #endif
diff --git a/arch/h8300/include/asm/mmu.h b/arch/h8300/include/asm/mmu.h
index 2ce06ea46104..31309969df70 100644
--- a/arch/h8300/include/asm/mmu.h
+++ b/arch/h8300/include/asm/mmu.h
@@ -4,7 +4,6 @@
 /* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
 
 typedef struct {
-	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 } mm_context_t;
 
diff --git a/arch/m68knommu/include/asm/mmu.h b/arch/m68knommu/include/asm/mmu.h
index 5fa6b68353ba..e2da1e6f09fe 100644
--- a/arch/m68knommu/include/asm/mmu.h
+++ b/arch/m68knommu/include/asm/mmu.h
@@ -4,7 +4,6 @@
 /* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
 
 typedef struct {
-	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 } mm_context_t;
 
diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h
index fdcb93bc6d11..6c43625bb1a5 100644
--- a/arch/sh/include/asm/mmu.h
+++ b/arch/sh/include/asm/mmu.h
@@ -9,7 +9,6 @@ typedef struct {
 	mm_context_id_t		id;
 	void			*vdso;
 #else
-	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 #endif
 #ifdef CONFIG_BINFMT_ELF_FDPIC
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index aa5b43205e37..22baf1b13493 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1567,11 +1567,9 @@ end_coredump:
 static int elf_fdpic_dump_segments(struct file *file, size_t *size,
 				   unsigned long *limit, unsigned long mm_flags)
 {
-	struct vm_list_struct *vml;
-
-	for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
-		struct vm_area_struct *vma = vml->vma;
+	struct vm_area_struct *vma;
 
+	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
 		if (!maydump(vma, mm_flags))
 			continue;
 
@@ -1617,9 +1615,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 	elf_fpxregset_t *xfpu = NULL;
 #endif
 	int thread_status_size = 0;
-#ifndef CONFIG_MMU
-	struct vm_list_struct *vml;
-#endif
 	elf_addr_t *auxv;
 	unsigned long mm_flags;
 
@@ -1685,13 +1680,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 	fill_prstatus(prstatus, current, signr);
 	elf_core_copy_regs(&prstatus->pr_reg, regs);
 
-#ifdef CONFIG_MMU
 	segs = current->mm->map_count;
-#else
-	segs = 0;
-	for (vml = current->mm->context.vmlist; vml; vml = vml->next)
-		segs++;
-#endif
 #ifdef ELF_CORE_EXTRA_PHDRS
 	segs += ELF_CORE_EXTRA_PHDRS;
 #endif
@@ -1766,20 +1755,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 	mm_flags = current->mm->flags;
 
 	/* write program headers for segments dump */
-	for (
-#ifdef CONFIG_MMU
-		vma = current->mm->mmap; vma; vma = vma->vm_next
-#else
-		vml = current->mm->context.vmlist; vml; vml = vml->next
-#endif
-	     ) {
+	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
 		struct elf_phdr phdr;
 		size_t sz;
 
-#ifndef CONFIG_MMU
-		vma = vml->vma;
-#endif
-
 		sz = vma->vm_end - vma->vm_start;
 
 		phdr.p_type = PT_LOAD;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e8aeb8b61ce..cd53ff838498 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -41,8 +41,6 @@ do { \
 	(vmi)->used = 0;						\
 	(vmi)->largest_chunk = 0;					\
 } while(0)
-
-extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
 #endif
 
 extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index b1675c4e66da..43d23948384a 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -74,6 +74,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
74 "LowTotal: %8lu kB\n" 74 "LowTotal: %8lu kB\n"
75 "LowFree: %8lu kB\n" 75 "LowFree: %8lu kB\n"
76#endif 76#endif
77#ifndef CONFIG_MMU
78 "MmapCopy: %8lu kB\n"
79#endif
77 "SwapTotal: %8lu kB\n" 80 "SwapTotal: %8lu kB\n"
78 "SwapFree: %8lu kB\n" 81 "SwapFree: %8lu kB\n"
79 "Dirty: %8lu kB\n" 82 "Dirty: %8lu kB\n"
@@ -116,6 +119,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(i.totalram-i.totalhigh),
 		K(i.freeram-i.freehigh),
 #endif
+#ifndef CONFIG_MMU
+		K((unsigned long) atomic_read(&mmap_pages_allocated)),
+#endif
 		K(i.totalswap),
 		K(i.freeswap),
 		K(global_page_state(NR_FILE_DIRTY)),
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 3f87d2632947..b446d7ad0b0d 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -33,33 +33,33 @@
 #include "internal.h"
 
 /*
- * display a single VMA to a sequenced file
+ * display a single region to a sequenced file
  */
-int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
+static int nommu_region_show(struct seq_file *m, struct vm_region *region)
 {
 	unsigned long ino = 0;
 	struct file *file;
 	dev_t dev = 0;
 	int flags, len;
 
-	flags = vma->vm_flags;
-	file = vma->vm_file;
+	flags = region->vm_flags;
+	file = region->vm_file;
 
 	if (file) {
-		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+		struct inode *inode = region->vm_file->f_path.dentry->d_inode;
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 	}
 
 	seq_printf(m,
 		   "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
-		   vma->vm_start,
-		   vma->vm_end,
+		   region->vm_start,
+		   region->vm_end,
 		   flags & VM_READ ? 'r' : '-',
 		   flags & VM_WRITE ? 'w' : '-',
 		   flags & VM_EXEC ? 'x' : '-',
 		   flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
-		   ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
+		   ((loff_t)region->vm_pgoff) << PAGE_SHIFT,
 		   MAJOR(dev), MINOR(dev), ino, &len);
 
 	if (file) {
@@ -75,61 +75,54 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 }
 
 /*
- * display a list of all the VMAs the kernel knows about
+ * display a list of all the REGIONs the kernel knows about
  * - nommu kernels have a single flat list
  */
-static int nommu_vma_list_show(struct seq_file *m, void *v)
+static int nommu_region_list_show(struct seq_file *m, void *_p)
 {
-	struct vm_area_struct *vma;
+	struct rb_node *p = _p;
 
-	vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
-	return nommu_vma_show(m, vma);
+	return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb));
 }
 
-static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
+static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos)
 {
-	struct rb_node *_rb;
+	struct rb_node *p;
 	loff_t pos = *_pos;
-	void *next = NULL;
 
-	down_read(&nommu_vma_sem);
+	down_read(&nommu_region_sem);
 
-	for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) {
-		if (pos == 0) {
-			next = _rb;
-			break;
-		}
-		pos--;
-	}
-
-	return next;
+	for (p = rb_first(&nommu_region_tree); p; p = rb_next(p))
+		if (pos-- == 0)
+			return p;
+	return NULL;
 }
 
-static void nommu_vma_list_stop(struct seq_file *m, void *v)
+static void nommu_region_list_stop(struct seq_file *m, void *v)
 {
-	up_read(&nommu_vma_sem);
+	up_read(&nommu_region_sem);
 }
 
-static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos)
+static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	(*pos)++;
 	return rb_next((struct rb_node *) v);
 }
 
-static const struct seq_operations proc_nommu_vma_list_seqop = {
-	.start	= nommu_vma_list_start,
-	.next	= nommu_vma_list_next,
-	.stop	= nommu_vma_list_stop,
-	.show	= nommu_vma_list_show
+static struct seq_operations proc_nommu_region_list_seqop = {
+	.start	= nommu_region_list_start,
+	.next	= nommu_region_list_next,
+	.stop	= nommu_region_list_stop,
+	.show	= nommu_region_list_show
 };
 
-static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
+static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &proc_nommu_vma_list_seqop);
+	return seq_open(file, &proc_nommu_region_list_seqop);
 }
 
-static const struct file_operations proc_nommu_vma_list_operations = {
-	.open    = proc_nommu_vma_list_open,
+static const struct file_operations proc_nommu_region_list_operations = {
+	.open    = proc_nommu_region_list_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = seq_release,
@@ -137,7 +130,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
 
 static int __init proc_nommu_init(void)
 {
-	proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
+	proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
 	return 0;
 }
 
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index d4a8be32b902..ca4a48d0d311 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -15,25 +15,25 @@
  */
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
-	struct vm_list_struct *vml;
+	struct vm_area_struct *vma;
+	struct rb_node *p;
 	unsigned long bytes = 0, sbytes = 0, slack = 0;
 
 	down_read(&mm->mmap_sem);
-	for (vml = mm->context.vmlist; vml; vml = vml->next) {
-		if (!vml->vma)
-			continue;
+	for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
+		vma = rb_entry(p, struct vm_area_struct, vm_rb);
 
-		bytes += kobjsize(vml);
+		bytes += kobjsize(vma);
 		if (atomic_read(&mm->mm_count) > 1 ||
-		    atomic_read(&vml->vma->vm_usage) > 1
-		    ) {
-			sbytes += kobjsize((void *) vml->vma->vm_start);
-			sbytes += kobjsize(vml->vma);
+		    vma->vm_region ||
+		    vma->vm_flags & VM_MAYSHARE) {
+			sbytes += kobjsize((void *) vma->vm_start);
+			if (vma->vm_region)
+				sbytes += kobjsize(vma->vm_region);
 		} else {
-			bytes += kobjsize((void *) vml->vma->vm_start);
-			bytes += kobjsize(vml->vma);
-			slack += kobjsize((void *) vml->vma->vm_start) -
-				(vml->vma->vm_end - vml->vma->vm_start);
+			bytes += kobjsize((void *) vma->vm_start);
+			slack += kobjsize((void *) vma->vm_start) -
+				(vma->vm_end - vma->vm_start);
 		}
 	}
 
@@ -70,13 +70,14 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 
 unsigned long task_vsize(struct mm_struct *mm)
 {
-	struct vm_list_struct *tbp;
+	struct vm_area_struct *vma;
+	struct rb_node *p;
 	unsigned long vsize = 0;
 
 	down_read(&mm->mmap_sem);
-	for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
-		if (tbp->vma)
-			vsize += kobjsize((void *) tbp->vma->vm_start);
+	for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
+		vma = rb_entry(p, struct vm_area_struct, vm_rb);
+		vsize += vma->vm_region->vm_end - vma->vm_region->vm_start;
 	}
 	up_read(&mm->mmap_sem);
 	return vsize;
@@ -85,16 +86,15 @@ unsigned long task_vsize(struct mm_struct *mm)
 int task_statm(struct mm_struct *mm, int *shared, int *text,
 	       int *data, int *resident)
 {
-	struct vm_list_struct *tbp;
+	struct vm_area_struct *vma;
+	struct rb_node *p;
 	int size = kobjsize(mm);
 
 	down_read(&mm->mmap_sem);
-	for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
-		size += kobjsize(tbp);
-		if (tbp->vma) {
-			size += kobjsize(tbp->vma);
-			size += kobjsize((void *) tbp->vma->vm_start);
-		}
+	for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
+		vma = rb_entry(p, struct vm_area_struct, vm_rb);
+		size += kobjsize(vma);
+		size += kobjsize((void *) vma->vm_start);
 	}
 
 	size += (*text = mm->end_code - mm->start_code);
@@ -105,20 +105,62 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
 }
 
 /*
+ * display a single VMA to a sequenced file
+ */
+static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
+{
+	unsigned long ino = 0;
+	struct file *file;
+	dev_t dev = 0;
+	int flags, len;
+
+	flags = vma->vm_flags;
+	file = vma->vm_file;
+
+	if (file) {
+		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+		dev = inode->i_sb->s_dev;
+		ino = inode->i_ino;
+	}
+
+	seq_printf(m,
+		   "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+		   vma->vm_start,
+		   vma->vm_end,
+		   flags & VM_READ ? 'r' : '-',
+		   flags & VM_WRITE ? 'w' : '-',
+		   flags & VM_EXEC ? 'x' : '-',
+		   flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
+		   vma->vm_pgoff << PAGE_SHIFT,
+		   MAJOR(dev), MINOR(dev), ino, &len);
+
+	if (file) {
+		len = 25 + sizeof(void *) * 6 - len;
+		if (len < 1)
+			len = 1;
+		seq_printf(m, "%*c", len, ' ');
+		seq_path(m, &file->f_path, "");
+	}
+
+	seq_putc(m, '\n');
+	return 0;
+}
+
+/*
  * display mapping lines for a particular process's /proc/pid/maps
  */
-static int show_map(struct seq_file *m, void *_vml)
+static int show_map(struct seq_file *m, void *_p)
 {
-	struct vm_list_struct *vml = _vml;
+	struct rb_node *p = _p;
 
-	return nommu_vma_show(m, vml->vma);
+	return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
 }
 
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
 	struct proc_maps_private *priv = m->private;
-	struct vm_list_struct *vml;
 	struct mm_struct *mm;
+	struct rb_node *p;
 	loff_t n = *pos;
 
 	/* pin the task and mm whilst we play with them */
@@ -134,9 +176,9 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 	}
 
 	/* start from the Nth VMA */
-	for (vml = mm->context.vmlist; vml; vml = vml->next)
+	for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
 		if (n-- == 0)
-			return vml;
+			return p;
 	return NULL;
 }
 
142 184
@@ -152,12 +194,12 @@ static void m_stop(struct seq_file *m, void *_vml)
152 } 194 }
153} 195}
154 196
155static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) 197static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
156{ 198{
157 struct vm_list_struct *vml = _vml; 199 struct rb_node *p = _p;
158 200
159 (*pos)++; 201 (*pos)++;
160 return vml ? vml->next : NULL; 202 return p ? rb_next(p) : NULL;
161} 203}
162 204
163static const struct seq_operations proc_pid_maps_ops = { 205static const struct seq_operations proc_pid_maps_ops = {
diff --git a/include/asm-frv/mmu.h b/include/asm-frv/mmu.h
index 22c03714fb14..86ca0e86e7d2 100644
--- a/include/asm-frv/mmu.h
+++ b/include/asm-frv/mmu.h
@@ -22,7 +22,6 @@ typedef struct {
 	unsigned long	dtlb_ptd_mapping;	/* [DAMR5] PTD mapping for dtlb cached PGE */
 
 #else
-	struct vm_list_struct	*vmlist;
 	unsigned long	end_brk;
 
 #endif
diff --git a/include/asm-m32r/mmu.h b/include/asm-m32r/mmu.h
index d9bd724479cf..150cb92bb666 100644
--- a/include/asm-m32r/mmu.h
+++ b/include/asm-m32r/mmu.h
@@ -4,7 +4,6 @@
 #if !defined(CONFIG_MMU)
 
 typedef struct {
-	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 } mm_context_t;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4a3d28c86443..b91a73fd1bcc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr;
 
 extern struct kmem_cache *vm_area_cachep;
 
-/*
- * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
- * disabled, then there's a single shared list of VMAs maintained by the
- * system, and mm's subscribe to these individually
- */
-struct vm_list_struct {
-	struct vm_list_struct	*next;
-	struct vm_area_struct	*vma;
-};
-
 #ifndef CONFIG_MMU
-extern struct rb_root nommu_vma_tree;
-extern struct rw_semaphore nommu_vma_sem;
+extern struct rb_root nommu_region_tree;
+extern struct rw_semaphore nommu_region_sem;
 
 extern unsigned int kobjsize(const void *objp);
 #endif
@@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long,
 			       unsigned long, enum memmap_context);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
+extern void __init mmap_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void);
 static inline void setup_per_cpu_pageset(void) {}
 #endif
 
+/* nommu.c */
+extern atomic_t mmap_pages_allocated;
+
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
 void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9cfc9b627fdd..1c1e0d3a1714 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,6 +97,22 @@ struct page {
 };
 
 /*
+ * A region containing a mapping of a non-memory backed file under NOMMU
+ * conditions.  These are held in a global tree and are pinned by the VMAs that
+ * map parts of them.
+ */
+struct vm_region {
+	struct rb_node	vm_rb;		/* link in global region tree */
+	unsigned long	vm_flags;	/* VMA vm_flags */
+	unsigned long	vm_start;	/* start address of region */
+	unsigned long	vm_end;		/* region initialised to here */
+	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
+	struct file	*vm_file;	/* the backing file or NULL */
+
+	atomic_t	vm_usage;	/* region usage count */
+};
+
+/*
  * This struct defines a memory VMM memory area. There is one of these
  * per VM-area/task.  A VM area is any part of the process virtual memory
  * space that has a special rule for the page-fault handlers (ie a shared
@@ -152,7 +168,7 @@ struct vm_area_struct {
 	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
 
 #ifndef CONFIG_MMU
-	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
+	struct vm_region *vm_region;	/* NOMMU mapping region */
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
diff --git a/ipc/shm.c b/ipc/shm.c
index b125b560240e..d0ab5527bf45 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -990,6 +990,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
 	 */
 	vma = find_vma(mm, addr);
 
+#ifdef CONFIG_MMU
 	while (vma) {
 		next = vma->vm_next;
 
@@ -1034,6 +1035,17 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
 		vma = next;
 	}
 
+#else /* CONFIG_MMU */
+	/* under NOMMU conditions, the exact address to be destroyed must be
+	 * given */
+	retval = -EINVAL;
+	if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
+		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+		retval = 0;
+	}
+
+#endif
+
 	up_write(&mm->mmap_sem);
 	return retval;
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 7b8f2a78be3d..0bce4a43bb37 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1481,12 +1481,10 @@ void __init proc_caches_init(void)
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-	vm_area_cachep = kmem_cache_create("vm_area_struct",
-			sizeof(struct vm_area_struct), 0,
-			SLAB_PANIC, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+	mmap_init();
 }
 
 /*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e75478e9c69..d0a32aab03ff 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -512,6 +512,13 @@ config DEBUG_VIRTUAL
 
 	  If unsure, say N.
 
+config DEBUG_NOMMU_REGIONS
+	bool "Debug the global anon/private NOMMU mapping region tree"
+	depends on DEBUG_KERNEL && !MMU
+	help
+	  This option causes the global tree of anonymous and private mapping
+	  regions to be regularly checked for invalid topology.
+
 config DEBUG_WRITECOUNT
 	bool "Debug filesystem writers count"
 	depends on DEBUG_KERNEL
diff --git a/mm/mmap.c b/mm/mmap.c
index a910c045cfd4..749623196cb9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2472,3 +2472,13 @@ void mm_drop_all_locks(struct mm_struct *mm)
 
 	mutex_unlock(&mm_all_locks_mutex);
 }
+
+/*
+ * initialise the VMA slab
+ */
+void __init mmap_init(void)
+{
+	vm_area_cachep = kmem_cache_create("vm_area_struct",
+			sizeof(struct vm_area_struct), 0,
+			SLAB_PANIC, NULL);
+}
diff --git a/mm/nommu.c b/mm/nommu.c
index 23f355bbe262..0d363dfcf10e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -6,7 +6,7 @@
  *
  *  See Documentation/nommu-mmap.txt
  *
- *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
+ *  Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
@@ -33,6 +33,28 @@
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include "internal.h"
+
+static inline __attribute__((format(printf, 1, 2)))
+void no_printk(const char *fmt, ...)
+{
+}
+
+#if 0
+#define kenter(FMT, ...) \
+	printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) \
+	printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
+#define kdebug(FMT, ...) \
+	printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__)
+#else
+#define kenter(FMT, ...) \
+	no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) \
+	no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
+#define kdebug(FMT, ...) \
+	no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
+#endif
 
 #include "internal.h"
 
@@ -46,12 +68,15 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int heap_stack_gap = 0;
 
+atomic_t mmap_pages_allocated;
+
 EXPORT_SYMBOL(mem_map);
 EXPORT_SYMBOL(num_physpages);
 
-/* list of shareable VMAs */
-struct rb_root nommu_vma_tree = RB_ROOT;
-DECLARE_RWSEM(nommu_vma_sem);
+/* list of mapped, potentially shareable regions */
+static struct kmem_cache *vm_region_jar;
+struct rb_root nommu_region_tree = RB_ROOT;
+DECLARE_RWSEM(nommu_region_sem);
 
 struct vm_operations_struct generic_file_vm_ops = {
 };
@@ -400,129 +425,174 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 	return mm->brk = brk;
 }
 
-#ifdef DEBUG
-static void show_process_blocks(void)
+/*
+ * initialise the VMA and region record slabs
+ */
+void __init mmap_init(void)
 {
-	struct vm_list_struct *vml;
-
-	printk("Process blocks %d:", current->pid);
-
-	for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
-		printk(" %p: %p", vml, vml->vma);
-		if (vml->vma)
-			printk(" (%d @%lx #%d)",
-			       kobjsize((void *) vml->vma->vm_start),
-			       vml->vma->vm_start,
-			       atomic_read(&vml->vma->vm_usage));
-		printk(vml->next ? " ->" : ".\n");
-	}
+	vm_region_jar = kmem_cache_create("vm_region_jar",
+					  sizeof(struct vm_region), 0,
+					  SLAB_PANIC, NULL);
+	vm_area_cachep = kmem_cache_create("vm_area_struct",
+					   sizeof(struct vm_area_struct), 0,
+					   SLAB_PANIC, NULL);
 }
-#endif /* DEBUG */
 
 /*
- * add a VMA into a process's mm_struct in the appropriate place in the list
- * - should be called with mm->mmap_sem held writelocked
+ * validate the region tree
+ * - the caller must hold the region lock
  */
-static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
+#ifdef CONFIG_DEBUG_NOMMU_REGIONS
+static noinline void validate_nommu_regions(void)
 {
-	struct vm_list_struct **ppv;
+	struct vm_region *region, *last;
+	struct rb_node *p, *lastp;
 
-	for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
-		if ((*ppv)->vma->vm_start > vml->vma->vm_start)
-			break;
+	lastp = rb_first(&nommu_region_tree);
+	if (!lastp)
+		return;
+
+	last = rb_entry(lastp, struct vm_region, vm_rb);
+	if (unlikely(last->vm_end <= last->vm_start))
+		BUG();
+
+	while ((p = rb_next(lastp))) {
+		region = rb_entry(p, struct vm_region, vm_rb);
+		last = rb_entry(lastp, struct vm_region, vm_rb);
+
+		if (unlikely(region->vm_end <= region->vm_start))
+			BUG();
+		if (unlikely(region->vm_start < last->vm_end))
+			BUG();
 
-	vml->next = *ppv;
-	*ppv = vml;
+		lastp = p;
+	}
 }
+#else
+#define validate_nommu_regions() do {} while(0)
+#endif
 
 /*
- * look up the first VMA in which addr resides, NULL if none
- * - should be called with mm->mmap_sem at least held readlocked
+ * add a region into the global tree
  */
-struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+static void add_nommu_region(struct vm_region *region)
 {
-	struct vm_list_struct *loop, *vml;
+	struct vm_region *pregion;
+	struct rb_node **p, *parent;
 
-	/* search the vm_start ordered list */
-	vml = NULL;
-	for (loop = mm->context.vmlist; loop; loop = loop->next) {
-		if (loop->vma->vm_start > addr)
-			break;
-		vml = loop;
+	validate_nommu_regions();
+
+	BUG_ON(region->vm_start & ~PAGE_MASK);
+
+	parent = NULL;
+	p = &nommu_region_tree.rb_node;
+	while (*p) {
+		parent = *p;
+		pregion = rb_entry(parent, struct vm_region, vm_rb);
+		if (region->vm_start < pregion->vm_start)
+			p = &(*p)->rb_left;
+		else if (region->vm_start > pregion->vm_start)
+			p = &(*p)->rb_right;
+		else if (pregion == region)
+			return;
+		else
+			BUG();
 	}
 
-	if (vml && vml->vma->vm_end > addr)
-		return vml->vma;
+	rb_link_node(&region->vm_rb, parent, p);
+	rb_insert_color(&region->vm_rb, &nommu_region_tree);
 
-	return NULL;
+	validate_nommu_regions();
 }
-EXPORT_SYMBOL(find_vma);
 
 /*
- * find a VMA
- * - we don't extend stack VMAs under NOMMU conditions
+ * delete a region from the global tree
  */
-struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
+static void delete_nommu_region(struct vm_region *region)
 {
-	return find_vma(mm, addr);
-}
+	BUG_ON(!nommu_region_tree.rb_node);
 
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
-{
-	return -ENOMEM;
+	validate_nommu_regions();
+	rb_erase(&region->vm_rb, &nommu_region_tree);
+	validate_nommu_regions();
 }
 
 /*
- * look up the first VMA exactly that exactly matches addr
- * - should be called with mm->mmap_sem at least held readlocked
+ * free a contiguous series of pages
  */
-static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
-						    unsigned long addr)
+static void free_page_series(unsigned long from, unsigned long to)
 {
-	struct vm_list_struct *vml;
-
-	/* search the vm_start ordered list */
-	for (vml = mm->context.vmlist; vml; vml = vml->next) {
-		if (vml->vma->vm_start == addr)
-			return vml->vma;
-		if (vml->vma->vm_start > addr)
-			break;
+	for (; from < to; from += PAGE_SIZE) {
+		struct page *page = virt_to_page(from);
+
+		kdebug("- free %lx", from);
+		atomic_dec(&mmap_pages_allocated);
+		if (page_count(page) != 1)
+			kdebug("free page %p [%d]", page, page_count(page));
+		put_page(page);
 	}
-
-	return NULL;
 }
 
 /*
- * find a VMA in the global tree
+ * release a reference to a region
+ * - the caller must hold the region semaphore, which this releases
+ * - the region may not have been added to the tree yet, in which case vm_end
+ *   will equal vm_start
  */
-static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
+static void __put_nommu_region(struct vm_region *region)
+	__releases(nommu_region_sem)
 {
-	struct vm_area_struct *vma;
-	struct rb_node *n = nommu_vma_tree.rb_node;
+	kenter("%p{%d}", region, atomic_read(&region->vm_usage));
 
-	while (n) {
-		vma = rb_entry(n, struct vm_area_struct, vm_rb);
+	BUG_ON(!nommu_region_tree.rb_node);
 
-		if (start < vma->vm_start)
-			n = n->rb_left;
-		else if (start > vma->vm_start)
-			n = n->rb_right;
-		else
-			return vma;
+	if (atomic_dec_and_test(&region->vm_usage)) {
+		if (region->vm_end > region->vm_start)
+			delete_nommu_region(region);
+		up_write(&nommu_region_sem);
+
+		if (region->vm_file)
+			fput(region->vm_file);
+
+		/* IO memory and memory shared directly out of the pagecache
+		 * from ramfs/tmpfs mustn't be released here */
+		if (region->vm_flags & VM_MAPPED_COPY) {
+			kdebug("free series");
+			free_page_series(region->vm_start, region->vm_end);
+		}
+		kmem_cache_free(vm_region_jar, region);
+	} else {
+		up_write(&nommu_region_sem);
 	}
+}
 
-	return NULL;
+/*
+ * release a reference to a region
+ */
+static void put_nommu_region(struct vm_region *region)
+{
+	down_write(&nommu_region_sem);
+	__put_nommu_region(region);
 }
 
 /*
- * add a VMA in the global tree
+ * add a VMA into a process's mm_struct in the appropriate place in the list
+ * and tree and add to the address space's page tree also if not an anonymous
+ * page
+ * - should be called with mm->mmap_sem held writelocked
  */
-static void add_nommu_vma(struct vm_area_struct *vma)
+static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	struct vm_area_struct *pvma;
+	struct vm_area_struct *pvma, **pp;
 	struct address_space *mapping;
-	struct rb_node **p = &nommu_vma_tree.rb_node;
-	struct rb_node *parent = NULL;
+	struct rb_node **p, *parent;
+
+	kenter(",%p", vma);
+
+	BUG_ON(!vma->vm_region);
+
+	mm->map_count++;
+	vma->vm_mm = mm;
 
 	/* add the VMA to the mapping */
 	if (vma->vm_file) {
@@ -533,42 +603,62 @@ static void add_nommu_vma(struct vm_area_struct *vma)
 		flush_dcache_mmap_unlock(mapping);
 	}
 
-	/* add the VMA to the master list */
+	/* add the VMA to the tree */
+	parent = NULL;
+	p = &mm->mm_rb.rb_node;
 	while (*p) {
 		parent = *p;
 		pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
 
-		if (vma->vm_start < pvma->vm_start) {
+		/* sort by: start addr, end addr, VMA struct addr in that order
+		 * (the latter is necessary as we may get identical VMAs) */
+		if (vma->vm_start < pvma->vm_start)
 			p = &(*p)->rb_left;
-		}
-		else if (vma->vm_start > pvma->vm_start) {
+		else if (vma->vm_start > pvma->vm_start)
 			p = &(*p)->rb_right;
-		}
-		else {
-			/* mappings are at the same address - this can only
-			 * happen for shared-mem chardevs and shared file
-			 * mappings backed by ramfs/tmpfs */
-			BUG_ON(!(pvma->vm_flags & VM_SHARED));
-
-			if (vma < pvma)
-				p = &(*p)->rb_left;
-			else if (vma > pvma)
-				p = &(*p)->rb_right;
-			else
-				BUG();
-		}
+		else if (vma->vm_end < pvma->vm_end)
+			p = &(*p)->rb_left;
+		else if (vma->vm_end > pvma->vm_end)
+			p = &(*p)->rb_right;
+		else if (vma < pvma)
+			p = &(*p)->rb_left;
+		else if (vma > pvma)
+			p = &(*p)->rb_right;
+		else
+			BUG();
 	}
 
 	rb_link_node(&vma->vm_rb, parent, p);
-	rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
+	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+
+	/* add VMA to the VMA list also */
+	for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) {
+		if (pvma->vm_start > vma->vm_start)
+			break;
+		if (pvma->vm_start < vma->vm_start)
+			continue;
+		if (pvma->vm_end < vma->vm_end)
+			break;
+	}
+
+	vma->vm_next = *pp;
+	*pp = vma;
 }
 
 /*
- * delete a VMA from the global list
+ * delete a VMA from its owning mm_struct and address space
  */
-static void delete_nommu_vma(struct vm_area_struct *vma)
+static void delete_vma_from_mm(struct vm_area_struct *vma)
 {
+	struct vm_area_struct **pp;
 	struct address_space *mapping;
+	struct mm_struct *mm = vma->vm_mm;
+
+	kenter("%p", vma);
+
+	mm->map_count--;
+	if (mm->mmap_cache == vma)
+		mm->mmap_cache = NULL;
 
 	/* remove the VMA from the mapping */
 	if (vma->vm_file) {
@@ -579,8 +669,115 @@ static void delete_nommu_vma(struct vm_area_struct *vma)
 		flush_dcache_mmap_unlock(mapping);
 	}
 
-	/* remove from the master list */
-	rb_erase(&vma->vm_rb, &nommu_vma_tree);
+	/* remove from the MM's tree and list */
+	rb_erase(&vma->vm_rb, &mm->mm_rb);
+	for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) {
+		if (*pp == vma) {
+			*pp = vma->vm_next;
+			break;
+		}
+	}
+
+	vma->vm_mm = NULL;
+}
+
+/*
+ * destroy a VMA record
+ */
+static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	kenter("%p", vma);
+	if (vma->vm_ops && vma->vm_ops->close)
+		vma->vm_ops->close(vma);
+	if (vma->vm_file) {
+		fput(vma->vm_file);
+		if (vma->vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+	}
+	put_nommu_region(vma->vm_region);
+	kmem_cache_free(vm_area_cachep, vma);
+}
+
+/*
+ * look up the first VMA in which addr resides, NULL if none
+ * - should be called with mm->mmap_sem at least held readlocked
+ */
+struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+	struct rb_node *n = mm->mm_rb.rb_node;
+
+	/* check the cache first */
+	vma = mm->mmap_cache;
+	if (vma && vma->vm_start <= addr && vma->vm_end > addr)
+		return vma;
+
+	/* trawl the tree (there may be multiple mappings in which addr
+	 * resides) */
+	for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
+		vma = rb_entry(n, struct vm_area_struct, vm_rb);
+		if (vma->vm_start > addr)
+			return NULL;
+		if (vma->vm_end > addr) {
+			mm->mmap_cache = vma;
+			return vma;
+		}
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(find_vma);
+
+/*
+ * find a VMA
+ * - we don't extend stack VMAs under NOMMU conditions
+ */
+struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+	return find_vma(mm, addr);
+}
+
+/*
+ * expand a stack to a given address
+ * - not supported under NOMMU conditions
+ */
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+	return -ENOMEM;
+}
+
+/*
+ * look up the first VMA exactly that exactly matches addr
+ * - should be called with mm->mmap_sem at least held readlocked
+ */
+static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
+					     unsigned long addr,
+					     unsigned long len)
+{
+	struct vm_area_struct *vma;
+	struct rb_node *n = mm->mm_rb.rb_node;
+	unsigned long end = addr + len;
+
+	/* check the cache first */
+	vma = mm->mmap_cache;
+	if (vma && vma->vm_start == addr && vma->vm_end == end)
+		return vma;
+
+	/* trawl the tree (there may be multiple mappings in which addr
+	 * resides) */
+	for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
+		vma = rb_entry(n, struct vm_area_struct, vm_rb);
+		if (vma->vm_start < addr)
+			continue;
+		if (vma->vm_start > addr)
+			return NULL;
+		if (vma->vm_end == end) {
+			mm->mmap_cache = vma;
+			return vma;
+		}
+	}
+
+	return NULL;
 }
 
 /*
@@ -595,7 +792,7 @@ static int validate_mmap_request(struct file *file,
 				 unsigned long pgoff,
 				 unsigned long *_capabilities)
 {
-	unsigned long capabilities;
+	unsigned long capabilities, rlen;
 	unsigned long reqprot = prot;
 	int ret;
 
@@ -615,12 +812,12 @@ static int validate_mmap_request(struct file *file,
 		return -EINVAL;
 
 	/* Careful about overflows.. */
-	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE)
+	rlen = PAGE_ALIGN(len);
+	if (!rlen || rlen > TASK_SIZE)
 		return -ENOMEM;
 
 	/* offset overflow? */
-	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
+	if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
 		return -EOVERFLOW;
 
 	if (file) {
@@ -794,9 +991,10 @@ static unsigned long determine_vm_flags(struct file *file,
 }
 
 /*
- * set up a shared mapping on a file
+ * set up a shared mapping on a file (the driver or filesystem provides and
+ * pins the storage)
  */
-static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
+static int do_mmap_shared_file(struct vm_area_struct *vma)
 {
 	int ret;
 
@@ -814,10 +1012,14 @@ static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
 /*
  * set up a private mapping or an anonymous shared mapping
  */
-static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
+static int do_mmap_private(struct vm_area_struct *vma,
+			   struct vm_region *region,
+			   unsigned long len)
 {
+	struct page *pages;
+	unsigned long total, point, n, rlen;
 	void *base;
-	int ret;
+	int ret, order;
 
 	/* invoke the file's mapping function so that it can keep track of
 	 * shared mappings on devices or memory
@@ -836,23 +1038,46 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
 	 * make a private copy of the data and map that instead */
 	}
 
+	rlen = PAGE_ALIGN(len);
+
 	/* allocate some memory to hold the mapping
 	 * - note that this may not return a page-aligned address if the object
 	 *   we're allocating is smaller than a page
 	 */
-	base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
-	if (!base)
+	order = get_order(rlen);
+	kdebug("alloc order %d for %lx", order, len);
+
+	pages = alloc_pages(GFP_KERNEL, order);
+	if (!pages)
 		goto enomem;
 
-	vma->vm_start = (unsigned long) base;
-	vma->vm_end = vma->vm_start + len;
-	vma->vm_flags |= VM_MAPPED_COPY;
+	/* we allocated a power-of-2 sized page set, so we need to trim off the
+	 * excess */
+	total = 1 << order;
+	atomic_add(total, &mmap_pages_allocated);
+
+	point = rlen >> PAGE_SHIFT;
+	while (total > point) {
+		order = ilog2(total - point);
+		n = 1 << order;
+		kdebug("shave %lu/%lu @%lu", n, total - point, total);
+		atomic_sub(n, &mmap_pages_allocated);
+		total -= n;
+		set_page_refcounted(pages + total);
+		__free_pages(pages + total, order);
+	}
+
+	total = rlen >> PAGE_SHIFT;
+	for (point = 1; point < total; point++)
+		set_page_refcounted(&pages[point]);
 
-#ifdef WARN_ON_SLACK
-	if (len + WARN_ON_SLACK <= kobjsize(result))
-		printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
-		       len, current->pid, kobjsize(result) - len);
-#endif
+	base = page_address(pages);
+	region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
+	region->vm_start = (unsigned long) base;
+	region->vm_end   = region->vm_start + rlen;
+
+	vma->vm_start = region->vm_start;
+	vma->vm_end   = region->vm_start + len;
 
 	if (vma->vm_file) {
 		/* read the contents of a file into the copy */
@@ -864,26 +1089,27 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
 
 		old_fs = get_fs();
 		set_fs(KERNEL_DS);
-		ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
+		ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
 		set_fs(old_fs);
 
 		if (ret < 0)
 			goto error_free;
 
 		/* clear the last little bit */
-		if (ret < len)
-			memset(base + ret, 0, len - ret);
+		if (ret < rlen)
+			memset(base + ret, 0, rlen - ret);
 
 	} else {
 		/* if it's an anonymous mapping, then just clear it */
-		memset(base, 0, len);
+		memset(base, 0, rlen);
 	}
 
 	return 0;
 
 error_free:
-	kfree(base);
-	vma->vm_start = 0;
+	free_page_series(region->vm_start, region->vm_end);
+	region->vm_start = vma->vm_start = 0;
+	region->vm_end = vma->vm_end = 0;
 	return ret;
 
 enomem:
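To make the private-copy semantics concrete: the backing file is read into the freshly allocated pages, and any shortfall is zeroed so the mapping never exposes stale memory; on a read error the whole page series is torn down. A hedged user-space analogue — fill_private_copy() is an invented name, and pread() stands in for the f_op->read() call the kernel makes under set_fs(KERNEL_DS):

#define _XOPEN_SOURCE 700
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

/* read up to rlen bytes of backing file into the copy at 'base', then
 * clear the tail; the caller frees the pages if the read itself fails */
static ssize_t fill_private_copy(int fd, void *base, size_t rlen, off_t fpos)
{
	ssize_t ret = pread(fd, base, rlen, fpos);

	if (ret < 0)
		return ret;
	if ((size_t)ret < rlen)
		memset((char *)base + ret, 0, rlen - ret);
	return ret;
}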
@@ -903,13 +1129,14 @@ unsigned long do_mmap_pgoff(struct file *file,
 		       unsigned long flags,
 		       unsigned long pgoff)
 {
-	struct vm_list_struct *vml = NULL;
-	struct vm_area_struct *vma = NULL;
+	struct vm_area_struct *vma;
+	struct vm_region *region;
 	struct rb_node *rb;
-	unsigned long capabilities, vm_flags;
-	void *result;
+	unsigned long capabilities, vm_flags, result;
 	int ret;
 
+	kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
+
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
@@ -917,73 +1144,120 @@ unsigned long do_mmap_pgoff(struct file *file,
 	 * mapping */
 	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
 				    &capabilities);
-	if (ret < 0)
+	if (ret < 0) {
+		kleave(" = %d [val]", ret);
 		return ret;
+	}
 
 	/* we've determined that we can make the mapping, now translate what we
 	 * now know into VMA flags */
 	vm_flags = determine_vm_flags(file, prot, flags, capabilities);
 
-	/* we're going to need to record the mapping if it works */
-	vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
-	if (!vml)
-		goto error_getting_vml;
+	/* we're going to need to record the mapping */
+	region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
+	if (!region)
+		goto error_getting_region;
+
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (!vma)
+		goto error_getting_vma;
+
+	atomic_set(&region->vm_usage, 1);
+	region->vm_flags = vm_flags;
+	region->vm_pgoff = pgoff;
 
-	down_write(&nommu_vma_sem);
+	INIT_LIST_HEAD(&vma->anon_vma_node);
+	vma->vm_flags = vm_flags;
+	vma->vm_pgoff = pgoff;
 
-	/* if we want to share, we need to check for VMAs created by other
+	if (file) {
+		region->vm_file = file;
+		get_file(file);
+		vma->vm_file = file;
+		get_file(file);
+		if (vm_flags & VM_EXECUTABLE) {
+			added_exe_file_vma(current->mm);
+			vma->vm_mm = current->mm;
+		}
+	}
+
+	down_write(&nommu_region_sem);
+
+	/* if we want to share, we need to check for regions created by other
 	 * mmap() calls that overlap with our proposed mapping
-	 * - we can only share with an exact match on most regular files
+	 * - we can only share with a superset match on most regular files
 	 * - shared mappings on character devices and memory backed files are
 	 *   permitted to overlap inexactly as far as we are concerned for in
 	 *   these cases, sharing is handled in the driver or filesystem rather
 	 *   than here
 	 */
 	if (vm_flags & VM_MAYSHARE) {
-		unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		unsigned long vmpglen;
+		struct vm_region *pregion;
+		unsigned long pglen, rpglen, pgend, rpgend, start;
 
-		/* suppress VMA sharing for shared regions */
-		if (vm_flags & VM_SHARED &&
-		    capabilities & BDI_CAP_MAP_DIRECT)
-			goto dont_share_VMAs;
+		pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		pgend = pgoff + pglen;
 
-		for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
-			vma = rb_entry(rb, struct vm_area_struct, vm_rb);
+		for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
+			pregion = rb_entry(rb, struct vm_region, vm_rb);
 
-			if (!(vma->vm_flags & VM_MAYSHARE))
+			if (!(pregion->vm_flags & VM_MAYSHARE))
 				continue;
 
 			/* search for overlapping mappings on the same file */
-			if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
+			if (pregion->vm_file->f_path.dentry->d_inode !=
+			    file->f_path.dentry->d_inode)
 				continue;
 
-			if (vma->vm_pgoff >= pgoff + pglen)
+			if (pregion->vm_pgoff >= pgend)
 				continue;
 
-			vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
-			vmpglen >>= PAGE_SHIFT;
-			if (pgoff >= vma->vm_pgoff + vmpglen)
+			rpglen = pregion->vm_end - pregion->vm_start;
+			rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+			rpgend = pregion->vm_pgoff + rpglen;
+			if (pgoff >= rpgend)
 				continue;
 
-			/* handle inexactly overlapping matches between mappings */
-			if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
+			/* handle inexactly overlapping matches between
+			 * mappings */
+			if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
+			    !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
+				/* new mapping is not a subset of the region */
 				if (!(capabilities & BDI_CAP_MAP_DIRECT))
 					goto sharing_violation;
 				continue;
 			}
 
-			/* we've found a VMA we can share */
-			atomic_inc(&vma->vm_usage);
-
-			vml->vma = vma;
-			result = (void *) vma->vm_start;
-			goto shared;
+			/* we've found a region we can share */
+			atomic_inc(&pregion->vm_usage);
+			vma->vm_region = pregion;
+			start = pregion->vm_start;
+			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
+			vma->vm_start = start;
+			vma->vm_end = start + len;
+
+			if (pregion->vm_flags & VM_MAPPED_COPY) {
+				kdebug("share copy");
+				vma->vm_flags |= VM_MAPPED_COPY;
+			} else {
+				kdebug("share mmap");
+				ret = do_mmap_shared_file(vma);
+				if (ret < 0) {
+					vma->vm_region = NULL;
+					vma->vm_start = 0;
+					vma->vm_end = 0;
+					atomic_dec(&pregion->vm_usage);
+					pregion = NULL;
+					goto error_just_free;
+				}
+			}
+			fput(region->vm_file);
+			kmem_cache_free(vm_region_jar, region);
+			region = pregion;
+			result = start;
+			goto share;
 		}
 
-	dont_share_VMAs:
-		vma = NULL;
-
 		/* obtain the address at which to make a shared mapping
 		 * - this is the hook for quasi-memory character devices to
 		 *   tell us the location of a shared mapping
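The sharing test above is interval arithmetic on page offsets: a candidate region can be reused on an exact match, or when the new mapping [pgoff, pgoff + pglen) nests wholly inside the region's [vm_pgoff, vm_pgoff + rpglen); a partial overlap on a file that cannot be mapped directly is a sharing violation. An illustrative restatement with hypothetical parameter names, not code from the patch:

#include <stdbool.h>

/* offsets and lengths are in pages, echoing pgoff/pglen and the candidate
 * region's vm_pgoff/rpglen above */
static bool can_share_region(unsigned long pgoff, unsigned long pglen,
			     unsigned long r_pgoff, unsigned long r_pglen)
{
	unsigned long pgend = pgoff + pglen;
	unsigned long rpgend = r_pgoff + r_pglen;

	/* no overlap at all: the search simply moves on */
	if (r_pgoff >= pgend || pgoff >= rpgend)
		return false;

	/* exact match, or the new mapping nests inside the region */
	return (r_pgoff == pgoff && r_pglen == pglen) ||
	       (pgoff >= r_pgoff && pgend <= rpgend);
}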
@@ -994,102 +1268,93 @@ unsigned long do_mmap_pgoff(struct file *file,
 			if (IS_ERR((void *) addr)) {
 				ret = addr;
 				if (ret != (unsigned long) -ENOSYS)
-					goto error;
+					goto error_just_free;
 
 				/* the driver refused to tell us where to site
 				 * the mapping so we'll have to attempt to copy
 				 * it */
 				ret = (unsigned long) -ENODEV;
 				if (!(capabilities & BDI_CAP_MAP_COPY))
-					goto error;
+					goto error_just_free;
 
 				capabilities &= ~BDI_CAP_MAP_DIRECT;
+			} else {
+				vma->vm_start = region->vm_start = addr;
+				vma->vm_end = region->vm_end = addr + len;
 			}
 		}
 	}
 
-	/* we're going to need a VMA struct as well */
-	vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
-	if (!vma)
-		goto error_getting_vma;
-
-	INIT_LIST_HEAD(&vma->anon_vma_node);
-	atomic_set(&vma->vm_usage, 1);
-	if (file) {
-		get_file(file);
-		if (vm_flags & VM_EXECUTABLE) {
-			added_exe_file_vma(current->mm);
-			vma->vm_mm = current->mm;
-		}
-	}
-	vma->vm_file = file;
-	vma->vm_flags = vm_flags;
-	vma->vm_start = addr;
-	vma->vm_end = addr + len;
-	vma->vm_pgoff = pgoff;
-
-	vml->vma = vma;
+	vma->vm_region = region;
 
 	/* set up the mapping */
 	if (file && vma->vm_flags & VM_SHARED)
-		ret = do_mmap_shared_file(vma, len);
+		ret = do_mmap_shared_file(vma);
 	else
-		ret = do_mmap_private(vma, len);
+		ret = do_mmap_private(vma, region, len);
 	if (ret < 0)
-		goto error;
+		goto error_put_region;
+
+	add_nommu_region(region);
 
 	/* okay... we have a mapping; now we have to register it */
-	result = (void *) vma->vm_start;
+	result = vma->vm_start;
 
 	current->mm->total_vm += len >> PAGE_SHIFT;
 
-	add_nommu_vma(vma);
+share:
+	add_vma_to_mm(current->mm, vma);
 
- shared:
-	add_vma_to_mm(current->mm, vml);
-
-	up_write(&nommu_vma_sem);
+	up_write(&nommu_region_sem);
 
 	if (prot & PROT_EXEC)
-		flush_icache_range((unsigned long) result,
-				   (unsigned long) result + len);
+		flush_icache_range(result, result + len);
 
-#ifdef DEBUG
-	printk("do_mmap:\n");
-	show_process_blocks();
-#endif
+	kleave(" = %lx", result);
+	return result;
 
-	return (unsigned long) result;
-
- error:
-	up_write(&nommu_vma_sem);
-	kfree(vml);
+error_put_region:
+	__put_nommu_region(region);
 	if (vma) {
 		if (vma->vm_file) {
 			fput(vma->vm_file);
 			if (vma->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(vma->vm_mm);
 		}
-		kfree(vma);
+		kmem_cache_free(vm_area_cachep, vma);
 	}
+	kleave(" = %d [pr]", ret);
 	return ret;
 
- sharing_violation:
-	up_write(&nommu_vma_sem);
-	printk("Attempt to share mismatched mappings\n");
-	kfree(vml);
-	return -EINVAL;
+error_just_free:
+	up_write(&nommu_region_sem);
+error:
+	fput(region->vm_file);
+	kmem_cache_free(vm_region_jar, region);
+	fput(vma->vm_file);
+	if (vma->vm_flags & VM_EXECUTABLE)
+		removed_exe_file_vma(vma->vm_mm);
+	kmem_cache_free(vm_area_cachep, vma);
+	kleave(" = %d", ret);
+	return ret;
+
+sharing_violation:
+	up_write(&nommu_region_sem);
+	printk(KERN_WARNING "Attempt to share mismatched mappings\n");
+	ret = -EINVAL;
+	goto error;
 
- error_getting_vma:
-	up_write(&nommu_vma_sem);
-	kfree(vml);
-	printk("Allocation of vma for %lu byte allocation from process %d failed\n",
+error_getting_vma:
+	kmem_cache_free(vm_region_jar, region);
+	printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
+	       " from process %d failed\n",
 	       len, current->pid);
 	show_free_areas();
 	return -ENOMEM;
 
- error_getting_vml:
-	printk("Allocation of vml for %lu byte allocation from process %d failed\n",
+error_getting_region:
+	printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
+	       " from process %d failed\n",
 	       len, current->pid);
 	show_free_areas();
 	return -ENOMEM;
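The reworked error paths form a goto ladder: each label releases exactly what had been acquired by the time of the failure, and later labels fall through into earlier cleanup. A generic user-space illustration of the pattern only — the names are invented and malloc() stands in for the slab caches:

#include <stdlib.h>

static int mapping_setup_sketch(void)
{
	char *region, *vma;
	int ret = -1;

	region = malloc(64);
	if (!region)
		goto error_getting_region;
	vma = malloc(64);
	if (!vma)
		goto error_getting_vma;

	/* ... the real work would go here; any failure jumps to the label
	 * that unwinds exactly what has been set up so far ... */

	free(vma);
	free(region);
	return 0;

error_getting_vma:
	free(region);
error_getting_region:
	return ret;
}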
@@ -1097,77 +1362,180 @@ unsigned long do_mmap_pgoff(struct file *file,
 EXPORT_SYMBOL(do_mmap_pgoff);
 
 /*
- * handle mapping disposal for uClinux
+ * split a vma into two pieces at address 'addr', a new vma is allocated either
+ * for the first part or the tail.
  */
-static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+	      unsigned long addr, int new_below)
 {
-	if (vma) {
-		down_write(&nommu_vma_sem);
+	struct vm_area_struct *new;
+	struct vm_region *region;
+	unsigned long npages;
 
-		if (atomic_dec_and_test(&vma->vm_usage)) {
-			delete_nommu_vma(vma);
+	kenter("");
 
-			if (vma->vm_ops && vma->vm_ops->close)
-				vma->vm_ops->close(vma);
+	/* we're only permitted to split anonymous regions that have a single
+	 * owner */
+	if (vma->vm_file ||
+	    atomic_read(&vma->vm_region->vm_usage) != 1)
+		return -ENOMEM;
 
-			/* IO memory and memory shared directly out of the pagecache from
-			 * ramfs/tmpfs mustn't be released here */
-			if (vma->vm_flags & VM_MAPPED_COPY)
-				kfree((void *) vma->vm_start);
+	if (mm->map_count >= sysctl_max_map_count)
+		return -ENOMEM;
 
-			if (vma->vm_file) {
-				fput(vma->vm_file);
-				if (vma->vm_flags & VM_EXECUTABLE)
-					removed_exe_file_vma(mm);
-			}
-			kfree(vma);
-		}
+	region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	if (!new) {
+		kmem_cache_free(vm_region_jar, region);
+		return -ENOMEM;
+	}
+
+	/* most fields are the same, copy all, and then fixup */
+	*new = *vma;
+	*region = *vma->vm_region;
+	new->vm_region = region;
+
+	npages = (addr - vma->vm_start) >> PAGE_SHIFT;
+
+	if (new_below) {
+		region->vm_end = new->vm_end = addr;
+	} else {
+		region->vm_start = new->vm_start = addr;
+		region->vm_pgoff = new->vm_pgoff += npages;
+	}
 
-		up_write(&nommu_vma_sem);
+	if (new->vm_ops && new->vm_ops->open)
+		new->vm_ops->open(new);
+
+	delete_vma_from_mm(vma);
+	down_write(&nommu_region_sem);
+	delete_nommu_region(vma->vm_region);
+	if (new_below) {
+		vma->vm_region->vm_start = vma->vm_start = addr;
+		vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
+	} else {
+		vma->vm_region->vm_end = vma->vm_end = addr;
 	}
+	add_nommu_region(vma->vm_region);
+	add_nommu_region(new->vm_region);
+	up_write(&nommu_region_sem);
+	add_vma_to_mm(mm, vma);
+	add_vma_to_mm(mm, new);
+	return 0;
 }
 
 /*
- * release a mapping
- * - under NOMMU conditions the parameters must match exactly to the mapping to
- *   be removed
+ * shrink a VMA by removing the specified chunk from either the beginning or
+ * the end
  */
-int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
+static int shrink_vma(struct mm_struct *mm,
+		      struct vm_area_struct *vma,
+		      unsigned long from, unsigned long to)
 {
-	struct vm_list_struct *vml, **parent;
-	unsigned long end = addr + len;
+	struct vm_region *region;
 
-#ifdef DEBUG
-	printk("do_munmap:\n");
-#endif
+	kenter("");
 
-	for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) {
-		if ((*parent)->vma->vm_start > addr)
-			break;
-		if ((*parent)->vma->vm_start == addr &&
-		    ((len == 0) || ((*parent)->vma->vm_end == end)))
-			goto found;
-	}
+	/* adjust the VMA's pointers, which may reposition it in the MM's tree
+	 * and list */
+	delete_vma_from_mm(vma);
+	if (from > vma->vm_start)
+		vma->vm_end = from;
+	else
+		vma->vm_start = to;
+	add_vma_to_mm(mm, vma);
 
-	printk("munmap of non-mmaped memory by process %d (%s): %p\n",
-	       current->pid, current->comm, (void *) addr);
-	return -EINVAL;
+	/* cut the backing region down to size */
+	region = vma->vm_region;
+	BUG_ON(atomic_read(&region->vm_usage) != 1);
 
- found:
-	vml = *parent;
+	down_write(&nommu_region_sem);
+	delete_nommu_region(region);
+	if (from > region->vm_start)
+		region->vm_end = from;
+	else
+		region->vm_start = to;
+	add_nommu_region(region);
+	up_write(&nommu_region_sem);
 
-	put_vma(mm, vml->vma);
+	free_page_series(from, to);
+	return 0;
+}
 
-	*parent = vml->next;
-	kfree(vml);
+/*
+ * release a mapping
+ * - under NOMMU conditions the chunk to be unmapped must be backed by a single
+ *   VMA, though it need not cover the whole VMA
+ */
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+{
+	struct vm_area_struct *vma;
+	struct rb_node *rb;
+	unsigned long end = start + len;
+	int ret;
 
-	update_hiwater_vm(mm);
-	mm->total_vm -= len >> PAGE_SHIFT;
+	kenter(",%lx,%zx", start, len);
 
-#ifdef DEBUG
-	show_process_blocks();
-#endif
+	if (len == 0)
+		return -EINVAL;
+
+	/* find the first potentially overlapping VMA */
+	vma = find_vma(mm, start);
+	if (!vma) {
+		printk(KERN_WARNING
+		       "munmap of memory not mmapped by process %d (%s):"
+		       " 0x%lx-0x%lx\n",
+		       current->pid, current->comm, start, start + len - 1);
+		return -EINVAL;
+	}
 
+	/* we're allowed to split an anonymous VMA but not a file-backed one */
+	if (vma->vm_file) {
+		do {
+			if (start > vma->vm_start) {
+				kleave(" = -EINVAL [miss]");
+				return -EINVAL;
+			}
+			if (end == vma->vm_end)
+				goto erase_whole_vma;
+			rb = rb_next(&vma->vm_rb);
+			vma = rb_entry(rb, struct vm_area_struct, vm_rb);
+		} while (rb);
+		kleave(" = -EINVAL [split file]");
+		return -EINVAL;
+	} else {
+		/* the chunk must be a subset of the VMA found */
+		if (start == vma->vm_start && end == vma->vm_end)
+			goto erase_whole_vma;
+		if (start < vma->vm_start || end > vma->vm_end) {
+			kleave(" = -EINVAL [superset]");
+			return -EINVAL;
+		}
+		if (start & ~PAGE_MASK) {
+			kleave(" = -EINVAL [unaligned start]");
+			return -EINVAL;
+		}
+		if (end != vma->vm_end && end & ~PAGE_MASK) {
+			kleave(" = -EINVAL [unaligned split]");
+			return -EINVAL;
+		}
+		if (start != vma->vm_start && end != vma->vm_end) {
+			ret = split_vma(mm, vma, start, 1);
+			if (ret < 0) {
+				kleave(" = %d [split]", ret);
+				return ret;
+			}
+		}
+		return shrink_vma(mm, vma, start, end);
+	}
+
+erase_whole_vma:
+	delete_vma_from_mm(vma);
+	delete_vma(mm, vma);
+	kleave(" = 0");
 	return 0;
 }
 EXPORT_SYMBOL(do_munmap);
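Summing up the new do_munmap() contract: a file-backed VMA may only be unmapped whole, while an anonymous VMA may be shrunk or split provided the chunk is a page-aligned subset. A hypothetical restatement of the anonymous-VMA checks, assuming 4 KiB pages purely for the example:

#include <stdbool.h>

#define SK_PAGE_SIZE 4096UL	/* assumed page size for this sketch */

static bool anon_munmap_ok(unsigned long vm_start, unsigned long vm_end,
			   unsigned long start, unsigned long end)
{
	if (start < vm_start || end > vm_end)
		return false;			/* not a subset of the VMA */
	if (start % SK_PAGE_SIZE)
		return false;			/* unaligned start */
	if (end != vm_end && end % SK_PAGE_SIZE)
		return false;			/* unaligned split point */
	return true;				/* erase, shrink or split */
}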
@@ -1184,29 +1552,26 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len)
 }
 
 /*
- * Release all mappings
+ * release all the mappings made in a process's VM space
  */
-void exit_mmap(struct mm_struct * mm)
+void exit_mmap(struct mm_struct *mm)
 {
-	struct vm_list_struct *tmp;
+	struct vm_area_struct *vma;
 
-	if (mm) {
-#ifdef DEBUG
-		printk("Exit_mmap:\n");
-#endif
+	if (!mm)
+		return;
 
-		mm->total_vm = 0;
+	kenter("");
 
-		while ((tmp = mm->context.vmlist)) {
-			mm->context.vmlist = tmp->next;
-			put_vma(mm, tmp->vma);
-			kfree(tmp);
-		}
+	mm->total_vm = 0;
 
-#ifdef DEBUG
-		show_process_blocks();
-#endif
+	while ((vma = mm->mmap)) {
+		mm->mmap = vma->vm_next;
+		delete_vma_from_mm(vma);
+		delete_vma(mm, vma);
 	}
+
+	kleave("");
 }
 
 unsigned long do_brk(unsigned long addr, unsigned long len)
@@ -1219,8 +1584,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
  *   time (controlled by the MREMAP_MAYMOVE flag and available VM space)
  *
  * under NOMMU conditions, we only permit changing a mapping's size, and only
- * as long as it stays within the hole allocated by the kmalloc() call in
- * do_mmap_pgoff() and the block is not shareable
+ * as long as it stays within the region allocated by do_mmap_private() and the
+ * block is not shareable
  *
  * MREMAP_FIXED is not supported under NOMMU conditions
  */
@@ -1231,13 +1596,16 @@ unsigned long do_mremap(unsigned long addr,
 	struct vm_area_struct *vma;
 
 	/* insanity checks first */
-	if (new_len == 0)
+	if (old_len == 0 || new_len == 0)
 		return (unsigned long) -EINVAL;
 
+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+
 	if (flags & MREMAP_FIXED && new_addr != addr)
 		return (unsigned long) -EINVAL;
 
-	vma = find_vma_exact(current->mm, addr);
+	vma = find_vma_exact(current->mm, addr, old_len);
 	if (!vma)
 		return (unsigned long) -EINVAL;
 
@@ -1247,19 +1615,19 @@ unsigned long do_mremap(unsigned long addr,
 	if (vma->vm_flags & VM_MAYSHARE)
 		return (unsigned long) -EPERM;
 
-	if (new_len > kobjsize((void *) addr))
+	if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
 		return (unsigned long) -ENOMEM;
 
 	/* all checks complete - do it */
 	vma->vm_end = vma->vm_start + new_len;
-
 	return vma->vm_start;
 }
 EXPORT_SYMBOL(do_mremap);
 
-asmlinkage unsigned long sys_mremap(unsigned long addr,
-	unsigned long old_len, unsigned long new_len,
-	unsigned long flags, unsigned long new_addr)
+asmlinkage
+unsigned long sys_mremap(unsigned long addr,
+			 unsigned long old_len, unsigned long new_len,
+			 unsigned long flags, unsigned long new_addr)
 {
 	unsigned long ret;
 
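The resize check now measures against the backing region instead of kobjsize(), so a non-shareable mapping may grow in place only up to the extent that was reserved when it was first mapped. A small sketch of the rule — the bounds are illustrative, not live kernel state:

/* returns 0 if the new length still fits inside the backing region,
 * or -12 (-ENOMEM spelled out to stay self-contained) if it would not */
static long mremap_size_check(unsigned long region_start,
			      unsigned long region_end,
			      unsigned long new_len)
{
	if (new_len > region_end - region_start)
		return -12;
	return 0;	/* the VMA's end can simply be moved */
}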