author      Linus Torvalds <torvalds@linux-foundation.org>  2009-01-09 17:00:58 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>  2009-01-09 17:00:58 -0500
commit      c40f6f8bbc4cbd2902671aacd587400ddca62627 (patch)
tree        a991e5521e10943f4457fb7f494e00aec75cc7df
parent      1a7d0f0bec4be078ce2cfb11538c0f4ffbbed8e5 (diff)
parent      cb6ff208076b5f434db1b8c983429269d719cef5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu

* git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu:
  NOMMU: Support XIP on initramfs
  NOMMU: Teach kobjsize() about VMA regions.
  FLAT: Don't attempt to expand the userspace stack to fill the space allocated
  FDPIC: Don't attempt to expand the userspace stack to fill the space allocated
  NOMMU: Improve procfs output using per-MM VMAs
  NOMMU: Make mmap allocation page trimming behaviour configurable.
  NOMMU: Make VMAs per MM as for MMU-mode linux
  NOMMU: Delete askedalloc and realalloc variables
  NOMMU: Rename ARM's struct vm_region
  NOMMU: Fix cleanup handling in ramfs_nommu_get_umapped_area()
-rw-r--r--  Documentation/nommu-mmap.txt        31
-rw-r--r--  Documentation/sysctl/vm.txt         18
-rw-r--r--  arch/arm/include/asm/mmu.h           1
-rw-r--r--  arch/arm/mm/dma-mapping.c           28
-rw-r--r--  arch/blackfin/include/asm/mmu.h      1
-rw-r--r--  arch/blackfin/kernel/ptrace.c        6
-rw-r--r--  arch/blackfin/kernel/traps.c        11
-rw-r--r--  arch/frv/kernel/ptrace.c            11
-rw-r--r--  arch/h8300/include/asm/mmu.h         1
-rw-r--r--  arch/m68knommu/include/asm/mmu.h     1
-rw-r--r--  arch/sh/include/asm/mmu.h            1
-rw-r--r--  fs/binfmt_elf_fdpic.c               35
-rw-r--r--  fs/binfmt_flat.c                    34
-rw-r--r--  fs/proc/internal.h                   2
-rw-r--r--  fs/proc/meminfo.c                    6
-rw-r--r--  fs/proc/nommu.c                     71
-rw-r--r--  fs/proc/task_nommu.c               120
-rw-r--r--  fs/ramfs/file-nommu.c               21
-rw-r--r--  include/asm-frv/mmu.h                1
-rw-r--r--  include/asm-m32r/mmu.h               1
-rw-r--r--  include/linux/mm.h                  18
-rw-r--r--  include/linux/mm_types.h            19
-rw-r--r--  init/initramfs.c                     1
-rw-r--r--  ipc/shm.c                           12
-rw-r--r--  kernel/fork.c                        4
-rw-r--r--  kernel/sysctl.c                     14
-rw-r--r--  lib/Kconfig.debug                    7
-rw-r--r--  mm/mmap.c                           10
-rw-r--r--  mm/nommu.c                        1027
29 files changed, 992 insertions, 521 deletions
diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
index 7714f57caad5..b565e8279d13 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/nommu-mmap.txt
@@ -109,12 +109,18 @@ and it's also much more restricted in the latter case:
 FURTHER NOTES ON NO-MMU MMAP
 ============================
 
- (*) A request for a private mapping of less than a page in size may not return
-     a page-aligned buffer. This is because the kernel calls kmalloc() to
-     allocate the buffer, not get_free_page().
+ (*) A request for a private mapping of a file may return a buffer that is not
+     page-aligned. This is because XIP may take place, and the data may not be
+     paged aligned in the backing store.
 
- (*) A list of all the mappings on the system is visible through /proc/maps in
-     no-MMU mode.
+ (*) A request for an anonymous mapping will always be page aligned. If
+     possible the size of the request should be a power of two otherwise some
+     of the space may be wasted as the kernel must allocate a power-of-2
+     granule but will only discard the excess if appropriately configured as
+     this has an effect on fragmentation.
+
+ (*) A list of all the private copy and anonymous mappings on the system is
+     visible through /proc/maps in no-MMU mode.
 
  (*) A list of all the mappings in use by a process is visible through
      /proc/<pid>/maps in no-MMU mode.
@@ -242,3 +248,18 @@ PROVIDING SHAREABLE BLOCK DEVICE SUPPORT
 Provision of shared mappings on block device files is exactly the same as for
 character devices. If there isn't a real device underneath, then the driver
 should allocate sufficient contiguous memory to honour any supported mapping.
+
+
+=================================
+ADJUSTING PAGE TRIMMING BEHAVIOUR
+=================================
+
+NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages
+when performing an allocation. This can have adverse effects on memory
+fragmentation, and as such, is left configurable. The default behaviour is to
+aggressively trim allocations and discard any excess pages back in to the page
+allocator. In order to retain finer-grained control over fragmentation, this
+behaviour can either be disabled completely, or bumped up to a higher page
+watermark where trimming begins.
+
+Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'.
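
To make the power-of-2 granule point above concrete, here is a minimal
userspace sketch (illustrative only, not part of this patch; it assumes 4KB
pages and an ordinary mmap() on the NOMMU target):

    #include <sys/mman.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        size_t len = 96 * 1024;    /* 24 pages: not a power of two, so the
                                    * NOMMU allocator grabs a 128KB (32-page)
                                    * granule and only hands the 32KB excess
                                    * back to the page allocator if
                                    * nr_trim_pages permits */
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
            perror("mmap");
            return EXIT_FAILURE;
        }
        printf("anonymous mapping at %p\n", p);
        munmap(p, len);
        return 0;
    }

With nr_trim_pages at its default of 1 the excess pages would be trimmed back
to the page allocator; with it set to 0 the whole 128KB granule would stay
attached to the mapping.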
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index cd05994a49e6..a3415070bcac 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -38,6 +38,7 @@ Currently, these files are in /proc/sys/vm:
 - numa_zonelist_order
 - nr_hugepages
 - nr_overcommit_hugepages
+- nr_trim_pages (only if CONFIG_MMU=n)
 
 ==============================================================
 
@@ -348,3 +349,20 @@ Change the maximum size of the hugepage pool. The maximum is
 nr_hugepages + nr_overcommit_hugepages.
 
 See Documentation/vm/hugetlbpage.txt
+
+==============================================================
+
+nr_trim_pages
+
+This is available only on NOMMU kernels.
+
+This value adjusts the excess page trimming behaviour of power-of-2 aligned
+NOMMU mmap allocations.
+
+A value of 0 disables trimming of allocations entirely, while a value of 1
+trims excess pages aggressively. Any value >= 1 acts as the watermark where
+trimming of allocations is initiated.
+
+The default value is 1.
+
+See Documentation/nommu-mmap.txt for more information.
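
For completeness, the new sysctl can be driven from userspace by writing to
/proc/sys/vm/nr_trim_pages; a small illustrative helper (not part of the
patch):

    #include <stdio.h>

    /* Illustrative only: set a new nr_trim_pages value.
     * 0 disables excess-page trimming; >= 1 is the trim watermark in pages. */
    static int set_nr_trim_pages(int pages)
    {
        FILE *f = fopen("/proc/sys/vm/nr_trim_pages", "w");

        if (!f)
            return -1;
        fprintf(f, "%d\n", pages);
        return fclose(f);
    }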
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 53099d4ee421..b561584d04a1 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -24,7 +24,6 @@ typedef struct {
24 * modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com> 24 * modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com>
25 */ 25 */
26typedef struct { 26typedef struct {
27 struct vm_list_struct *vmlist;
28 unsigned long end_brk; 27 unsigned long end_brk;
29} mm_context_t; 28} mm_context_t;
30 29
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 67960017dc8f..310e479309ef 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -71,7 +71,7 @@ static DEFINE_SPINLOCK(consistent_lock);
71 * the amount of RAM found at boot time.) I would imagine that get_vm_area() 71 * the amount of RAM found at boot time.) I would imagine that get_vm_area()
72 * would have to initialise this each time prior to calling vm_region_alloc(). 72 * would have to initialise this each time prior to calling vm_region_alloc().
73 */ 73 */
74struct vm_region { 74struct arm_vm_region {
75 struct list_head vm_list; 75 struct list_head vm_list;
76 unsigned long vm_start; 76 unsigned long vm_start;
77 unsigned long vm_end; 77 unsigned long vm_end;
@@ -79,20 +79,20 @@ struct vm_region {
79 int vm_active; 79 int vm_active;
80}; 80};
81 81
82static struct vm_region consistent_head = { 82static struct arm_vm_region consistent_head = {
83 .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), 83 .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
84 .vm_start = CONSISTENT_BASE, 84 .vm_start = CONSISTENT_BASE,
85 .vm_end = CONSISTENT_END, 85 .vm_end = CONSISTENT_END,
86}; 86};
87 87
88static struct vm_region * 88static struct arm_vm_region *
89vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp) 89arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp)
90{ 90{
91 unsigned long addr = head->vm_start, end = head->vm_end - size; 91 unsigned long addr = head->vm_start, end = head->vm_end - size;
92 unsigned long flags; 92 unsigned long flags;
93 struct vm_region *c, *new; 93 struct arm_vm_region *c, *new;
94 94
95 new = kmalloc(sizeof(struct vm_region), gfp); 95 new = kmalloc(sizeof(struct arm_vm_region), gfp);
96 if (!new) 96 if (!new)
97 goto out; 97 goto out;
98 98
@@ -127,9 +127,9 @@ vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
127 return NULL; 127 return NULL;
128} 128}
129 129
130static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr) 130static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr)
131{ 131{
132 struct vm_region *c; 132 struct arm_vm_region *c;
133 133
134 list_for_each_entry(c, &head->vm_list, vm_list) { 134 list_for_each_entry(c, &head->vm_list, vm_list) {
135 if (c->vm_active && c->vm_start == addr) 135 if (c->vm_active && c->vm_start == addr)
@@ -149,7 +149,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
149 pgprot_t prot) 149 pgprot_t prot)
150{ 150{
151 struct page *page; 151 struct page *page;
152 struct vm_region *c; 152 struct arm_vm_region *c;
153 unsigned long order; 153 unsigned long order;
154 u64 mask = ISA_DMA_THRESHOLD, limit; 154 u64 mask = ISA_DMA_THRESHOLD, limit;
155 155
@@ -214,7 +214,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
214 /* 214 /*
215 * Allocate a virtual address in the consistent mapping region. 215 * Allocate a virtual address in the consistent mapping region.
216 */ 216 */
217 c = vm_region_alloc(&consistent_head, size, 217 c = arm_vm_region_alloc(&consistent_head, size,
218 gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); 218 gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
219 if (c) { 219 if (c) {
220 pte_t *pte; 220 pte_t *pte;
@@ -311,13 +311,13 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
311 void *cpu_addr, dma_addr_t dma_addr, size_t size) 311 void *cpu_addr, dma_addr_t dma_addr, size_t size)
312{ 312{
313 unsigned long flags, user_size, kern_size; 313 unsigned long flags, user_size, kern_size;
314 struct vm_region *c; 314 struct arm_vm_region *c;
315 int ret = -ENXIO; 315 int ret = -ENXIO;
316 316
317 user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 317 user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
318 318
319 spin_lock_irqsave(&consistent_lock, flags); 319 spin_lock_irqsave(&consistent_lock, flags);
320 c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); 320 c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
321 spin_unlock_irqrestore(&consistent_lock, flags); 321 spin_unlock_irqrestore(&consistent_lock, flags);
322 322
323 if (c) { 323 if (c) {
@@ -359,7 +359,7 @@ EXPORT_SYMBOL(dma_mmap_writecombine);
359 */ 359 */
360void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) 360void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
361{ 361{
362 struct vm_region *c; 362 struct arm_vm_region *c;
363 unsigned long flags, addr; 363 unsigned long flags, addr;
364 pte_t *ptep; 364 pte_t *ptep;
365 int idx; 365 int idx;
@@ -378,7 +378,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr
378 size = PAGE_ALIGN(size); 378 size = PAGE_ALIGN(size);
379 379
380 spin_lock_irqsave(&consistent_lock, flags); 380 spin_lock_irqsave(&consistent_lock, flags);
381 c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); 381 c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
382 if (!c) 382 if (!c)
383 goto no_area; 383 goto no_area;
384 384
diff --git a/arch/blackfin/include/asm/mmu.h b/arch/blackfin/include/asm/mmu.h
index 757e43906ed4..dbfd686360e6 100644
--- a/arch/blackfin/include/asm/mmu.h
+++ b/arch/blackfin/include/asm/mmu.h
@@ -10,7 +10,6 @@ struct sram_list_struct {
10}; 10};
11 11
12typedef struct { 12typedef struct {
13 struct vm_list_struct *vmlist;
14 unsigned long end_brk; 13 unsigned long end_brk;
15 unsigned long stack_start; 14 unsigned long stack_start;
16 15
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index d2d388536630..594e325b40e4 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -160,15 +160,15 @@ put_reg(struct task_struct *task, int regno, unsigned long data)
160static inline int is_user_addr_valid(struct task_struct *child, 160static inline int is_user_addr_valid(struct task_struct *child,
161 unsigned long start, unsigned long len) 161 unsigned long start, unsigned long len)
162{ 162{
163 struct vm_list_struct *vml; 163 struct vm_area_struct *vma;
164 struct sram_list_struct *sraml; 164 struct sram_list_struct *sraml;
165 165
166 /* overflow */ 166 /* overflow */
167 if (start + len < start) 167 if (start + len < start)
168 return -EIO; 168 return -EIO;
169 169
170 for (vml = child->mm->context.vmlist; vml; vml = vml->next) 170 vma = find_vma(child->mm, start);
171 if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end) 171 if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
172 return 0; 172 return 0;
173 173
174 for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next) 174 for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 17d8e4172896..5b0667da8d05 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -32,6 +32,7 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/kallsyms.h> 33#include <linux/kallsyms.h>
34#include <linux/fs.h> 34#include <linux/fs.h>
35#include <linux/rbtree.h>
35#include <asm/traps.h> 36#include <asm/traps.h>
36#include <asm/cacheflush.h> 37#include <asm/cacheflush.h>
37#include <asm/cplb.h> 38#include <asm/cplb.h>
@@ -83,6 +84,7 @@ static void decode_address(char *buf, unsigned long address)
83 struct mm_struct *mm; 84 struct mm_struct *mm;
84 unsigned long flags, offset; 85 unsigned long flags, offset;
85 unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); 86 unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
87 struct rb_node *n;
86 88
87#ifdef CONFIG_KALLSYMS 89#ifdef CONFIG_KALLSYMS
88 unsigned long symsize; 90 unsigned long symsize;
@@ -128,9 +130,10 @@ static void decode_address(char *buf, unsigned long address)
128 if (!mm) 130 if (!mm)
129 continue; 131 continue;
130 132
131 vml = mm->context.vmlist; 133 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
132 while (vml) { 134 struct vm_area_struct *vma;
133 struct vm_area_struct *vma = vml->vma; 135
136 vma = rb_entry(n, struct vm_area_struct, vm_rb);
134 137
135 if (address >= vma->vm_start && address < vma->vm_end) { 138 if (address >= vma->vm_start && address < vma->vm_end) {
136 char _tmpbuf[256]; 139 char _tmpbuf[256];
@@ -176,8 +179,6 @@ static void decode_address(char *buf, unsigned long address)
176 179
177 goto done; 180 goto done;
178 } 181 }
179
180 vml = vml->next;
181 } 182 }
182 if (!in_atomic) 183 if (!in_atomic)
183 mmput(mm); 184 mmput(mm);
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 709e9bdc6126..5e7d401d21e7 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -69,7 +69,8 @@ static inline int put_reg(struct task_struct *task, int regno,
69} 69}
70 70
71/* 71/*
72 * check that an address falls within the bounds of the target process's memory mappings 72 * check that an address falls within the bounds of the target process's memory
73 * mappings
73 */ 74 */
74static inline int is_user_addr_valid(struct task_struct *child, 75static inline int is_user_addr_valid(struct task_struct *child,
75 unsigned long start, unsigned long len) 76 unsigned long start, unsigned long len)
@@ -79,11 +80,11 @@ static inline int is_user_addr_valid(struct task_struct *child,
79 return -EIO; 80 return -EIO;
80 return 0; 81 return 0;
81#else 82#else
82 struct vm_list_struct *vml; 83 struct vm_area_struct *vma;
83 84
84 for (vml = child->mm->context.vmlist; vml; vml = vml->next) 85 vma = find_vma(child->mm, start);
85 if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end) 86 if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
86 return 0; 87 return 0;
87 88
88 return -EIO; 89 return -EIO;
89#endif 90#endif
diff --git a/arch/h8300/include/asm/mmu.h b/arch/h8300/include/asm/mmu.h
index 2ce06ea46104..31309969df70 100644
--- a/arch/h8300/include/asm/mmu.h
+++ b/arch/h8300/include/asm/mmu.h
@@ -4,7 +4,6 @@
4/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */ 4/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
5 5
6typedef struct { 6typedef struct {
7 struct vm_list_struct *vmlist;
8 unsigned long end_brk; 7 unsigned long end_brk;
9} mm_context_t; 8} mm_context_t;
10 9
diff --git a/arch/m68knommu/include/asm/mmu.h b/arch/m68knommu/include/asm/mmu.h
index 5fa6b68353ba..e2da1e6f09fe 100644
--- a/arch/m68knommu/include/asm/mmu.h
+++ b/arch/m68knommu/include/asm/mmu.h
@@ -4,7 +4,6 @@
4/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */ 4/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
5 5
6typedef struct { 6typedef struct {
7 struct vm_list_struct *vmlist;
8 unsigned long end_brk; 7 unsigned long end_brk;
9} mm_context_t; 8} mm_context_t;
10 9
diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h
index fdcb93bc6d11..6c43625bb1a5 100644
--- a/arch/sh/include/asm/mmu.h
+++ b/arch/sh/include/asm/mmu.h
@@ -9,7 +9,6 @@ typedef struct {
9 mm_context_id_t id; 9 mm_context_id_t id;
10 void *vdso; 10 void *vdso;
11#else 11#else
12 struct vm_list_struct *vmlist;
13 unsigned long end_brk; 12 unsigned long end_brk;
14#endif 13#endif
15#ifdef CONFIG_BINFMT_ELF_FDPIC 14#ifdef CONFIG_BINFMT_ELF_FDPIC
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index aa5b43205e37..f3e72c5c19f5 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -168,9 +168,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
168 struct elf_fdpic_params exec_params, interp_params; 168 struct elf_fdpic_params exec_params, interp_params;
169 struct elf_phdr *phdr; 169 struct elf_phdr *phdr;
170 unsigned long stack_size, entryaddr; 170 unsigned long stack_size, entryaddr;
171#ifndef CONFIG_MMU
172 unsigned long fullsize;
173#endif
174#ifdef ELF_FDPIC_PLAT_INIT 171#ifdef ELF_FDPIC_PLAT_INIT
175 unsigned long dynaddr; 172 unsigned long dynaddr;
176#endif 173#endif
@@ -390,11 +387,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
390 goto error_kill; 387 goto error_kill;
391 } 388 }
392 389
393 /* expand the stack mapping to use up the entire allocation granule */
394 fullsize = kobjsize((char *) current->mm->start_brk);
395 if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size,
396 fullsize, 0, 0)))
397 stack_size = fullsize;
398 up_write(&current->mm->mmap_sem); 390 up_write(&current->mm->mmap_sem);
399 391
400 current->mm->brk = current->mm->start_brk; 392 current->mm->brk = current->mm->start_brk;
@@ -1567,11 +1559,9 @@ end_coredump:
1567static int elf_fdpic_dump_segments(struct file *file, size_t *size, 1559static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1568 unsigned long *limit, unsigned long mm_flags) 1560 unsigned long *limit, unsigned long mm_flags)
1569{ 1561{
1570 struct vm_list_struct *vml; 1562 struct vm_area_struct *vma;
1571
1572 for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
1573 struct vm_area_struct *vma = vml->vma;
1574 1563
1564 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1575 if (!maydump(vma, mm_flags)) 1565 if (!maydump(vma, mm_flags))
1576 continue; 1566 continue;
1577 1567
@@ -1617,9 +1607,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1617 elf_fpxregset_t *xfpu = NULL; 1607 elf_fpxregset_t *xfpu = NULL;
1618#endif 1608#endif
1619 int thread_status_size = 0; 1609 int thread_status_size = 0;
1620#ifndef CONFIG_MMU
1621 struct vm_list_struct *vml;
1622#endif
1623 elf_addr_t *auxv; 1610 elf_addr_t *auxv;
1624 unsigned long mm_flags; 1611 unsigned long mm_flags;
1625 1612
@@ -1685,13 +1672,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1685 fill_prstatus(prstatus, current, signr); 1672 fill_prstatus(prstatus, current, signr);
1686 elf_core_copy_regs(&prstatus->pr_reg, regs); 1673 elf_core_copy_regs(&prstatus->pr_reg, regs);
1687 1674
1688#ifdef CONFIG_MMU
1689 segs = current->mm->map_count; 1675 segs = current->mm->map_count;
1690#else
1691 segs = 0;
1692 for (vml = current->mm->context.vmlist; vml; vml = vml->next)
1693 segs++;
1694#endif
1695#ifdef ELF_CORE_EXTRA_PHDRS 1676#ifdef ELF_CORE_EXTRA_PHDRS
1696 segs += ELF_CORE_EXTRA_PHDRS; 1677 segs += ELF_CORE_EXTRA_PHDRS;
1697#endif 1678#endif
@@ -1766,20 +1747,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1766 mm_flags = current->mm->flags; 1747 mm_flags = current->mm->flags;
1767 1748
1768 /* write program headers for segments dump */ 1749 /* write program headers for segments dump */
1769 for ( 1750 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1770#ifdef CONFIG_MMU
1771 vma = current->mm->mmap; vma; vma = vma->vm_next
1772#else
1773 vml = current->mm->context.vmlist; vml; vml = vml->next
1774#endif
1775 ) {
1776 struct elf_phdr phdr; 1751 struct elf_phdr phdr;
1777 size_t sz; 1752 size_t sz;
1778 1753
1779#ifndef CONFIG_MMU
1780 vma = vml->vma;
1781#endif
1782
1783 sz = vma->vm_end - vma->vm_start; 1754 sz = vma->vm_end - vma->vm_start;
1784 1755
1785 phdr.p_type = PT_LOAD; 1756 phdr.p_type = PT_LOAD;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 7bbd5c6b3725..5cebf0b37798 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -417,8 +417,8 @@ static int load_flat_file(struct linux_binprm * bprm,
417 unsigned long textpos = 0, datapos = 0, result; 417 unsigned long textpos = 0, datapos = 0, result;
418 unsigned long realdatastart = 0; 418 unsigned long realdatastart = 0;
419 unsigned long text_len, data_len, bss_len, stack_len, flags; 419 unsigned long text_len, data_len, bss_len, stack_len, flags;
420 unsigned long len, reallen, memp = 0; 420 unsigned long len, memp = 0;
421 unsigned long extra, rlim; 421 unsigned long memp_size, extra, rlim;
422 unsigned long *reloc = 0, *rp; 422 unsigned long *reloc = 0, *rp;
423 struct inode *inode; 423 struct inode *inode;
424 int i, rev, relocs = 0; 424 int i, rev, relocs = 0;
@@ -543,17 +543,10 @@ static int load_flat_file(struct linux_binprm * bprm,
543 } 543 }
544 544
545 len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); 545 len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
546 len = PAGE_ALIGN(len);
546 down_write(&current->mm->mmap_sem); 547 down_write(&current->mm->mmap_sem);
547 realdatastart = do_mmap(0, 0, len, 548 realdatastart = do_mmap(0, 0, len,
548 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); 549 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
549 /* Remap to use all availabe slack region space */
550 if (realdatastart && (realdatastart < (unsigned long)-4096)) {
551 reallen = kobjsize((void *)realdatastart);
552 if (reallen > len) {
553 realdatastart = do_mremap(realdatastart, len,
554 reallen, MREMAP_FIXED, realdatastart);
555 }
556 }
557 up_write(&current->mm->mmap_sem); 550 up_write(&current->mm->mmap_sem);
558 551
559 if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { 552 if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
@@ -591,21 +584,14 @@ static int load_flat_file(struct linux_binprm * bprm,
591 584
592 reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); 585 reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
593 memp = realdatastart; 586 memp = realdatastart;
594 587 memp_size = len;
595 } else { 588 } else {
596 589
597 len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); 590 len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
591 len = PAGE_ALIGN(len);
598 down_write(&current->mm->mmap_sem); 592 down_write(&current->mm->mmap_sem);
599 textpos = do_mmap(0, 0, len, 593 textpos = do_mmap(0, 0, len,
600 PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); 594 PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
601 /* Remap to use all availabe slack region space */
602 if (textpos && (textpos < (unsigned long) -4096)) {
603 reallen = kobjsize((void *)textpos);
604 if (reallen > len) {
605 textpos = do_mremap(textpos, len, reallen,
606 MREMAP_FIXED, textpos);
607 }
608 }
609 up_write(&current->mm->mmap_sem); 595 up_write(&current->mm->mmap_sem);
610 596
611 if (!textpos || textpos >= (unsigned long) -4096) { 597 if (!textpos || textpos >= (unsigned long) -4096) {
@@ -622,7 +608,7 @@ static int load_flat_file(struct linux_binprm * bprm,
622 reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + 608 reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) +
623 MAX_SHARED_LIBS * sizeof(unsigned long)); 609 MAX_SHARED_LIBS * sizeof(unsigned long));
624 memp = textpos; 610 memp = textpos;
625 611 memp_size = len;
626#ifdef CONFIG_BINFMT_ZFLAT 612#ifdef CONFIG_BINFMT_ZFLAT
627 /* 613 /*
628 * load it all in and treat it like a RAM load from now on 614 * load it all in and treat it like a RAM load from now on
@@ -680,10 +666,12 @@ static int load_flat_file(struct linux_binprm * bprm,
680 * set up the brk stuff, uses any slack left in data/bss/stack 666 * set up the brk stuff, uses any slack left in data/bss/stack
681 * allocation. We put the brk after the bss (between the bss 667 * allocation. We put the brk after the bss (between the bss
682 * and stack) like other platforms. 668 * and stack) like other platforms.
669 * Userspace code relies on the stack pointer starting out at
670 * an address right at the end of a page.
683 */ 671 */
684 current->mm->start_brk = datapos + data_len + bss_len; 672 current->mm->start_brk = datapos + data_len + bss_len;
685 current->mm->brk = (current->mm->start_brk + 3) & ~3; 673 current->mm->brk = (current->mm->start_brk + 3) & ~3;
686 current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len; 674 current->mm->context.end_brk = memp + memp_size - stack_len;
687 } 675 }
688 676
689 if (flags & FLAT_FLAG_KTRACE) 677 if (flags & FLAT_FLAG_KTRACE)
@@ -790,8 +778,8 @@ static int load_flat_file(struct linux_binprm * bprm,
790 778
791 /* zero the BSS, BRK and stack areas */ 779 /* zero the BSS, BRK and stack areas */
792 memset((void*)(datapos + data_len), 0, bss_len + 780 memset((void*)(datapos + data_len), 0, bss_len +
793 (memp + kobjsize((void *) memp) - stack_len - /* end brk */ 781 (memp + memp_size - stack_len - /* end brk */
794 libinfo->lib_list[id].start_brk) + /* start brk */ 782 libinfo->lib_list[id].start_brk) + /* start brk */
795 stack_len); 783 stack_len);
796 784
797 return 0; 785 return 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e8aeb8b61ce..cd53ff838498 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -41,8 +41,6 @@ do { \
41 (vmi)->used = 0; \ 41 (vmi)->used = 0; \
42 (vmi)->largest_chunk = 0; \ 42 (vmi)->largest_chunk = 0; \
43} while(0) 43} while(0)
44
45extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46#endif 44#endif
47 45
48extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 46extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index b1675c4e66da..43d23948384a 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -74,6 +74,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
74 "LowTotal: %8lu kB\n" 74 "LowTotal: %8lu kB\n"
75 "LowFree: %8lu kB\n" 75 "LowFree: %8lu kB\n"
76#endif 76#endif
77#ifndef CONFIG_MMU
78 "MmapCopy: %8lu kB\n"
79#endif
77 "SwapTotal: %8lu kB\n" 80 "SwapTotal: %8lu kB\n"
78 "SwapFree: %8lu kB\n" 81 "SwapFree: %8lu kB\n"
79 "Dirty: %8lu kB\n" 82 "Dirty: %8lu kB\n"
@@ -116,6 +119,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
116 K(i.totalram-i.totalhigh), 119 K(i.totalram-i.totalhigh),
117 K(i.freeram-i.freehigh), 120 K(i.freeram-i.freehigh),
118#endif 121#endif
122#ifndef CONFIG_MMU
123 K((unsigned long) atomic_read(&mmap_pages_allocated)),
124#endif
119 K(i.totalswap), 125 K(i.totalswap),
120 K(i.freeswap), 126 K(i.freeswap),
121 K(global_page_state(NR_FILE_DIRTY)), 127 K(global_page_state(NR_FILE_DIRTY)),
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 3f87d2632947..b446d7ad0b0d 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -33,33 +33,33 @@
33#include "internal.h" 33#include "internal.h"
34 34
35/* 35/*
36 * display a single VMA to a sequenced file 36 * display a single region to a sequenced file
37 */ 37 */
38int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 38static int nommu_region_show(struct seq_file *m, struct vm_region *region)
39{ 39{
40 unsigned long ino = 0; 40 unsigned long ino = 0;
41 struct file *file; 41 struct file *file;
42 dev_t dev = 0; 42 dev_t dev = 0;
43 int flags, len; 43 int flags, len;
44 44
45 flags = vma->vm_flags; 45 flags = region->vm_flags;
46 file = vma->vm_file; 46 file = region->vm_file;
47 47
48 if (file) { 48 if (file) {
49 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 49 struct inode *inode = region->vm_file->f_path.dentry->d_inode;
50 dev = inode->i_sb->s_dev; 50 dev = inode->i_sb->s_dev;
51 ino = inode->i_ino; 51 ino = inode->i_ino;
52 } 52 }
53 53
54 seq_printf(m, 54 seq_printf(m,
55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56 vma->vm_start, 56 region->vm_start,
57 vma->vm_end, 57 region->vm_end,
58 flags & VM_READ ? 'r' : '-', 58 flags & VM_READ ? 'r' : '-',
59 flags & VM_WRITE ? 'w' : '-', 59 flags & VM_WRITE ? 'w' : '-',
60 flags & VM_EXEC ? 'x' : '-', 60 flags & VM_EXEC ? 'x' : '-',
61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, 62 ((loff_t)region->vm_pgoff) << PAGE_SHIFT,
63 MAJOR(dev), MINOR(dev), ino, &len); 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64
65 if (file) { 65 if (file) {
@@ -75,61 +75,54 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
75} 75}
76 76
77/* 77/*
78 * display a list of all the VMAs the kernel knows about 78 * display a list of all the REGIONs the kernel knows about
79 * - nommu kernals have a single flat list 79 * - nommu kernals have a single flat list
80 */ 80 */
81static int nommu_vma_list_show(struct seq_file *m, void *v) 81static int nommu_region_list_show(struct seq_file *m, void *_p)
82{ 82{
83 struct vm_area_struct *vma; 83 struct rb_node *p = _p;
84 84
85 vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); 85 return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb));
86 return nommu_vma_show(m, vma);
87} 86}
88 87
89static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) 88static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos)
90{ 89{
91 struct rb_node *_rb; 90 struct rb_node *p;
92 loff_t pos = *_pos; 91 loff_t pos = *_pos;
93 void *next = NULL;
94 92
95 down_read(&nommu_vma_sem); 93 down_read(&nommu_region_sem);
96 94
97 for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) { 95 for (p = rb_first(&nommu_region_tree); p; p = rb_next(p))
98 if (pos == 0) { 96 if (pos-- == 0)
99 next = _rb; 97 return p;
100 break; 98 return NULL;
101 }
102 pos--;
103 }
104
105 return next;
106} 99}
107 100
108static void nommu_vma_list_stop(struct seq_file *m, void *v) 101static void nommu_region_list_stop(struct seq_file *m, void *v)
109{ 102{
110 up_read(&nommu_vma_sem); 103 up_read(&nommu_region_sem);
111} 104}
112 105
113static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) 106static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
114{ 107{
115 (*pos)++; 108 (*pos)++;
116 return rb_next((struct rb_node *) v); 109 return rb_next((struct rb_node *) v);
117} 110}
118 111
119static const struct seq_operations proc_nommu_vma_list_seqop = { 112static struct seq_operations proc_nommu_region_list_seqop = {
120 .start = nommu_vma_list_start, 113 .start = nommu_region_list_start,
121 .next = nommu_vma_list_next, 114 .next = nommu_region_list_next,
122 .stop = nommu_vma_list_stop, 115 .stop = nommu_region_list_stop,
123 .show = nommu_vma_list_show 116 .show = nommu_region_list_show
124}; 117};
125 118
126static int proc_nommu_vma_list_open(struct inode *inode, struct file *file) 119static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
127{ 120{
128 return seq_open(file, &proc_nommu_vma_list_seqop); 121 return seq_open(file, &proc_nommu_region_list_seqop);
129} 122}
130 123
131static const struct file_operations proc_nommu_vma_list_operations = { 124static const struct file_operations proc_nommu_region_list_operations = {
132 .open = proc_nommu_vma_list_open, 125 .open = proc_nommu_region_list_open,
133 .read = seq_read, 126 .read = seq_read,
134 .llseek = seq_lseek, 127 .llseek = seq_lseek,
135 .release = seq_release, 128 .release = seq_release,
@@ -137,7 +130,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
137 130
138static int __init proc_nommu_init(void) 131static int __init proc_nommu_init(void)
139{ 132{
140 proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); 133 proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
141 return 0; 134 return 0;
142} 135}
143 136
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index d4a8be32b902..343ea1216bc8 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -15,25 +15,32 @@
15 */ 15 */
16void task_mem(struct seq_file *m, struct mm_struct *mm) 16void task_mem(struct seq_file *m, struct mm_struct *mm)
17{ 17{
18 struct vm_list_struct *vml; 18 struct vm_area_struct *vma;
19 unsigned long bytes = 0, sbytes = 0, slack = 0; 19 struct vm_region *region;
20 struct rb_node *p;
21 unsigned long bytes = 0, sbytes = 0, slack = 0, size;
20 22
21 down_read(&mm->mmap_sem); 23 down_read(&mm->mmap_sem);
22 for (vml = mm->context.vmlist; vml; vml = vml->next) { 24 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
23 if (!vml->vma) 25 vma = rb_entry(p, struct vm_area_struct, vm_rb);
24 continue; 26
27 bytes += kobjsize(vma);
28
29 region = vma->vm_region;
30 if (region) {
31 size = kobjsize(region);
32 size += region->vm_end - region->vm_start;
33 } else {
34 size = vma->vm_end - vma->vm_start;
35 }
25 36
26 bytes += kobjsize(vml);
27 if (atomic_read(&mm->mm_count) > 1 || 37 if (atomic_read(&mm->mm_count) > 1 ||
28 atomic_read(&vml->vma->vm_usage) > 1 38 vma->vm_flags & VM_MAYSHARE) {
29 ) { 39 sbytes += size;
30 sbytes += kobjsize((void *) vml->vma->vm_start);
31 sbytes += kobjsize(vml->vma);
32 } else { 40 } else {
33 bytes += kobjsize((void *) vml->vma->vm_start); 41 bytes += size;
34 bytes += kobjsize(vml->vma); 42 if (region)
35 slack += kobjsize((void *) vml->vma->vm_start) - 43 slack = region->vm_end - vma->vm_end;
36 (vml->vma->vm_end - vml->vma->vm_start);
37 } 44 }
38 } 45 }
39 46
@@ -70,13 +77,14 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
70 77
71unsigned long task_vsize(struct mm_struct *mm) 78unsigned long task_vsize(struct mm_struct *mm)
72{ 79{
73 struct vm_list_struct *tbp; 80 struct vm_area_struct *vma;
81 struct rb_node *p;
74 unsigned long vsize = 0; 82 unsigned long vsize = 0;
75 83
76 down_read(&mm->mmap_sem); 84 down_read(&mm->mmap_sem);
77 for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { 85 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
78 if (tbp->vma) 86 vma = rb_entry(p, struct vm_area_struct, vm_rb);
79 vsize += kobjsize((void *) tbp->vma->vm_start); 87 vsize += vma->vm_end - vma->vm_start;
80 } 88 }
81 up_read(&mm->mmap_sem); 89 up_read(&mm->mmap_sem);
82 return vsize; 90 return vsize;
@@ -85,15 +93,19 @@ unsigned long task_vsize(struct mm_struct *mm)
85int task_statm(struct mm_struct *mm, int *shared, int *text, 93int task_statm(struct mm_struct *mm, int *shared, int *text,
86 int *data, int *resident) 94 int *data, int *resident)
87{ 95{
88 struct vm_list_struct *tbp; 96 struct vm_area_struct *vma;
97 struct vm_region *region;
98 struct rb_node *p;
89 int size = kobjsize(mm); 99 int size = kobjsize(mm);
90 100
91 down_read(&mm->mmap_sem); 101 down_read(&mm->mmap_sem);
92 for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { 102 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
93 size += kobjsize(tbp); 103 vma = rb_entry(p, struct vm_area_struct, vm_rb);
94 if (tbp->vma) { 104 size += kobjsize(vma);
95 size += kobjsize(tbp->vma); 105 region = vma->vm_region;
96 size += kobjsize((void *) tbp->vma->vm_start); 106 if (region) {
107 size += kobjsize(region);
108 size += region->vm_end - region->vm_start;
97 } 109 }
98 } 110 }
99 111
@@ -105,20 +117,62 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
105} 117}
106 118
107/* 119/*
120 * display a single VMA to a sequenced file
121 */
122static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
123{
124 unsigned long ino = 0;
125 struct file *file;
126 dev_t dev = 0;
127 int flags, len;
128
129 flags = vma->vm_flags;
130 file = vma->vm_file;
131
132 if (file) {
133 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
134 dev = inode->i_sb->s_dev;
135 ino = inode->i_ino;
136 }
137
138 seq_printf(m,
139 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
140 vma->vm_start,
141 vma->vm_end,
142 flags & VM_READ ? 'r' : '-',
143 flags & VM_WRITE ? 'w' : '-',
144 flags & VM_EXEC ? 'x' : '-',
145 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
146 vma->vm_pgoff << PAGE_SHIFT,
147 MAJOR(dev), MINOR(dev), ino, &len);
148
149 if (file) {
150 len = 25 + sizeof(void *) * 6 - len;
151 if (len < 1)
152 len = 1;
153 seq_printf(m, "%*c", len, ' ');
154 seq_path(m, &file->f_path, "");
155 }
156
157 seq_putc(m, '\n');
158 return 0;
159}
160
161/*
108 * display mapping lines for a particular process's /proc/pid/maps 162 * display mapping lines for a particular process's /proc/pid/maps
109 */ 163 */
110static int show_map(struct seq_file *m, void *_vml) 164static int show_map(struct seq_file *m, void *_p)
111{ 165{
112 struct vm_list_struct *vml = _vml; 166 struct rb_node *p = _p;
113 167
114 return nommu_vma_show(m, vml->vma); 168 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
115} 169}
116 170
117static void *m_start(struct seq_file *m, loff_t *pos) 171static void *m_start(struct seq_file *m, loff_t *pos)
118{ 172{
119 struct proc_maps_private *priv = m->private; 173 struct proc_maps_private *priv = m->private;
120 struct vm_list_struct *vml;
121 struct mm_struct *mm; 174 struct mm_struct *mm;
175 struct rb_node *p;
122 loff_t n = *pos; 176 loff_t n = *pos;
123 177
124 /* pin the task and mm whilst we play with them */ 178 /* pin the task and mm whilst we play with them */
@@ -134,9 +188,9 @@ static void *m_start(struct seq_file *m, loff_t *pos)
134 } 188 }
135 189
136 /* start from the Nth VMA */ 190 /* start from the Nth VMA */
137 for (vml = mm->context.vmlist; vml; vml = vml->next) 191 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
138 if (n-- == 0) 192 if (n-- == 0)
139 return vml; 193 return p;
140 return NULL; 194 return NULL;
141} 195}
142 196
@@ -152,12 +206,12 @@ static void m_stop(struct seq_file *m, void *_vml)
152 } 206 }
153} 207}
154 208
155static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) 209static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
156{ 210{
157 struct vm_list_struct *vml = _vml; 211 struct rb_node *p = _p;
158 212
159 (*pos)++; 213 (*pos)++;
160 return vml ? vml->next : NULL; 214 return p ? rb_next(p) : NULL;
161} 215}
162 216
163static const struct seq_operations proc_pid_maps_ops = { 217static const struct seq_operations proc_pid_maps_ops = {
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 76acdbc34611..b9b567a28376 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -262,11 +262,11 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
262 ret = -ENOMEM; 262 ret = -ENOMEM;
263 pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); 263 pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
264 if (!pages) 264 if (!pages)
265 goto out; 265 goto out_free;
266 266
267 nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages); 267 nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages);
268 if (nr != lpages) 268 if (nr != lpages)
269 goto out; /* leave if some pages were missing */ 269 goto out_free_pages; /* leave if some pages were missing */
270 270
271 /* check the pages for physical adjacency */ 271 /* check the pages for physical adjacency */
272 ptr = pages; 272 ptr = pages;
@@ -274,19 +274,18 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
274 page++; 274 page++;
275 for (loop = lpages; loop > 1; loop--) 275 for (loop = lpages; loop > 1; loop--)
276 if (*ptr++ != page++) 276 if (*ptr++ != page++)
277 goto out; 277 goto out_free_pages;
278 278
279 /* okay - all conditions fulfilled */ 279 /* okay - all conditions fulfilled */
280 ret = (unsigned long) page_address(pages[0]); 280 ret = (unsigned long) page_address(pages[0]);
281 281
282 out: 282out_free_pages:
283 if (pages) { 283 ptr = pages;
284 ptr = pages; 284 for (loop = nr; loop > 0; loop--)
285 for (loop = lpages; loop > 0; loop--) 285 put_page(*ptr++);
286 put_page(*ptr++); 286out_free:
287 kfree(pages); 287 kfree(pages);
288 } 288out:
289
290 return ret; 289 return ret;
291} 290}
292 291
diff --git a/include/asm-frv/mmu.h b/include/asm-frv/mmu.h
index 22c03714fb14..86ca0e86e7d2 100644
--- a/include/asm-frv/mmu.h
+++ b/include/asm-frv/mmu.h
@@ -22,7 +22,6 @@ typedef struct {
22 unsigned long dtlb_ptd_mapping; /* [DAMR5] PTD mapping for dtlb cached PGE */ 22 unsigned long dtlb_ptd_mapping; /* [DAMR5] PTD mapping for dtlb cached PGE */
23 23
24#else 24#else
25 struct vm_list_struct *vmlist;
26 unsigned long end_brk; 25 unsigned long end_brk;
27 26
28#endif 27#endif
diff --git a/include/asm-m32r/mmu.h b/include/asm-m32r/mmu.h
index d9bd724479cf..150cb92bb666 100644
--- a/include/asm-m32r/mmu.h
+++ b/include/asm-m32r/mmu.h
@@ -4,7 +4,6 @@
4#if !defined(CONFIG_MMU) 4#if !defined(CONFIG_MMU)
5 5
6typedef struct { 6typedef struct {
7 struct vm_list_struct *vmlist;
8 unsigned long end_brk; 7 unsigned long end_brk;
9} mm_context_t; 8} mm_context_t;
10 9
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4a3d28c86443..b91a73fd1bcc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr;
56 56
57extern struct kmem_cache *vm_area_cachep; 57extern struct kmem_cache *vm_area_cachep;
58 58
59/*
60 * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
61 * disabled, then there's a single shared list of VMAs maintained by the
62 * system, and mm's subscribe to these individually
63 */
64struct vm_list_struct {
65 struct vm_list_struct *next;
66 struct vm_area_struct *vma;
67};
68
69#ifndef CONFIG_MMU 59#ifndef CONFIG_MMU
70extern struct rb_root nommu_vma_tree; 60extern struct rb_root nommu_region_tree;
71extern struct rw_semaphore nommu_vma_sem; 61extern struct rw_semaphore nommu_region_sem;
72 62
73extern unsigned int kobjsize(const void *objp); 63extern unsigned int kobjsize(const void *objp);
74#endif 64#endif
@@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long,
1061 unsigned long, enum memmap_context); 1051 unsigned long, enum memmap_context);
1062extern void setup_per_zone_pages_min(void); 1052extern void setup_per_zone_pages_min(void);
1063extern void mem_init(void); 1053extern void mem_init(void);
1054extern void __init mmap_init(void);
1064extern void show_mem(void); 1055extern void show_mem(void);
1065extern void si_meminfo(struct sysinfo * val); 1056extern void si_meminfo(struct sysinfo * val);
1066extern void si_meminfo_node(struct sysinfo *val, int nid); 1057extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void);
1072static inline void setup_per_cpu_pageset(void) {} 1063static inline void setup_per_cpu_pageset(void) {}
1073#endif 1064#endif
1074 1065
1066/* nommu.c */
1067extern atomic_t mmap_pages_allocated;
1068
1075/* prio_tree.c */ 1069/* prio_tree.c */
1076void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); 1070void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
1077void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); 1071void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9cfc9b627fdd..92915e81443f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,6 +97,23 @@ struct page {
97}; 97};
98 98
99/* 99/*
100 * A region containing a mapping of a non-memory backed file under NOMMU
101 * conditions. These are held in a global tree and are pinned by the VMAs that
102 * map parts of them.
103 */
104struct vm_region {
105 struct rb_node vm_rb; /* link in global region tree */
106 unsigned long vm_flags; /* VMA vm_flags */
107 unsigned long vm_start; /* start address of region */
108 unsigned long vm_end; /* region initialised to here */
109 unsigned long vm_top; /* region allocated to here */
110 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
111 struct file *vm_file; /* the backing file or NULL */
112
113 atomic_t vm_usage; /* region usage count */
114};
115
116/*
100 * This struct defines a memory VMM memory area. There is one of these 117 * This struct defines a memory VMM memory area. There is one of these
101 * per VM-area/task. A VM area is any part of the process virtual memory 118 * per VM-area/task. A VM area is any part of the process virtual memory
102 * space that has a special rule for the page-fault handlers (ie a shared 119 * space that has a special rule for the page-fault handlers (ie a shared
@@ -152,7 +169,7 @@ struct vm_area_struct {
152 unsigned long vm_truncate_count;/* truncate_count or restart_addr */ 169 unsigned long vm_truncate_count;/* truncate_count or restart_addr */
153 170
154#ifndef CONFIG_MMU 171#ifndef CONFIG_MMU
155 atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */ 172 struct vm_region *vm_region; /* NOMMU mapping region */
156#endif 173#endif
157#ifdef CONFIG_NUMA 174#ifdef CONFIG_NUMA
158 struct mempolicy *vm_policy; /* NUMA policy for the VMA */ 175 struct mempolicy *vm_policy; /* NUMA policy for the VMA */
diff --git a/init/initramfs.c b/init/initramfs.c
index 4f5ba75aaa7c..d9c941c0c3ca 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -317,6 +317,7 @@ static int __init do_name(void)
317 if (wfd >= 0) { 317 if (wfd >= 0) {
318 sys_fchown(wfd, uid, gid); 318 sys_fchown(wfd, uid, gid);
319 sys_fchmod(wfd, mode); 319 sys_fchmod(wfd, mode);
320 sys_ftruncate(wfd, body_len);
320 vcollected = kstrdup(collected, GFP_KERNEL); 321 vcollected = kstrdup(collected, GFP_KERNEL);
321 state = CopyFile; 322 state = CopyFile;
322 } 323 }
diff --git a/ipc/shm.c b/ipc/shm.c
index b125b560240e..d0ab5527bf45 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -990,6 +990,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
990 */ 990 */
991 vma = find_vma(mm, addr); 991 vma = find_vma(mm, addr);
992 992
993#ifdef CONFIG_MMU
993 while (vma) { 994 while (vma) {
994 next = vma->vm_next; 995 next = vma->vm_next;
995 996
@@ -1034,6 +1035,17 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
1034 vma = next; 1035 vma = next;
1035 } 1036 }
1036 1037
1038#else /* CONFIG_MMU */
1039 /* under NOMMU conditions, the exact address to be destroyed must be
1040 * given */
1041 retval = -EINVAL;
1042 if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1043 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1044 retval = 0;
1045 }
1046
1047#endif
1048
1037 up_write(&mm->mmap_sem); 1049 up_write(&mm->mmap_sem);
1038 return retval; 1050 return retval;
1039} 1051}
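
As the comment in the NOMMU branch above notes, shmdt() must be given exactly
the address that shmat() returned; a minimal illustrative userspace sequence
(not part of the patch):

    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
        int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
        void *p;

        if (id < 0)
            return 1;
        p = shmat(id, NULL, 0);         /* let the kernel pick the address */
        if (p == (void *) -1)
            return 1;

        /* ... use the segment ... */

        shmdt(p);                       /* on NOMMU this must be the exact
                                         * address returned by shmat() */
        shmctl(id, IPC_RMID, NULL);
        return 0;
    }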
diff --git a/kernel/fork.c b/kernel/fork.c
index 4018308048cf..1d68f1255dd8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1481,12 +1481,10 @@ void __init proc_caches_init(void)
1481 fs_cachep = kmem_cache_create("fs_cache", 1481 fs_cachep = kmem_cache_create("fs_cache",
1482 sizeof(struct fs_struct), 0, 1482 sizeof(struct fs_struct), 0,
1483 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1483 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1484 vm_area_cachep = kmem_cache_create("vm_area_struct",
1485 sizeof(struct vm_area_struct), 0,
1486 SLAB_PANIC, NULL);
1487 mm_cachep = kmem_cache_create("mm_struct", 1484 mm_cachep = kmem_cache_create("mm_struct",
1488 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1485 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1489 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1486 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1487 mmap_init();
1490} 1488}
1491 1489
1492/* 1490/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 92f6e5bc3c24..89d74436318c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -82,6 +82,9 @@ extern int percpu_pagelist_fraction;
82extern int compat_log; 82extern int compat_log;
83extern int latencytop_enabled; 83extern int latencytop_enabled;
84extern int sysctl_nr_open_min, sysctl_nr_open_max; 84extern int sysctl_nr_open_min, sysctl_nr_open_max;
85#ifndef CONFIG_MMU
86extern int sysctl_nr_trim_pages;
87#endif
85#ifdef CONFIG_RCU_TORTURE_TEST 88#ifdef CONFIG_RCU_TORTURE_TEST
86extern int rcutorture_runnable; 89extern int rcutorture_runnable;
87#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 90#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
@@ -1102,6 +1105,17 @@ static struct ctl_table vm_table[] = {
1102 .mode = 0644, 1105 .mode = 0644,
1103 .proc_handler = &proc_dointvec 1106 .proc_handler = &proc_dointvec
1104 }, 1107 },
1108#else
1109 {
1110 .ctl_name = CTL_UNNUMBERED,
1111 .procname = "nr_trim_pages",
1112 .data = &sysctl_nr_trim_pages,
1113 .maxlen = sizeof(sysctl_nr_trim_pages),
1114 .mode = 0644,
1115 .proc_handler = &proc_dointvec_minmax,
1116 .strategy = &sysctl_intvec,
1117 .extra1 = &zero,
1118 },
1105#endif 1119#endif
1106 { 1120 {
1107 .ctl_name = VM_LAPTOP_MODE, 1121 .ctl_name = VM_LAPTOP_MODE,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e75478e9c69..d0a32aab03ff 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -512,6 +512,13 @@ config DEBUG_VIRTUAL
512 512
513 If unsure, say N. 513 If unsure, say N.
514 514
515config DEBUG_NOMMU_REGIONS
516 bool "Debug the global anon/private NOMMU mapping region tree"
517 depends on DEBUG_KERNEL && !MMU
518 help
519 This option causes the global tree of anonymous and private mapping
520 regions to be regularly checked for invalid topology.
521
515config DEBUG_WRITECOUNT 522config DEBUG_WRITECOUNT
516 bool "Debug filesystem writers count" 523 bool "Debug filesystem writers count"
517 depends on DEBUG_KERNEL 524 depends on DEBUG_KERNEL
diff --git a/mm/mmap.c b/mm/mmap.c
index a910c045cfd4..749623196cb9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2472,3 +2472,13 @@ void mm_drop_all_locks(struct mm_struct *mm)
2472 2472
2473 mutex_unlock(&mm_all_locks_mutex); 2473 mutex_unlock(&mm_all_locks_mutex);
2474} 2474}
2475
2476/*
2477 * initialise the VMA slab
2478 */
2479void __init mmap_init(void)
2480{
2481 vm_area_cachep = kmem_cache_create("vm_area_struct",
2482 sizeof(struct vm_area_struct), 0,
2483 SLAB_PANIC, NULL);
2484}
diff --git a/mm/nommu.c b/mm/nommu.c
index 1c28ea3a4e9c..60ed8375c986 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -6,11 +6,11 @@
6 * 6 *
7 * See Documentation/nommu-mmap.txt 7 * See Documentation/nommu-mmap.txt
8 * 8 *
9 * Copyright (c) 2004-2005 David Howells <dhowells@redhat.com> 9 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
13 * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org> 13 * Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org>
14 */ 14 */
15 15
16#include <linux/module.h> 16#include <linux/module.h>
@@ -33,6 +33,28 @@
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34#include <asm/tlb.h> 34#include <asm/tlb.h>
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include "internal.h"
37
38static inline __attribute__((format(printf, 1, 2)))
39void no_printk(const char *fmt, ...)
40{
41}
42
43#if 0
44#define kenter(FMT, ...) \
45 printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
46#define kleave(FMT, ...) \
47 printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
48#define kdebug(FMT, ...) \
49 printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__)
50#else
51#define kenter(FMT, ...) \
52 no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
53#define kleave(FMT, ...) \
54 no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
55#define kdebug(FMT, ...) \
56 no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
57#endif
36 58
37#include "internal.h" 59#include "internal.h"
38 60
@@ -40,19 +62,22 @@ void *high_memory;
40struct page *mem_map; 62struct page *mem_map;
41unsigned long max_mapnr; 63unsigned long max_mapnr;
42unsigned long num_physpages; 64unsigned long num_physpages;
43unsigned long askedalloc, realalloc;
44atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); 65atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
45int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ 66int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
46int sysctl_overcommit_ratio = 50; /* default is 50% */ 67int sysctl_overcommit_ratio = 50; /* default is 50% */
47int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 68int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
69int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
48int heap_stack_gap = 0; 70int heap_stack_gap = 0;
49 71
72atomic_t mmap_pages_allocated;
73
50EXPORT_SYMBOL(mem_map); 74EXPORT_SYMBOL(mem_map);
51EXPORT_SYMBOL(num_physpages); 75EXPORT_SYMBOL(num_physpages);
52 76
53/* list of shareable VMAs */ 77/* list of mapped, potentially shareable regions */
54struct rb_root nommu_vma_tree = RB_ROOT; 78static struct kmem_cache *vm_region_jar;
55DECLARE_RWSEM(nommu_vma_sem); 79struct rb_root nommu_region_tree = RB_ROOT;
80DECLARE_RWSEM(nommu_region_sem);
56 81
57struct vm_operations_struct generic_file_vm_ops = { 82struct vm_operations_struct generic_file_vm_ops = {
58}; 83};
@@ -124,6 +149,20 @@ unsigned int kobjsize(const void *objp)
124 return ksize(objp); 149 return ksize(objp);
125 150
126 /* 151 /*
152 * If it's not a compound page, see if we have a matching VMA
153 * region. This test is intentionally done in reverse order,
154 * so if there's no VMA, we still fall through and hand back
155 * PAGE_SIZE for 0-order pages.
156 */
157 if (!PageCompound(page)) {
158 struct vm_area_struct *vma;
159
160 vma = find_vma(current->mm, (unsigned long)objp);
161 if (vma)
162 return vma->vm_end - vma->vm_start;
163 }
164
165 /*
127 * The ksize() function is only guaranteed to work for pointers 166 * The ksize() function is only guaranteed to work for pointers
128 * returned by kmalloc(). So handle arbitrary pointers here. 167 * returned by kmalloc(). So handle arbitrary pointers here.
129 */ 168 */
@@ -401,129 +440,178 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
401 return mm->brk = brk; 440 return mm->brk = brk;
402} 441}
403 442
404#ifdef DEBUG 443/*
405static void show_process_blocks(void) 444 * initialise the VMA and region record slabs
445 */
446void __init mmap_init(void)
406{ 447{
407 struct vm_list_struct *vml; 448 vm_region_jar = kmem_cache_create("vm_region_jar",
408 449 sizeof(struct vm_region), 0,
409 printk("Process blocks %d:", current->pid); 450 SLAB_PANIC, NULL);
410 451 vm_area_cachep = kmem_cache_create("vm_area_struct",
411 for (vml = &current->mm->context.vmlist; vml; vml = vml->next) { 452 sizeof(struct vm_area_struct), 0,
412 printk(" %p: %p", vml, vml->vma); 453 SLAB_PANIC, NULL);
413 if (vml->vma)
414 printk(" (%d @%lx #%d)",
415 kobjsize((void *) vml->vma->vm_start),
416 vml->vma->vm_start,
417 atomic_read(&vml->vma->vm_usage));
418 printk(vml->next ? " ->" : ".\n");
419 }
420} 454}
421#endif /* DEBUG */
422 455
423/* 456/*
424 * add a VMA into a process's mm_struct in the appropriate place in the list 457 * validate the region tree
425 * - should be called with mm->mmap_sem held writelocked 458 * - the caller must hold the region lock
426 */ 459 */
427static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml) 460#ifdef CONFIG_DEBUG_NOMMU_REGIONS
461static noinline void validate_nommu_regions(void)
428{ 462{
429 struct vm_list_struct **ppv; 463 struct vm_region *region, *last;
430 464 struct rb_node *p, *lastp;
431 for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next) 465
432 if ((*ppv)->vma->vm_start > vml->vma->vm_start) 466 lastp = rb_first(&nommu_region_tree);
433 break; 467 if (!lastp)
434 468 return;
435 vml->next = *ppv; 469
436 *ppv = vml; 470 last = rb_entry(lastp, struct vm_region, vm_rb);
471 if (unlikely(last->vm_end <= last->vm_start))
472 BUG();
473 if (unlikely(last->vm_top < last->vm_end))
474 BUG();
475
476 while ((p = rb_next(lastp))) {
477 region = rb_entry(p, struct vm_region, vm_rb);
478 last = rb_entry(lastp, struct vm_region, vm_rb);
479
480 if (unlikely(region->vm_end <= region->vm_start))
481 BUG();
482 if (unlikely(region->vm_top < region->vm_end))
483 BUG();
484 if (unlikely(region->vm_start < last->vm_top))
485 BUG();
486
487 lastp = p;
488 }
437} 489}
490#else
491#define validate_nommu_regions() do {} while(0)
492#endif
438 493
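
validate_nommu_regions() walks the region tree in start-address order and BUG()s if any region is empty, if its trimmed end exceeds its allocated top, or if it overlaps the previous region. The same invariants, checked here over a plain sorted array rather than the rb-tree, purely as an illustration:

#include <assert.h>
#include <stdio.h>

struct region { unsigned long vm_start, vm_end, vm_top; };

static void validate_regions(const struct region *r, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		assert(r[i].vm_start < r[i].vm_end);	/* non-empty mapping */
		assert(r[i].vm_end <= r[i].vm_top);	/* trimmed size <= allocation */
		if (i > 0)				/* no overlap with predecessor */
			assert(r[i].vm_start >= r[i - 1].vm_top);
	}
}

int main(void)
{
	struct region regions[] = {
		{ 0x1000, 0x3000, 0x4000 },
		{ 0x4000, 0x5000, 0x5000 },
	};

	validate_regions(regions, 2);
	puts("region tree invariants hold");
	return 0;
}
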
439/* 494/*
440 * look up the first VMA in which addr resides, NULL if none 495 * add a region into the global tree
441 * - should be called with mm->mmap_sem at least held readlocked
442 */ 496 */
443struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 497static void add_nommu_region(struct vm_region *region)
444{ 498{
445 struct vm_list_struct *loop, *vml; 499 struct vm_region *pregion;
500 struct rb_node **p, *parent;
446 501
447 /* search the vm_start ordered list */ 502 validate_nommu_regions();
448 vml = NULL; 503
449 for (loop = mm->context.vmlist; loop; loop = loop->next) { 504 BUG_ON(region->vm_start & ~PAGE_MASK);
450 if (loop->vma->vm_start > addr) 505
451 break; 506 parent = NULL;
452 vml = loop; 507 p = &nommu_region_tree.rb_node;
508 while (*p) {
509 parent = *p;
510 pregion = rb_entry(parent, struct vm_region, vm_rb);
511 if (region->vm_start < pregion->vm_start)
512 p = &(*p)->rb_left;
513 else if (region->vm_start > pregion->vm_start)
514 p = &(*p)->rb_right;
515 else if (pregion == region)
516 return;
517 else
518 BUG();
453 } 519 }
454 520
455 if (vml && vml->vma->vm_end > addr) 521 rb_link_node(&region->vm_rb, parent, p);
456 return vml->vma; 522 rb_insert_color(&region->vm_rb, &nommu_region_tree);
457 523
458 return NULL; 524 validate_nommu_regions();
459} 525}
460EXPORT_SYMBOL(find_vma);
461 526
462/* 527/*
463 * find a VMA 528 * delete a region from the global tree
464 * - we don't extend stack VMAs under NOMMU conditions
465 */ 529 */
466struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 530static void delete_nommu_region(struct vm_region *region)
467{ 531{
468 return find_vma(mm, addr); 532 BUG_ON(!nommu_region_tree.rb_node);
469}
470 533
471int expand_stack(struct vm_area_struct *vma, unsigned long address) 534 validate_nommu_regions();
472{ 535 rb_erase(&region->vm_rb, &nommu_region_tree);
473 return -ENOMEM; 536 validate_nommu_regions();
474} 537}
475 538
476/* 539/*
477 * look up the first VMA exactly that exactly matches addr 540 * free a contiguous series of pages
478 * - should be called with mm->mmap_sem at least held readlocked
479 */ 541 */
480static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 542static void free_page_series(unsigned long from, unsigned long to)
481 unsigned long addr)
482{ 543{
483 struct vm_list_struct *vml; 544 for (; from < to; from += PAGE_SIZE) {
484 545 struct page *page = virt_to_page(from);
485 /* search the vm_start ordered list */ 546
486 for (vml = mm->context.vmlist; vml; vml = vml->next) { 547 kdebug("- free %lx", from);
487 if (vml->vma->vm_start == addr) 548 atomic_dec(&mmap_pages_allocated);
488 return vml->vma; 549 if (page_count(page) != 1)
489 if (vml->vma->vm_start > addr) 550 kdebug("free page %p [%d]", page, page_count(page));
490 break; 551 put_page(page);
491 } 552 }
492
493 return NULL;
494} 553}
495 554
496/* 555/*
497 * find a VMA in the global tree 556 * release a reference to a region
557 * - the caller must hold the region semaphore, which this releases
558 * - the region may not have been added to the tree yet, in which case vm_top
559 * will equal vm_start
498 */ 560 */
499static inline struct vm_area_struct *find_nommu_vma(unsigned long start) 561static void __put_nommu_region(struct vm_region *region)
562 __releases(nommu_region_sem)
500{ 563{
501 struct vm_area_struct *vma; 564 kenter("%p{%d}", region, atomic_read(&region->vm_usage));
502 struct rb_node *n = nommu_vma_tree.rb_node;
503 565
504 while (n) { 566 BUG_ON(!nommu_region_tree.rb_node);
505 vma = rb_entry(n, struct vm_area_struct, vm_rb);
506 567
507 if (start < vma->vm_start) 568 if (atomic_dec_and_test(&region->vm_usage)) {
508 n = n->rb_left; 569 if (region->vm_top > region->vm_start)
509 else if (start > vma->vm_start) 570 delete_nommu_region(region);
510 n = n->rb_right; 571 up_write(&nommu_region_sem);
511 else 572
512 return vma; 573 if (region->vm_file)
574 fput(region->vm_file);
575
576 /* IO memory and memory shared directly out of the pagecache
577 * from ramfs/tmpfs mustn't be released here */
578 if (region->vm_flags & VM_MAPPED_COPY) {
579 kdebug("free series");
580 free_page_series(region->vm_start, region->vm_top);
581 }
582 kmem_cache_free(vm_region_jar, region);
583 } else {
584 up_write(&nommu_region_sem);
513 } 585 }
586}
514 587
515 return NULL; 588/*
589 * release a reference to a region
590 */
591static void put_nommu_region(struct vm_region *region)
592{
593 down_write(&nommu_region_sem);
594 __put_nommu_region(region);
516} 595}
517 596
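
A vm_region is shared by every VMA mapping the same object, so it carries a usage count: the region leaves the global tree and, for private copies, its backing pages are released only when the last VMA drops its reference. A minimal user-space sketch of that lifecycle, with the rb-tree and the region semaphore elided and illustrative names throughout:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct region {
	atomic_int	usage;		/* like vm_region::vm_usage */
	int		mapped_copy;	/* like VM_MAPPED_COPY */
	void		*backing;	/* pages backing a private copy */
};

static struct region *region_new(int mapped_copy, size_t size)
{
	struct region *r = calloc(1, sizeof(*r));

	atomic_init(&r->usage, 1);
	r->mapped_copy = mapped_copy;
	r->backing = mapped_copy ? malloc(size) : NULL;
	return r;
}

static void region_get(struct region *r)
{
	atomic_fetch_add(&r->usage, 1);	/* another VMA shares this region */
}

static void region_put(struct region *r)
{
	if (atomic_fetch_sub(&r->usage, 1) == 1) {
		/* last user: this is where delete_nommu_region() would run */
		if (r->mapped_copy)
			free(r->backing);	/* like free_page_series() */
		free(r);
	}
}

int main(void)
{
	struct region *r = region_new(1, 4096);

	region_get(r);		/* second mapping of the same region */
	region_put(r);
	region_put(r);		/* frees the backing store and the region */
	puts("region released");
	return 0;
}
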
518/* 597/*
519 * add a VMA in the global tree 598 * add a VMA into a process's mm_struct in the appropriate place in the list
599 * and tree and add to the address space's page tree also if not an anonymous
600 * page
601 * - should be called with mm->mmap_sem held writelocked
520 */ 602 */
521static void add_nommu_vma(struct vm_area_struct *vma) 603static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
522{ 604{
523 struct vm_area_struct *pvma; 605 struct vm_area_struct *pvma, **pp;
524 struct address_space *mapping; 606 struct address_space *mapping;
525 struct rb_node **p = &nommu_vma_tree.rb_node; 607 struct rb_node **p, *parent;
526 struct rb_node *parent = NULL; 608
609 kenter(",%p", vma);
610
611 BUG_ON(!vma->vm_region);
612
613 mm->map_count++;
614 vma->vm_mm = mm;
527 615
528 /* add the VMA to the mapping */ 616 /* add the VMA to the mapping */
529 if (vma->vm_file) { 617 if (vma->vm_file) {
@@ -534,42 +622,62 @@ static void add_nommu_vma(struct vm_area_struct *vma)
534 flush_dcache_mmap_unlock(mapping); 622 flush_dcache_mmap_unlock(mapping);
535 } 623 }
536 624
537 /* add the VMA to the master list */ 625 /* add the VMA to the tree */
626 parent = NULL;
627 p = &mm->mm_rb.rb_node;
538 while (*p) { 628 while (*p) {
539 parent = *p; 629 parent = *p;
540 pvma = rb_entry(parent, struct vm_area_struct, vm_rb); 630 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
541 631
542 if (vma->vm_start < pvma->vm_start) { 632 /* sort by: start addr, end addr, VMA struct addr in that order
633 * (the latter is necessary as we may get identical VMAs) */
634 if (vma->vm_start < pvma->vm_start)
543 p = &(*p)->rb_left; 635 p = &(*p)->rb_left;
544 } 636 else if (vma->vm_start > pvma->vm_start)
545 else if (vma->vm_start > pvma->vm_start) {
546 p = &(*p)->rb_right; 637 p = &(*p)->rb_right;
547 } 638 else if (vma->vm_end < pvma->vm_end)
548 else { 639 p = &(*p)->rb_left;
549 /* mappings are at the same address - this can only 640 else if (vma->vm_end > pvma->vm_end)
550 * happen for shared-mem chardevs and shared file 641 p = &(*p)->rb_right;
551 * mappings backed by ramfs/tmpfs */ 642 else if (vma < pvma)
552 BUG_ON(!(pvma->vm_flags & VM_SHARED)); 643 p = &(*p)->rb_left;
553 644 else if (vma > pvma)
554 if (vma < pvma) 645 p = &(*p)->rb_right;
555 p = &(*p)->rb_left; 646 else
556 else if (vma > pvma) 647 BUG();
557 p = &(*p)->rb_right;
558 else
559 BUG();
560 }
561 } 648 }
562 649
563 rb_link_node(&vma->vm_rb, parent, p); 650 rb_link_node(&vma->vm_rb, parent, p);
564 rb_insert_color(&vma->vm_rb, &nommu_vma_tree); 651 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
652
653 /* add VMA to the VMA list also */
654 for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) {
655 if (pvma->vm_start > vma->vm_start)
656 break;
657 if (pvma->vm_start < vma->vm_start)
658 continue;
659 if (pvma->vm_end < vma->vm_end)
660 break;
661 }
662
663 vma->vm_next = *pp;
664 *pp = vma;
565} 665}
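
Because shared NOMMU mappings can produce VMAs with identical start and end addresses, the insertion above orders the per-MM tree by three keys: start address, then end address, then the address of the VMA structure itself. A stand-alone comparator expressing the same ordering (illustrative only, not the kernel code):

#include <stdio.h>

struct vma_key { unsigned long start, end; const void *self; };

static int vma_cmp(const struct vma_key *a, const struct vma_key *b)
{
	if (a->start != b->start)
		return a->start < b->start ? -1 : 1;
	if (a->end != b->end)
		return a->end < b->end ? -1 : 1;
	if (a->self != b->self)		/* identical ranges: order by struct address */
		return a->self < b->self ? -1 : 1;
	return 0;			/* same VMA twice: the insert loop BUG()s */
}

int main(void)
{
	struct vma_key a = { 0x1000, 0x3000, &a };
	struct vma_key b = { 0x1000, 0x3000, &b };

	printf("identical ranges still order: %d\n", vma_cmp(&a, &b));
	return 0;
}
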
566 666
567/* 667/*
568 * delete a VMA from the global list 668 * delete a VMA from its owning mm_struct and address space
569 */ 669 */
570static void delete_nommu_vma(struct vm_area_struct *vma) 670static void delete_vma_from_mm(struct vm_area_struct *vma)
571{ 671{
672 struct vm_area_struct **pp;
572 struct address_space *mapping; 673 struct address_space *mapping;
674 struct mm_struct *mm = vma->vm_mm;
675
676 kenter("%p", vma);
677
678 mm->map_count--;
679 if (mm->mmap_cache == vma)
680 mm->mmap_cache = NULL;
573 681
574 /* remove the VMA from the mapping */ 682 /* remove the VMA from the mapping */
575 if (vma->vm_file) { 683 if (vma->vm_file) {
@@ -580,8 +688,115 @@ static void delete_nommu_vma(struct vm_area_struct *vma)
580 flush_dcache_mmap_unlock(mapping); 688 flush_dcache_mmap_unlock(mapping);
581 } 689 }
582 690
583 /* remove from the master list */ 691 /* remove from the MM's tree and list */
584 rb_erase(&vma->vm_rb, &nommu_vma_tree); 692 rb_erase(&vma->vm_rb, &mm->mm_rb);
693 for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) {
694 if (*pp == vma) {
695 *pp = vma->vm_next;
696 break;
697 }
698 }
699
700 vma->vm_mm = NULL;
701}
702
703/*
704 * destroy a VMA record
705 */
706static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
707{
708 kenter("%p", vma);
709 if (vma->vm_ops && vma->vm_ops->close)
710 vma->vm_ops->close(vma);
711 if (vma->vm_file) {
712 fput(vma->vm_file);
713 if (vma->vm_flags & VM_EXECUTABLE)
714 removed_exe_file_vma(mm);
715 }
716 put_nommu_region(vma->vm_region);
717 kmem_cache_free(vm_area_cachep, vma);
718}
719
720/*
721 * look up the first VMA in which addr resides, NULL if none
722 * - should be called with mm->mmap_sem at least held readlocked
723 */
724struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
725{
726 struct vm_area_struct *vma;
727 struct rb_node *n = mm->mm_rb.rb_node;
728
729 /* check the cache first */
730 vma = mm->mmap_cache;
731 if (vma && vma->vm_start <= addr && vma->vm_end > addr)
732 return vma;
733
734 /* trawl the tree (there may be multiple mappings in which addr
735 * resides) */
736 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
737 vma = rb_entry(n, struct vm_area_struct, vm_rb);
738 if (vma->vm_start > addr)
739 return NULL;
740 if (vma->vm_end > addr) {
741 mm->mmap_cache = vma;
742 return vma;
743 }
744 }
745
746 return NULL;
747}
748EXPORT_SYMBOL(find_vma);
749
750/*
751 * find a VMA
752 * - we don't extend stack VMAs under NOMMU conditions
753 */
754struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
755{
756 return find_vma(mm, addr);
757}
758
759/*
760 * expand a stack to a given address
761 * - not supported under NOMMU conditions
762 */
763int expand_stack(struct vm_area_struct *vma, unsigned long address)
764{
765 return -ENOMEM;
766}
767
768/*
 769 * look up the first VMA that exactly matches addr
770 * - should be called with mm->mmap_sem at least held readlocked
771 */
772static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
773 unsigned long addr,
774 unsigned long len)
775{
776 struct vm_area_struct *vma;
777 struct rb_node *n = mm->mm_rb.rb_node;
778 unsigned long end = addr + len;
779
780 /* check the cache first */
781 vma = mm->mmap_cache;
782 if (vma && vma->vm_start == addr && vma->vm_end == end)
783 return vma;
784
785 /* trawl the tree (there may be multiple mappings in which addr
786 * resides) */
787 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
788 vma = rb_entry(n, struct vm_area_struct, vm_rb);
789 if (vma->vm_start < addr)
790 continue;
791 if (vma->vm_start > addr)
792 return NULL;
793 if (vma->vm_end == end) {
794 mm->mmap_cache = vma;
795 return vma;
796 }
797 }
798
799 return NULL;
585} 800}
586 801
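
find_vma() and find_vma_exact() above both consult the single-entry mmap_cache before walking the tree in ascending start order, and cache whichever VMA they return. The same lookup pattern sketched over a sorted array in user space (names are illustrative):

#include <stddef.h>
#include <stdio.h>

struct map { unsigned long start, end; };

static struct map maps[] = {
	{ 0x1000, 0x3000 },
	{ 0x8000, 0x9000 },
};
static struct map *map_cache;

static struct map *find_map(unsigned long addr)
{
	size_t i;

	if (map_cache && map_cache->start <= addr && map_cache->end > addr)
		return map_cache;			/* cache hit */

	for (i = 0; i < sizeof(maps) / sizeof(maps[0]); i++) {
		if (maps[i].start > addr)
			return NULL;			/* sorted: nothing later can match */
		if (maps[i].end > addr) {
			map_cache = &maps[i];		/* cache the hit */
			return map_cache;
		}
	}
	return NULL;
}

int main(void)
{
	printf("0x8100 -> %p\n", (void *)find_map(0x8100));
	printf("0x4000 -> %p\n", (void *)find_map(0x4000));
	return 0;
}
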
587/* 802/*
@@ -596,7 +811,7 @@ static int validate_mmap_request(struct file *file,
596 unsigned long pgoff, 811 unsigned long pgoff,
597 unsigned long *_capabilities) 812 unsigned long *_capabilities)
598{ 813{
599 unsigned long capabilities; 814 unsigned long capabilities, rlen;
600 unsigned long reqprot = prot; 815 unsigned long reqprot = prot;
601 int ret; 816 int ret;
602 817
@@ -616,12 +831,12 @@ static int validate_mmap_request(struct file *file,
616 return -EINVAL; 831 return -EINVAL;
617 832
618 /* Careful about overflows.. */ 833 /* Careful about overflows.. */
619 len = PAGE_ALIGN(len); 834 rlen = PAGE_ALIGN(len);
620 if (!len || len > TASK_SIZE) 835 if (!rlen || rlen > TASK_SIZE)
621 return -ENOMEM; 836 return -ENOMEM;
622 837
623 /* offset overflow? */ 838 /* offset overflow? */
624 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) 839 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
625 return -EOVERFLOW; 840 return -EOVERFLOW;
626 841
627 if (file) { 842 if (file) {
@@ -795,13 +1010,18 @@ static unsigned long determine_vm_flags(struct file *file,
795} 1010}
796 1011
797/* 1012/*
798 * set up a shared mapping on a file 1013 * set up a shared mapping on a file (the driver or filesystem provides and
1014 * pins the storage)
799 */ 1015 */
800static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len) 1016static int do_mmap_shared_file(struct vm_area_struct *vma)
801{ 1017{
802 int ret; 1018 int ret;
803 1019
804 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1020 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
1021 if (ret == 0) {
1022 vma->vm_region->vm_top = vma->vm_region->vm_end;
1023 return ret;
1024 }
805 if (ret != -ENOSYS) 1025 if (ret != -ENOSYS)
806 return ret; 1026 return ret;
807 1027
@@ -815,10 +1035,14 @@ static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
815/* 1035/*
816 * set up a private mapping or an anonymous shared mapping 1036 * set up a private mapping or an anonymous shared mapping
817 */ 1037 */
818static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) 1038static int do_mmap_private(struct vm_area_struct *vma,
1039 struct vm_region *region,
1040 unsigned long len)
819{ 1041{
1042 struct page *pages;
1043 unsigned long total, point, n, rlen;
820 void *base; 1044 void *base;
821 int ret; 1045 int ret, order;
822 1046
823 /* invoke the file's mapping function so that it can keep track of 1047 /* invoke the file's mapping function so that it can keep track of
824 * shared mappings on devices or memory 1048 * shared mappings on devices or memory
@@ -826,34 +1050,63 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
826 */ 1050 */
827 if (vma->vm_file) { 1051 if (vma->vm_file) {
828 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1052 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
829 if (ret != -ENOSYS) { 1053 if (ret == 0) {
830 /* shouldn't return success if we're not sharing */ 1054 /* shouldn't return success if we're not sharing */
831 BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE)); 1055 BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
832 return ret; /* success or a real error */ 1056 vma->vm_region->vm_top = vma->vm_region->vm_end;
1057 return ret;
833 } 1058 }
1059 if (ret != -ENOSYS)
1060 return ret;
834 1061
835 /* getting an ENOSYS error indicates that direct mmap isn't 1062 /* getting an ENOSYS error indicates that direct mmap isn't
836 * possible (as opposed to tried but failed) so we'll try to 1063 * possible (as opposed to tried but failed) so we'll try to
837 * make a private copy of the data and map that instead */ 1064 * make a private copy of the data and map that instead */
838 } 1065 }
839 1066
1067 rlen = PAGE_ALIGN(len);
1068
840 /* allocate some memory to hold the mapping 1069 /* allocate some memory to hold the mapping
841 * - note that this may not return a page-aligned address if the object 1070 * - note that this may not return a page-aligned address if the object
842 * we're allocating is smaller than a page 1071 * we're allocating is smaller than a page
843 */ 1072 */
844 base = kmalloc(len, GFP_KERNEL|__GFP_COMP); 1073 order = get_order(rlen);
845 if (!base) 1074 kdebug("alloc order %d for %lx", order, len);
1075
1076 pages = alloc_pages(GFP_KERNEL, order);
1077 if (!pages)
846 goto enomem; 1078 goto enomem;
847 1079
848 vma->vm_start = (unsigned long) base; 1080 total = 1 << order;
849 vma->vm_end = vma->vm_start + len; 1081 atomic_add(total, &mmap_pages_allocated);
850 vma->vm_flags |= VM_MAPPED_COPY; 1082
1083 point = rlen >> PAGE_SHIFT;
1084
1085 /* we allocated a power-of-2 sized page set, so we may want to trim off
1086 * the excess */
1087 if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) {
1088 while (total > point) {
1089 order = ilog2(total - point);
1090 n = 1 << order;
1091 kdebug("shave %lu/%lu @%lu", n, total - point, total);
1092 atomic_sub(n, &mmap_pages_allocated);
1093 total -= n;
1094 set_page_refcounted(pages + total);
1095 __free_pages(pages + total, order);
1096 }
1097 }
1098
1099 for (point = 1; point < total; point++)
1100 set_page_refcounted(&pages[point]);
851 1101
852#ifdef WARN_ON_SLACK 1102 base = page_address(pages);
853 if (len + WARN_ON_SLACK <= kobjsize(result)) 1103 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
854 printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", 1104 region->vm_start = (unsigned long) base;
855 len, current->pid, kobjsize(result) - len); 1105 region->vm_end = region->vm_start + rlen;
856#endif 1106 region->vm_top = region->vm_start + (total << PAGE_SHIFT);
1107
1108 vma->vm_start = region->vm_start;
1109 vma->vm_end = region->vm_start + len;
857 1110
858 if (vma->vm_file) { 1111 if (vma->vm_file) {
859 /* read the contents of a file into the copy */ 1112 /* read the contents of a file into the copy */
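
do_mmap_private() above satisfies an rlen-byte request from a 2^order block of pages and, when sysctl_nr_trim_pages allows it, hands the surplus back in power-of-two chunks from the top of the block. The arithmetic of that trimming loop, demonstrated stand-alone without allocating any real pages (helper names are illustrative):

#include <stdio.h>

static unsigned int ilog2u(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned long round_up_pow2(unsigned long want)
{
	unsigned long total = 1;

	while (total < want)
		total <<= 1;
	return total;
}

int main(void)
{
	unsigned long want = 5;			/* e.g. a 5-page mapping */
	unsigned long total = round_up_pow2(want);
	int trim_enabled = 1;			/* like sysctl_nr_trim_pages != 0 */

	printf("allocated %lu pages for a %lu page request\n", total, want);
	if (trim_enabled) {
		while (total > want) {		/* shave the excess in pow-2 chunks */
			unsigned long n = 1UL << ilog2u(total - want);

			total -= n;
			printf("  shave %lu pages at page offset %lu\n", n, total);
		}
	}
	printf("kept %lu pages\n", total);
	return 0;
}
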
@@ -865,26 +1118,28 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
865 1118
866 old_fs = get_fs(); 1119 old_fs = get_fs();
867 set_fs(KERNEL_DS); 1120 set_fs(KERNEL_DS);
868 ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos); 1121 ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
869 set_fs(old_fs); 1122 set_fs(old_fs);
870 1123
871 if (ret < 0) 1124 if (ret < 0)
872 goto error_free; 1125 goto error_free;
873 1126
874 /* clear the last little bit */ 1127 /* clear the last little bit */
875 if (ret < len) 1128 if (ret < rlen)
876 memset(base + ret, 0, len - ret); 1129 memset(base + ret, 0, rlen - ret);
877 1130
878 } else { 1131 } else {
879 /* if it's an anonymous mapping, then just clear it */ 1132 /* if it's an anonymous mapping, then just clear it */
880 memset(base, 0, len); 1133 memset(base, 0, rlen);
881 } 1134 }
882 1135
883 return 0; 1136 return 0;
884 1137
885error_free: 1138error_free:
886 kfree(base); 1139 free_page_series(region->vm_start, region->vm_end);
887 vma->vm_start = 0; 1140 region->vm_start = vma->vm_start = 0;
1141 region->vm_end = vma->vm_end = 0;
1142 region->vm_top = 0;
888 return ret; 1143 return ret;
889 1144
890enomem: 1145enomem:
@@ -904,13 +1159,14 @@ unsigned long do_mmap_pgoff(struct file *file,
904 unsigned long flags, 1159 unsigned long flags,
905 unsigned long pgoff) 1160 unsigned long pgoff)
906{ 1161{
907 struct vm_list_struct *vml = NULL; 1162 struct vm_area_struct *vma;
908 struct vm_area_struct *vma = NULL; 1163 struct vm_region *region;
909 struct rb_node *rb; 1164 struct rb_node *rb;
910 unsigned long capabilities, vm_flags; 1165 unsigned long capabilities, vm_flags, result;
911 void *result;
912 int ret; 1166 int ret;
913 1167
1168 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
1169
914 if (!(flags & MAP_FIXED)) 1170 if (!(flags & MAP_FIXED))
915 addr = round_hint_to_min(addr); 1171 addr = round_hint_to_min(addr);
916 1172
@@ -918,73 +1174,120 @@ unsigned long do_mmap_pgoff(struct file *file,
918 * mapping */ 1174 * mapping */
919 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1175 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
920 &capabilities); 1176 &capabilities);
921 if (ret < 0) 1177 if (ret < 0) {
1178 kleave(" = %d [val]", ret);
922 return ret; 1179 return ret;
1180 }
923 1181
924 /* we've determined that we can make the mapping, now translate what we 1182 /* we've determined that we can make the mapping, now translate what we
925 * now know into VMA flags */ 1183 * now know into VMA flags */
926 vm_flags = determine_vm_flags(file, prot, flags, capabilities); 1184 vm_flags = determine_vm_flags(file, prot, flags, capabilities);
927 1185
928 /* we're going to need to record the mapping if it works */ 1186 /* we're going to need to record the mapping */
929 vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL); 1187 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
930 if (!vml) 1188 if (!region)
931 goto error_getting_vml; 1189 goto error_getting_region;
1190
1191 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1192 if (!vma)
1193 goto error_getting_vma;
1194
1195 atomic_set(&region->vm_usage, 1);
1196 region->vm_flags = vm_flags;
1197 region->vm_pgoff = pgoff;
1198
1199 INIT_LIST_HEAD(&vma->anon_vma_node);
1200 vma->vm_flags = vm_flags;
1201 vma->vm_pgoff = pgoff;
932 1202
933 down_write(&nommu_vma_sem); 1203 if (file) {
1204 region->vm_file = file;
1205 get_file(file);
1206 vma->vm_file = file;
1207 get_file(file);
1208 if (vm_flags & VM_EXECUTABLE) {
1209 added_exe_file_vma(current->mm);
1210 vma->vm_mm = current->mm;
1211 }
1212 }
934 1213
935 /* if we want to share, we need to check for VMAs created by other 1214 down_write(&nommu_region_sem);
1215
1216 /* if we want to share, we need to check for regions created by other
936 * mmap() calls that overlap with our proposed mapping 1217 * mmap() calls that overlap with our proposed mapping
937 * - we can only share with an exact match on most regular files 1218 * - we can only share with a superset match on most regular files
938 * - shared mappings on character devices and memory backed files are 1219 * - shared mappings on character devices and memory backed files are
939 * permitted to overlap inexactly as far as we are concerned for in 1220 * permitted to overlap inexactly as far as we are concerned for in
940 * these cases, sharing is handled in the driver or filesystem rather 1221 * these cases, sharing is handled in the driver or filesystem rather
941 * than here 1222 * than here
942 */ 1223 */
943 if (vm_flags & VM_MAYSHARE) { 1224 if (vm_flags & VM_MAYSHARE) {
944 unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1225 struct vm_region *pregion;
945 unsigned long vmpglen; 1226 unsigned long pglen, rpglen, pgend, rpgend, start;
946 1227
947 /* suppress VMA sharing for shared regions */ 1228 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
948 if (vm_flags & VM_SHARED && 1229 pgend = pgoff + pglen;
949 capabilities & BDI_CAP_MAP_DIRECT)
950 goto dont_share_VMAs;
951 1230
952 for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) { 1231 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
953 vma = rb_entry(rb, struct vm_area_struct, vm_rb); 1232 pregion = rb_entry(rb, struct vm_region, vm_rb);
954 1233
955 if (!(vma->vm_flags & VM_MAYSHARE)) 1234 if (!(pregion->vm_flags & VM_MAYSHARE))
956 continue; 1235 continue;
957 1236
958 /* search for overlapping mappings on the same file */ 1237 /* search for overlapping mappings on the same file */
959 if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode) 1238 if (pregion->vm_file->f_path.dentry->d_inode !=
1239 file->f_path.dentry->d_inode)
960 continue; 1240 continue;
961 1241
962 if (vma->vm_pgoff >= pgoff + pglen) 1242 if (pregion->vm_pgoff >= pgend)
963 continue; 1243 continue;
964 1244
965 vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1; 1245 rpglen = pregion->vm_end - pregion->vm_start;
966 vmpglen >>= PAGE_SHIFT; 1246 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
967 if (pgoff >= vma->vm_pgoff + vmpglen) 1247 rpgend = pregion->vm_pgoff + rpglen;
1248 if (pgoff >= rpgend)
968 continue; 1249 continue;
969 1250
970 /* handle inexactly overlapping matches between mappings */ 1251 /* handle inexactly overlapping matches between
971 if (vma->vm_pgoff != pgoff || vmpglen != pglen) { 1252 * mappings */
1253 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
1254 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
1255 /* new mapping is not a subset of the region */
972 if (!(capabilities & BDI_CAP_MAP_DIRECT)) 1256 if (!(capabilities & BDI_CAP_MAP_DIRECT))
973 goto sharing_violation; 1257 goto sharing_violation;
974 continue; 1258 continue;
975 } 1259 }
976 1260
977 /* we've found a VMA we can share */ 1261 /* we've found a region we can share */
978 atomic_inc(&vma->vm_usage); 1262 atomic_inc(&pregion->vm_usage);
979 1263 vma->vm_region = pregion;
980 vml->vma = vma; 1264 start = pregion->vm_start;
981 result = (void *) vma->vm_start; 1265 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
982 goto shared; 1266 vma->vm_start = start;
1267 vma->vm_end = start + len;
1268
1269 if (pregion->vm_flags & VM_MAPPED_COPY) {
1270 kdebug("share copy");
1271 vma->vm_flags |= VM_MAPPED_COPY;
1272 } else {
1273 kdebug("share mmap");
1274 ret = do_mmap_shared_file(vma);
1275 if (ret < 0) {
1276 vma->vm_region = NULL;
1277 vma->vm_start = 0;
1278 vma->vm_end = 0;
1279 atomic_dec(&pregion->vm_usage);
1280 pregion = NULL;
1281 goto error_just_free;
1282 }
1283 }
1284 fput(region->vm_file);
1285 kmem_cache_free(vm_region_jar, region);
1286 region = pregion;
1287 result = start;
1288 goto share;
983 } 1289 }
984 1290
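
The sharing loop above will reuse an existing region only when the new mapping's page-offset window is a subset of the region's window (an exact match being the trivial subset); a mapping that merely overlaps is skipped if the backing device can map directly, and otherwise rejected as a sharing violation. A stand-alone sketch of the subset test, expressed in pages, with illustrative names:

#include <stdio.h>

static int mapping_is_subset(unsigned long pgoff, unsigned long pglen,
			     unsigned long rpgoff, unsigned long rpglen)
{
	unsigned long pgend = pgoff + pglen;
	unsigned long rpgend = rpgoff + rpglen;

	return pgoff >= rpgoff && pgend <= rpgend;
}

int main(void)
{
	/* region covers pages 0..16; a mapping of pages 4..8 can share it */
	printf("subset: %d\n", mapping_is_subset(4, 4, 0, 16));
	/* pages 12..20 spill past the region, so sharing is refused */
	printf("subset: %d\n", mapping_is_subset(12, 8, 0, 16));
	return 0;
}
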
985 dont_share_VMAs:
986 vma = NULL;
987
988 /* obtain the address at which to make a shared mapping 1291 /* obtain the address at which to make a shared mapping
989 * - this is the hook for quasi-memory character devices to 1292 * - this is the hook for quasi-memory character devices to
990 * tell us the location of a shared mapping 1293 * tell us the location of a shared mapping
@@ -995,113 +1298,93 @@ unsigned long do_mmap_pgoff(struct file *file,
995 if (IS_ERR((void *) addr)) { 1298 if (IS_ERR((void *) addr)) {
996 ret = addr; 1299 ret = addr;
997 if (ret != (unsigned long) -ENOSYS) 1300 if (ret != (unsigned long) -ENOSYS)
998 goto error; 1301 goto error_just_free;
999 1302
1000 /* the driver refused to tell us where to site 1303 /* the driver refused to tell us where to site
1001 * the mapping so we'll have to attempt to copy 1304 * the mapping so we'll have to attempt to copy
1002 * it */ 1305 * it */
1003 ret = (unsigned long) -ENODEV; 1306 ret = (unsigned long) -ENODEV;
1004 if (!(capabilities & BDI_CAP_MAP_COPY)) 1307 if (!(capabilities & BDI_CAP_MAP_COPY))
1005 goto error; 1308 goto error_just_free;
1006 1309
1007 capabilities &= ~BDI_CAP_MAP_DIRECT; 1310 capabilities &= ~BDI_CAP_MAP_DIRECT;
1311 } else {
1312 vma->vm_start = region->vm_start = addr;
1313 vma->vm_end = region->vm_end = addr + len;
1008 } 1314 }
1009 } 1315 }
1010 } 1316 }
1011 1317
1012 /* we're going to need a VMA struct as well */ 1318 vma->vm_region = region;
1013 vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
1014 if (!vma)
1015 goto error_getting_vma;
1016
1017 INIT_LIST_HEAD(&vma->anon_vma_node);
1018 atomic_set(&vma->vm_usage, 1);
1019 if (file) {
1020 get_file(file);
1021 if (vm_flags & VM_EXECUTABLE) {
1022 added_exe_file_vma(current->mm);
1023 vma->vm_mm = current->mm;
1024 }
1025 }
1026 vma->vm_file = file;
1027 vma->vm_flags = vm_flags;
1028 vma->vm_start = addr;
1029 vma->vm_end = addr + len;
1030 vma->vm_pgoff = pgoff;
1031
1032 vml->vma = vma;
1033 1319
1034 /* set up the mapping */ 1320 /* set up the mapping */
1035 if (file && vma->vm_flags & VM_SHARED) 1321 if (file && vma->vm_flags & VM_SHARED)
1036 ret = do_mmap_shared_file(vma, len); 1322 ret = do_mmap_shared_file(vma);
1037 else 1323 else
1038 ret = do_mmap_private(vma, len); 1324 ret = do_mmap_private(vma, region, len);
1039 if (ret < 0) 1325 if (ret < 0)
1040 goto error; 1326 goto error_put_region;
1041
1042 /* okay... we have a mapping; now we have to register it */
1043 result = (void *) vma->vm_start;
1044 1327
1045 if (vma->vm_flags & VM_MAPPED_COPY) { 1328 add_nommu_region(region);
1046 realalloc += kobjsize(result);
1047 askedalloc += len;
1048 }
1049 1329
1050 realalloc += kobjsize(vma); 1330 /* okay... we have a mapping; now we have to register it */
1051 askedalloc += sizeof(*vma); 1331 result = vma->vm_start;
1052 1332
1053 current->mm->total_vm += len >> PAGE_SHIFT; 1333 current->mm->total_vm += len >> PAGE_SHIFT;
1054 1334
1055 add_nommu_vma(vma); 1335share:
1056 1336 add_vma_to_mm(current->mm, vma);
1057 shared:
1058 realalloc += kobjsize(vml);
1059 askedalloc += sizeof(*vml);
1060
1061 add_vma_to_mm(current->mm, vml);
1062 1337
1063 up_write(&nommu_vma_sem); 1338 up_write(&nommu_region_sem);
1064 1339
1065 if (prot & PROT_EXEC) 1340 if (prot & PROT_EXEC)
1066 flush_icache_range((unsigned long) result, 1341 flush_icache_range(result, result + len);
1067 (unsigned long) result + len);
1068 1342
1069#ifdef DEBUG 1343 kleave(" = %lx", result);
1070 printk("do_mmap:\n"); 1344 return result;
1071 show_process_blocks();
1072#endif
1073
1074 return (unsigned long) result;
1075 1345
1076 error: 1346error_put_region:
1077 up_write(&nommu_vma_sem); 1347 __put_nommu_region(region);
1078 kfree(vml);
1079 if (vma) { 1348 if (vma) {
1080 if (vma->vm_file) { 1349 if (vma->vm_file) {
1081 fput(vma->vm_file); 1350 fput(vma->vm_file);
1082 if (vma->vm_flags & VM_EXECUTABLE) 1351 if (vma->vm_flags & VM_EXECUTABLE)
1083 removed_exe_file_vma(vma->vm_mm); 1352 removed_exe_file_vma(vma->vm_mm);
1084 } 1353 }
1085 kfree(vma); 1354 kmem_cache_free(vm_area_cachep, vma);
1086 } 1355 }
1356 kleave(" = %d [pr]", ret);
1087 return ret; 1357 return ret;
1088 1358
1089 sharing_violation: 1359error_just_free:
1090 up_write(&nommu_vma_sem); 1360 up_write(&nommu_region_sem);
1091 printk("Attempt to share mismatched mappings\n"); 1361error:
1092 kfree(vml); 1362 fput(region->vm_file);
1093 return -EINVAL; 1363 kmem_cache_free(vm_region_jar, region);
1364 fput(vma->vm_file);
1365 if (vma->vm_flags & VM_EXECUTABLE)
1366 removed_exe_file_vma(vma->vm_mm);
1367 kmem_cache_free(vm_area_cachep, vma);
1368 kleave(" = %d", ret);
1369 return ret;
1094 1370
1095 error_getting_vma: 1371sharing_violation:
1096 up_write(&nommu_vma_sem); 1372 up_write(&nommu_region_sem);
1097 kfree(vml); 1373 printk(KERN_WARNING "Attempt to share mismatched mappings\n");
1098 printk("Allocation of vma for %lu byte allocation from process %d failed\n", 1374 ret = -EINVAL;
1375 goto error;
1376
1377error_getting_vma:
1378 kmem_cache_free(vm_region_jar, region);
1379 printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
1380 " from process %d failed\n",
1099 len, current->pid); 1381 len, current->pid);
1100 show_free_areas(); 1382 show_free_areas();
1101 return -ENOMEM; 1383 return -ENOMEM;
1102 1384
1103 error_getting_vml: 1385error_getting_region:
1104 printk("Allocation of vml for %lu byte allocation from process %d failed\n", 1386 printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
1387 " from process %d failed\n",
1105 len, current->pid); 1388 len, current->pid);
1106 show_free_areas(); 1389 show_free_areas();
1107 return -ENOMEM; 1390 return -ENOMEM;
@@ -1109,85 +1392,183 @@ unsigned long do_mmap_pgoff(struct file *file,
1109EXPORT_SYMBOL(do_mmap_pgoff); 1392EXPORT_SYMBOL(do_mmap_pgoff);
1110 1393
1111/* 1394/*
1112 * handle mapping disposal for uClinux 1395 * split a vma into two pieces at address 'addr', a new vma is allocated either
1396 * for the first part or the tail.
1113 */ 1397 */
1114static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma) 1398int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
1399 unsigned long addr, int new_below)
1115{ 1400{
1116 if (vma) { 1401 struct vm_area_struct *new;
1117 down_write(&nommu_vma_sem); 1402 struct vm_region *region;
1403 unsigned long npages;
1118 1404
1119 if (atomic_dec_and_test(&vma->vm_usage)) { 1405 kenter("");
1120 delete_nommu_vma(vma);
1121 1406
1122 if (vma->vm_ops && vma->vm_ops->close) 1407 /* we're only permitted to split anonymous regions that have a single
1123 vma->vm_ops->close(vma); 1408 * owner */
1409 if (vma->vm_file ||
1410 atomic_read(&vma->vm_region->vm_usage) != 1)
1411 return -ENOMEM;
1124 1412
1125 /* IO memory and memory shared directly out of the pagecache from 1413 if (mm->map_count >= sysctl_max_map_count)
1126 * ramfs/tmpfs mustn't be released here */ 1414 return -ENOMEM;
1127 if (vma->vm_flags & VM_MAPPED_COPY) {
1128 realalloc -= kobjsize((void *) vma->vm_start);
1129 askedalloc -= vma->vm_end - vma->vm_start;
1130 kfree((void *) vma->vm_start);
1131 }
1132 1415
1133 realalloc -= kobjsize(vma); 1416 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
1134 askedalloc -= sizeof(*vma); 1417 if (!region)
1418 return -ENOMEM;
1135 1419
1136 if (vma->vm_file) { 1420 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1137 fput(vma->vm_file); 1421 if (!new) {
1138 if (vma->vm_flags & VM_EXECUTABLE) 1422 kmem_cache_free(vm_region_jar, region);
1139 removed_exe_file_vma(mm); 1423 return -ENOMEM;
1140 } 1424 }
1141 kfree(vma); 1425
1142 } 1426 /* most fields are the same, copy all, and then fixup */
1427 *new = *vma;
1428 *region = *vma->vm_region;
1429 new->vm_region = region;
1430
1431 npages = (addr - vma->vm_start) >> PAGE_SHIFT;
1143 1432
1144 up_write(&nommu_vma_sem); 1433 if (new_below) {
1434 region->vm_top = region->vm_end = new->vm_end = addr;
1435 } else {
1436 region->vm_start = new->vm_start = addr;
1437 region->vm_pgoff = new->vm_pgoff += npages;
1438 }
1439
1440 if (new->vm_ops && new->vm_ops->open)
1441 new->vm_ops->open(new);
1442
1443 delete_vma_from_mm(vma);
1444 down_write(&nommu_region_sem);
1445 delete_nommu_region(vma->vm_region);
1446 if (new_below) {
1447 vma->vm_region->vm_start = vma->vm_start = addr;
1448 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
1449 } else {
1450 vma->vm_region->vm_end = vma->vm_end = addr;
1451 vma->vm_region->vm_top = addr;
1145 } 1452 }
1453 add_nommu_region(vma->vm_region);
1454 add_nommu_region(new->vm_region);
1455 up_write(&nommu_region_sem);
1456 add_vma_to_mm(mm, vma);
1457 add_vma_to_mm(mm, new);
1458 return 0;
1146} 1459}
1147 1460
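
split_vma() above clones the VMA and its region, then gives one half the original start and the other half the split address, advancing the tail's page offset by the number of pages that precede it. The address and offset arithmetic, shown stand-alone (illustrative names, no real VMAs or regions involved):

#include <stdio.h>

#define PAGE_SHIFT 12

struct half { unsigned long start, end, pgoff; };

static void split_at(unsigned long start, unsigned long end, unsigned long pgoff,
		     unsigned long addr, struct half *head, struct half *tail)
{
	unsigned long npages = (addr - start) >> PAGE_SHIFT;

	head->start = start;		/* head keeps the original offset */
	head->end   = addr;
	head->pgoff = pgoff;

	tail->start = addr;		/* tail starts at the split point */
	tail->end   = end;
	tail->pgoff = pgoff + npages;	/* skip the pages held by the head */
}

int main(void)
{
	struct half head, tail;

	split_at(0x10000, 0x18000, 0, 0x14000, &head, &tail);
	printf("head %lx-%lx pgoff %lu\n", head.start, head.end, head.pgoff);
	printf("tail %lx-%lx pgoff %lu\n", tail.start, tail.end, tail.pgoff);
	return 0;
}
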
1148/* 1461/*
1149 * release a mapping 1462 * shrink a VMA by removing the specified chunk from either the beginning or
1150 * - under NOMMU conditions the parameters must match exactly to the mapping to 1463 * the end
1151 * be removed
1152 */ 1464 */
1153int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) 1465static int shrink_vma(struct mm_struct *mm,
1466 struct vm_area_struct *vma,
1467 unsigned long from, unsigned long to)
1154{ 1468{
1155 struct vm_list_struct *vml, **parent; 1469 struct vm_region *region;
1156 unsigned long end = addr + len;
1157 1470
1158#ifdef DEBUG 1471 kenter("");
1159 printk("do_munmap:\n");
1160#endif
1161 1472
1162 for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) { 1473 /* adjust the VMA's pointers, which may reposition it in the MM's tree
1163 if ((*parent)->vma->vm_start > addr) 1474 * and list */
1164 break; 1475 delete_vma_from_mm(vma);
1165 if ((*parent)->vma->vm_start == addr && 1476 if (from > vma->vm_start)
1166 ((len == 0) || ((*parent)->vma->vm_end == end))) 1477 vma->vm_end = from;
1167 goto found; 1478 else
1479 vma->vm_start = to;
1480 add_vma_to_mm(mm, vma);
1481
1482 /* cut the backing region down to size */
1483 region = vma->vm_region;
1484 BUG_ON(atomic_read(&region->vm_usage) != 1);
1485
1486 down_write(&nommu_region_sem);
1487 delete_nommu_region(region);
1488 if (from > region->vm_start) {
1489 to = region->vm_top;
1490 region->vm_top = region->vm_end = from;
1491 } else {
1492 region->vm_start = to;
1168 } 1493 }
1494 add_nommu_region(region);
1495 up_write(&nommu_region_sem);
1169 1496
1170 printk("munmap of non-mmaped memory by process %d (%s): %p\n", 1497 free_page_series(from, to);
1171 current->pid, current->comm, (void *) addr); 1498 return 0;
1172 return -EINVAL; 1499}
1173 1500
1174 found: 1501/*
1175 vml = *parent; 1502 * release a mapping
1503 * - under NOMMU conditions the chunk to be unmapped must be backed by a single
1504 * VMA, though it need not cover the whole VMA
1505 */
1506int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1507{
1508 struct vm_area_struct *vma;
1509 struct rb_node *rb;
1510 unsigned long end = start + len;
1511 int ret;
1176 1512
1177 put_vma(mm, vml->vma); 1513 kenter(",%lx,%zx", start, len);
1178 1514
1179 *parent = vml->next; 1515 if (len == 0)
1180 realalloc -= kobjsize(vml); 1516 return -EINVAL;
1181 askedalloc -= sizeof(*vml);
1182 kfree(vml);
1183 1517
1184 update_hiwater_vm(mm); 1518 /* find the first potentially overlapping VMA */
1185 mm->total_vm -= len >> PAGE_SHIFT; 1519 vma = find_vma(mm, start);
1520 if (!vma) {
1521 printk(KERN_WARNING
1522 "munmap of memory not mmapped by process %d (%s):"
1523 " 0x%lx-0x%lx\n",
1524 current->pid, current->comm, start, start + len - 1);
1525 return -EINVAL;
1526 }
1186 1527
1187#ifdef DEBUG 1528 /* we're allowed to split an anonymous VMA but not a file-backed one */
1188 show_process_blocks(); 1529 if (vma->vm_file) {
1189#endif 1530 do {
1531 if (start > vma->vm_start) {
1532 kleave(" = -EINVAL [miss]");
1533 return -EINVAL;
1534 }
1535 if (end == vma->vm_end)
1536 goto erase_whole_vma;
1537 rb = rb_next(&vma->vm_rb);
1538 vma = rb_entry(rb, struct vm_area_struct, vm_rb);
1539 } while (rb);
1540 kleave(" = -EINVAL [split file]");
1541 return -EINVAL;
1542 } else {
1543 /* the chunk must be a subset of the VMA found */
1544 if (start == vma->vm_start && end == vma->vm_end)
1545 goto erase_whole_vma;
1546 if (start < vma->vm_start || end > vma->vm_end) {
1547 kleave(" = -EINVAL [superset]");
1548 return -EINVAL;
1549 }
1550 if (start & ~PAGE_MASK) {
1551 kleave(" = -EINVAL [unaligned start]");
1552 return -EINVAL;
1553 }
1554 if (end != vma->vm_end && end & ~PAGE_MASK) {
1555 kleave(" = -EINVAL [unaligned split]");
1556 return -EINVAL;
1557 }
1558 if (start != vma->vm_start && end != vma->vm_end) {
1559 ret = split_vma(mm, vma, start, 1);
1560 if (ret < 0) {
1561 kleave(" = %d [split]", ret);
1562 return ret;
1563 }
1564 }
1565 return shrink_vma(mm, vma, start, end);
1566 }
1190 1567
1568erase_whole_vma:
1569 delete_vma_from_mm(vma);
1570 delete_vma(mm, vma);
1571 kleave(" = 0");
1191 return 0; 1572 return 0;
1192} 1573}
1193EXPORT_SYMBOL(do_munmap); 1574EXPORT_SYMBOL(do_munmap);
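
do_munmap() above now accepts partial unmaps: an anonymous VMA may be shrunk from either end, or split first when the chunk is interior, while file-backed VMAs can only be erased whole. A sketch of that classification for a single candidate VMA, with the page-alignment checks and the multi-VMA walk for file mappings omitted (names are illustrative):

#include <stdio.h>

enum unmap_action {
	UNMAP_EINVAL,		/* request refused */
	UNMAP_WHOLE,		/* erase the whole VMA */
	UNMAP_SHRINK,		/* chop one end off an anonymous VMA */
	UNMAP_SPLIT_SHRINK,	/* interior chunk: split, then shrink */
};

static enum unmap_action classify_unmap(unsigned long vm_start, unsigned long vm_end,
					int file_backed,
					unsigned long start, unsigned long end)
{
	if (start == vm_start && end == vm_end)
		return UNMAP_WHOLE;
	if (file_backed)
		return UNMAP_EINVAL;		/* no partial file unmaps */
	if (start < vm_start || end > vm_end)
		return UNMAP_EINVAL;		/* must be a subset of the VMA */
	if (start != vm_start && end != vm_end)
		return UNMAP_SPLIT_SHRINK;
	return UNMAP_SHRINK;
}

int main(void)
{
	printf("%d\n", classify_unmap(0x1000, 0x9000, 0, 0x3000, 0x5000));
	printf("%d\n", classify_unmap(0x1000, 0x9000, 1, 0x1000, 0x5000));
	return 0;
}
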
@@ -1204,32 +1585,26 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len)
1204} 1585}
1205 1586
1206/* 1587/*
1207 * Release all mappings 1588 * release all the mappings made in a process's VM space
1208 */ 1589 */
1209void exit_mmap(struct mm_struct * mm) 1590void exit_mmap(struct mm_struct *mm)
1210{ 1591{
1211 struct vm_list_struct *tmp; 1592 struct vm_area_struct *vma;
1212
1213 if (mm) {
1214#ifdef DEBUG
1215 printk("Exit_mmap:\n");
1216#endif
1217 1593
1218 mm->total_vm = 0; 1594 if (!mm)
1595 return;
1219 1596
1220 while ((tmp = mm->context.vmlist)) { 1597 kenter("");
1221 mm->context.vmlist = tmp->next;
1222 put_vma(mm, tmp->vma);
1223 1598
1224 realalloc -= kobjsize(tmp); 1599 mm->total_vm = 0;
1225 askedalloc -= sizeof(*tmp);
1226 kfree(tmp);
1227 }
1228 1600
1229#ifdef DEBUG 1601 while ((vma = mm->mmap)) {
1230 show_process_blocks(); 1602 mm->mmap = vma->vm_next;
1231#endif 1603 delete_vma_from_mm(vma);
1604 delete_vma(mm, vma);
1232 } 1605 }
1606
1607 kleave("");
1233} 1608}
1234 1609
1235unsigned long do_brk(unsigned long addr, unsigned long len) 1610unsigned long do_brk(unsigned long addr, unsigned long len)
@@ -1242,8 +1617,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
1242 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1617 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
1243 * 1618 *
1244 * under NOMMU conditions, we only permit changing a mapping's size, and only 1619 * under NOMMU conditions, we only permit changing a mapping's size, and only
1245 * as long as it stays within the hole allocated by the kmalloc() call in 1620 * as long as it stays within the region allocated by do_mmap_private() and the
1246 * do_mmap_pgoff() and the block is not shareable 1621 * block is not shareable
1247 * 1622 *
1248 * MREMAP_FIXED is not supported under NOMMU conditions 1623 * MREMAP_FIXED is not supported under NOMMU conditions
1249 */ 1624 */
@@ -1254,13 +1629,16 @@ unsigned long do_mremap(unsigned long addr,
1254 struct vm_area_struct *vma; 1629 struct vm_area_struct *vma;
1255 1630
 1256 /* insanity checks first */ 1631 /* sanity checks first */
 1257 if (new_len == 0) 1632 if (old_len == 0 || new_len == 0)
 1258 return (unsigned long) -EINVAL; 1633 return (unsigned long) -EINVAL;
1259 1634
1635 if (addr & ~PAGE_MASK)
1636 return -EINVAL;
1637
1260 if (flags & MREMAP_FIXED && new_addr != addr) 1638 if (flags & MREMAP_FIXED && new_addr != addr)
1261 return (unsigned long) -EINVAL; 1639 return (unsigned long) -EINVAL;
1262 1640
1263 vma = find_vma_exact(current->mm, addr); 1641 vma = find_vma_exact(current->mm, addr, old_len);
1264 if (!vma) 1642 if (!vma)
1265 return (unsigned long) -EINVAL; 1643 return (unsigned long) -EINVAL;
1266 1644
@@ -1270,22 +1648,19 @@ unsigned long do_mremap(unsigned long addr,
1270 if (vma->vm_flags & VM_MAYSHARE) 1648 if (vma->vm_flags & VM_MAYSHARE)
1271 return (unsigned long) -EPERM; 1649 return (unsigned long) -EPERM;
1272 1650
1273 if (new_len > kobjsize((void *) addr)) 1651 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
1274 return (unsigned long) -ENOMEM; 1652 return (unsigned long) -ENOMEM;
1275 1653
1276 /* all checks complete - do it */ 1654 /* all checks complete - do it */
1277 vma->vm_end = vma->vm_start + new_len; 1655 vma->vm_end = vma->vm_start + new_len;
1278
1279 askedalloc -= old_len;
1280 askedalloc += new_len;
1281
1282 return vma->vm_start; 1656 return vma->vm_start;
1283} 1657}
1284EXPORT_SYMBOL(do_mremap); 1658EXPORT_SYMBOL(do_mremap);
1285 1659
1286asmlinkage unsigned long sys_mremap(unsigned long addr, 1660asmlinkage
1287 unsigned long old_len, unsigned long new_len, 1661unsigned long sys_mremap(unsigned long addr,
1288 unsigned long flags, unsigned long new_addr) 1662 unsigned long old_len, unsigned long new_len,
1663 unsigned long flags, unsigned long new_addr)
1289{ 1664{
1290 unsigned long ret; 1665 unsigned long ret;
1291 1666