aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOllie Wild <aaw@google.com>2007-07-19 04:48:16 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-19 13:04:45 -0400
commitb6a2fea39318e43fee84fa7b0b90d68bed92d2ba (patch)
treec9c3619cb2730b5c10c7427b837146bce3d69156
parentbdf4c48af20a3b0f01671799ace345e3d49576da (diff)
mm: variable length argument support
Remove the arg+env limit of MAX_ARG_PAGES by copying the strings directly from the old mm into the new mm. We create the new mm before the binfmt code runs, and place the new stack at the very top of the address space. Once the binfmt code runs and figures out where the stack should be, we move it downwards. It is a bit peculiar in that we have one task with two mm's, one of which is inactive. [a.p.zijlstra@chello.nl: limit stack size] Signed-off-by: Ollie Wild <aaw@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: <linux-arch@vger.kernel.org> Cc: Hugh Dickins <hugh@veritas.com> [bunk@stusta.de: unexport bprm_mm_init] Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/ia64/ia32/binfmt_elf32.c67
-rw-r--r--arch/x86_64/ia32/ia32_aout.c2
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c58
-rw-r--r--fs/binfmt_elf.c28
-rw-r--r--fs/binfmt_elf_fdpic.c8
-rw-r--r--fs/binfmt_misc.c4
-rw-r--r--fs/binfmt_script.c4
-rw-r--r--fs/compat.c128
-rw-r--r--fs/exec.c614
-rw-r--r--include/linux/binfmts.h18
-rw-r--r--include/linux/mm.h9
-rw-r--r--kernel/auditsc.c2
-rw-r--r--mm/mmap.c61
-rw-r--r--mm/mprotect.c2
-rw-r--r--mm/mremap.c2
15 files changed, 554 insertions, 453 deletions
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index 6f4d3d06f0ed..e1189ba1ca5e 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -195,62 +195,27 @@ ia64_elf32_init (struct pt_regs *regs)
195 ia32_load_state(current); 195 ia32_load_state(current);
196} 196}
197 197
198/*
199 * Undo the override of setup_arg_pages() without this ia32_setup_arg_pages()
200 * will suffer infinite self recursion.
201 */
202#undef setup_arg_pages
203
198int 204int
199ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) 205ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
200{ 206{
201 unsigned long stack_base; 207 int ret;
202 struct vm_area_struct *mpnt; 208
203 struct mm_struct *mm = current->mm; 209 ret = setup_arg_pages(bprm, IA32_STACK_TOP, executable_stack);
204 int i, ret; 210 if (!ret) {
205 211 /*
206 stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; 212 * Can't do it in ia64_elf32_init(). Needs to be done before
207 mm->arg_start = bprm->p + stack_base; 213 * calls to elf32_map()
208 214 */
209 bprm->p += stack_base; 215 current->thread.ppl = ia32_init_pp_list();
210 if (bprm->loader)
211 bprm->loader += stack_base;
212 bprm->exec += stack_base;
213
214 mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
215 if (!mpnt)
216 return -ENOMEM;
217
218 down_write(&current->mm->mmap_sem);
219 {
220 mpnt->vm_mm = current->mm;
221 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
222 mpnt->vm_end = IA32_STACK_TOP;
223 if (executable_stack == EXSTACK_ENABLE_X)
224 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
225 else if (executable_stack == EXSTACK_DISABLE_X)
226 mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
227 else
228 mpnt->vm_flags = VM_STACK_FLAGS;
229 mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
230 PAGE_COPY_EXEC: PAGE_COPY;
231 if ((ret = insert_vm_struct(current->mm, mpnt))) {
232 up_write(&current->mm->mmap_sem);
233 kmem_cache_free(vm_area_cachep, mpnt);
234 return ret;
235 }
236 current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
237 } 216 }
238 217
239 for (i = 0 ; i < MAX_ARG_PAGES ; i++) { 218 return ret;
240 struct page *page = bprm->page[i];
241 if (page) {
242 bprm->page[i] = NULL;
243 install_arg_page(mpnt, page, stack_base);
244 }
245 stack_base += PAGE_SIZE;
246 }
247 up_write(&current->mm->mmap_sem);
248
249 /* Can't do it in ia64_elf32_init(). Needs to be done before calls to
250 elf32_map() */
251 current->thread.ppl = ia32_init_pp_list();
252
253 return 0;
254} 219}
255 220
256static void 221static void
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index fe83edb93c10..08781370256d 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -404,7 +404,7 @@ beyond_if:
404 404
405 set_brk(current->mm->start_brk, current->mm->brk); 405 set_brk(current->mm->start_brk, current->mm->brk);
406 406
407 retval = ia32_setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); 407 retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
408 if (retval < 0) { 408 if (retval < 0) {
409 /* Someone check-me: is this error path enough? */ 409 /* Someone check-me: is this error path enough? */
410 send_sig(SIGKILL, current, 0); 410 send_sig(SIGKILL, current, 0);
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 185399baaf6d..ed56a8806eab 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -232,9 +232,6 @@ do { \
232#define load_elf_binary load_elf32_binary 232#define load_elf_binary load_elf32_binary
233 233
234#define ELF_PLAT_INIT(r, load_addr) elf32_init(r) 234#define ELF_PLAT_INIT(r, load_addr) elf32_init(r)
235#define setup_arg_pages(bprm, stack_top, exec_stack) \
236 ia32_setup_arg_pages(bprm, stack_top, exec_stack)
237int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack);
238 235
239#undef start_thread 236#undef start_thread
240#define start_thread(regs,new_rip,new_rsp) do { \ 237#define start_thread(regs,new_rip,new_rsp) do { \
@@ -286,61 +283,6 @@ static void elf32_init(struct pt_regs *regs)
286 me->thread.es = __USER_DS; 283 me->thread.es = __USER_DS;
287} 284}
288 285
289int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
290 int executable_stack)
291{
292 unsigned long stack_base;
293 struct vm_area_struct *mpnt;
294 struct mm_struct *mm = current->mm;
295 int i, ret;
296
297 stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE;
298 mm->arg_start = bprm->p + stack_base;
299
300 bprm->p += stack_base;
301 if (bprm->loader)
302 bprm->loader += stack_base;
303 bprm->exec += stack_base;
304
305 mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
306 if (!mpnt)
307 return -ENOMEM;
308
309 down_write(&mm->mmap_sem);
310 {
311 mpnt->vm_mm = mm;
312 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
313 mpnt->vm_end = stack_top;
314 if (executable_stack == EXSTACK_ENABLE_X)
315 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
316 else if (executable_stack == EXSTACK_DISABLE_X)
317 mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
318 else
319 mpnt->vm_flags = VM_STACK_FLAGS;
320 mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
321 PAGE_COPY_EXEC : PAGE_COPY;
322 if ((ret = insert_vm_struct(mm, mpnt))) {
323 up_write(&mm->mmap_sem);
324 kmem_cache_free(vm_area_cachep, mpnt);
325 return ret;
326 }
327 mm->stack_vm = mm->total_vm = vma_pages(mpnt);
328 }
329
330 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
331 struct page *page = bprm->page[i];
332 if (page) {
333 bprm->page[i] = NULL;
334 install_arg_page(mpnt, page, stack_base);
335 }
336 stack_base += PAGE_SIZE;
337 }
338 up_write(&mm->mmap_sem);
339
340 return 0;
341}
342EXPORT_SYMBOL(ia32_setup_arg_pages);
343
344#ifdef CONFIG_SYSCTL 286#ifdef CONFIG_SYSCTL
345/* Register vsyscall32 into the ABI table */ 287/* Register vsyscall32 into the ABI table */
346#include <linux/sysctl.h> 288#include <linux/sysctl.h>
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a27e42bf3400..295cbaa0e58a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -148,6 +148,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
148 elf_addr_t *elf_info; 148 elf_addr_t *elf_info;
149 int ei_index = 0; 149 int ei_index = 0;
150 struct task_struct *tsk = current; 150 struct task_struct *tsk = current;
151 struct vm_area_struct *vma;
151 152
152 /* 153 /*
153 * If this architecture has a platform capability string, copy it 154 * If this architecture has a platform capability string, copy it
@@ -234,6 +235,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
234 sp = (elf_addr_t __user *)bprm->p; 235 sp = (elf_addr_t __user *)bprm->p;
235#endif 236#endif
236 237
238
239 /*
240 * Grow the stack manually; some architectures have a limit on how
241 * far ahead a user-space access may be in order to grow the stack.
242 */
243 vma = find_extend_vma(current->mm, bprm->p);
244 if (!vma)
245 return -EFAULT;
246
237 /* Now, let's put argc (and argv, envp if appropriate) on the stack */ 247 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
238 if (__put_user(argc, sp++)) 248 if (__put_user(argc, sp++))
239 return -EFAULT; 249 return -EFAULT;
@@ -254,8 +264,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
254 size_t len; 264 size_t len;
255 if (__put_user((elf_addr_t)p, argv++)) 265 if (__put_user((elf_addr_t)p, argv++))
256 return -EFAULT; 266 return -EFAULT;
257 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); 267 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
258 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) 268 if (!len || len > MAX_ARG_STRLEN)
259 return 0; 269 return 0;
260 p += len; 270 p += len;
261 } 271 }
@@ -266,8 +276,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
266 size_t len; 276 size_t len;
267 if (__put_user((elf_addr_t)p, envp++)) 277 if (__put_user((elf_addr_t)p, envp++))
268 return -EFAULT; 278 return -EFAULT;
269 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); 279 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
270 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) 280 if (!len || len > MAX_ARG_STRLEN)
271 return 0; 281 return 0;
272 p += len; 282 p += len;
273 } 283 }
@@ -826,10 +836,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
826 } 836 }
827 837
828 /* OK, This is the point of no return */ 838 /* OK, This is the point of no return */
829 current->mm->start_data = 0;
830 current->mm->end_data = 0;
831 current->mm->end_code = 0;
832 current->mm->mmap = NULL;
833 current->flags &= ~PF_FORKNOEXEC; 839 current->flags &= ~PF_FORKNOEXEC;
834 current->mm->def_flags = def_flags; 840 current->mm->def_flags = def_flags;
835 841
@@ -1051,9 +1057,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
1051 1057
1052 compute_creds(bprm); 1058 compute_creds(bprm);
1053 current->flags &= ~PF_FORKNOEXEC; 1059 current->flags &= ~PF_FORKNOEXEC;
1054 create_elf_tables(bprm, &loc->elf_ex, 1060 retval = create_elf_tables(bprm, &loc->elf_ex,
1055 (interpreter_type == INTERPRETER_AOUT), 1061 (interpreter_type == INTERPRETER_AOUT),
1056 load_addr, interp_load_addr); 1062 load_addr, interp_load_addr);
1063 if (retval < 0) {
1064 send_sig(SIGKILL, current, 0);
1065 goto out;
1066 }
1057 /* N.B. passed_fileno might not be initialized? */ 1067 /* N.B. passed_fileno might not be initialized? */
1058 if (interpreter_type == INTERPRETER_AOUT) 1068 if (interpreter_type == INTERPRETER_AOUT)
1059 current->mm->arg_start += strlen(passed_fileno) + 1; 1069 current->mm->arg_start += strlen(passed_fileno) + 1;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9d62fbad3d4b..4739506fb083 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -621,8 +621,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
621 p = (char __user *) current->mm->arg_start; 621 p = (char __user *) current->mm->arg_start;
622 for (loop = bprm->argc; loop > 0; loop--) { 622 for (loop = bprm->argc; loop > 0; loop--) {
623 __put_user((elf_caddr_t) p, argv++); 623 __put_user((elf_caddr_t) p, argv++);
624 len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES); 624 len = strnlen_user(p, MAX_ARG_STRLEN);
625 if (!len || len > PAGE_SIZE * MAX_ARG_PAGES) 625 if (!len || len > MAX_ARG_STRLEN)
626 return -EINVAL; 626 return -EINVAL;
627 p += len; 627 p += len;
628 } 628 }
@@ -633,8 +633,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
633 current->mm->env_start = (unsigned long) p; 633 current->mm->env_start = (unsigned long) p;
634 for (loop = bprm->envc; loop > 0; loop--) { 634 for (loop = bprm->envc; loop > 0; loop--) {
635 __put_user((elf_caddr_t)(unsigned long) p, envp++); 635 __put_user((elf_caddr_t)(unsigned long) p, envp++);
636 len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES); 636 len = strnlen_user(p, MAX_ARG_STRLEN);
637 if (!len || len > PAGE_SIZE * MAX_ARG_PAGES) 637 if (!len || len > MAX_ARG_STRLEN)
638 return -EINVAL; 638 return -EINVAL;
639 p += len; 639 p += len;
640 } 640 }
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 330fd3fe8546..42e94b3ab7be 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -126,7 +126,9 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
126 goto _ret; 126 goto _ret;
127 127
128 if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { 128 if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
129 remove_arg_zero(bprm); 129 retval = remove_arg_zero(bprm);
130 if (retval)
131 goto _ret;
130 } 132 }
131 133
132 if (fmt->flags & MISC_FMT_OPEN_BINARY) { 134 if (fmt->flags & MISC_FMT_OPEN_BINARY) {
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 304c88544d89..4d0e0f6d3273 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -67,7 +67,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
67 * This is done in reverse order, because of how the 67 * This is done in reverse order, because of how the
68 * user environment and arguments are stored. 68 * user environment and arguments are stored.
69 */ 69 */
70 remove_arg_zero(bprm); 70 retval = remove_arg_zero(bprm);
71 if (retval)
72 return retval;
71 retval = copy_strings_kernel(1, &bprm->interp, bprm); 73 retval = copy_strings_kernel(1, &bprm->interp, bprm);
72 if (retval < 0) return retval; 74 if (retval < 0) return retval;
73 bprm->argc++; 75 bprm->argc++;
diff --git a/fs/compat.c b/fs/compat.c
index 4db6216e5266..15078ce4c04a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1257,6 +1257,7 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1257{ 1257{
1258 struct page *kmapped_page = NULL; 1258 struct page *kmapped_page = NULL;
1259 char *kaddr = NULL; 1259 char *kaddr = NULL;
1260 unsigned long kpos = 0;
1260 int ret; 1261 int ret;
1261 1262
1262 while (argc-- > 0) { 1263 while (argc-- > 0) {
@@ -1265,92 +1266,84 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1265 unsigned long pos; 1266 unsigned long pos;
1266 1267
1267 if (get_user(str, argv+argc) || 1268 if (get_user(str, argv+argc) ||
1268 !(len = strnlen_user(compat_ptr(str), bprm->p))) { 1269 !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
1269 ret = -EFAULT; 1270 ret = -EFAULT;
1270 goto out; 1271 goto out;
1271 } 1272 }
1272 1273
1273 if (bprm->p < len) { 1274 if (len > MAX_ARG_STRLEN) {
1274 ret = -E2BIG; 1275 ret = -E2BIG;
1275 goto out; 1276 goto out;
1276 } 1277 }
1277 1278
1278 bprm->p -= len; 1279 /* We're going to work our way backwards. */
1279 /* XXX: add architecture specific overflow check here. */
1280 pos = bprm->p; 1280 pos = bprm->p;
1281 str += len;
1282 bprm->p -= len;
1281 1283
1282 while (len > 0) { 1284 while (len > 0) {
1283 int i, new, err;
1284 int offset, bytes_to_copy; 1285 int offset, bytes_to_copy;
1285 struct page *page;
1286 1286
1287 offset = pos % PAGE_SIZE; 1287 offset = pos % PAGE_SIZE;
1288 i = pos/PAGE_SIZE; 1288 if (offset == 0)
1289 page = bprm->page[i]; 1289 offset = PAGE_SIZE;
1290 new = 0; 1290
1291 if (!page) { 1291 bytes_to_copy = offset;
1292 page = alloc_page(GFP_HIGHUSER); 1292 if (bytes_to_copy > len)
1293 bprm->page[i] = page; 1293 bytes_to_copy = len;
1294 if (!page) { 1294
1295 ret = -ENOMEM; 1295 offset -= bytes_to_copy;
1296 pos -= bytes_to_copy;
1297 str -= bytes_to_copy;
1298 len -= bytes_to_copy;
1299
1300 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
1301 struct page *page;
1302
1303#ifdef CONFIG_STACK_GROWSUP
1304 ret = expand_stack_downwards(bprm->vma, pos);
1305 if (ret < 0) {
1306 /* We've exceeded the stack rlimit. */
1307 ret = -E2BIG;
1308 goto out;
1309 }
1310#endif
1311 ret = get_user_pages(current, bprm->mm, pos,
1312 1, 1, 1, &page, NULL);
1313 if (ret <= 0) {
1314 /* We've exceeded the stack rlimit. */
1315 ret = -E2BIG;
1296 goto out; 1316 goto out;
1297 } 1317 }
1298 new = 1;
1299 }
1300 1318
1301 if (page != kmapped_page) { 1319 if (kmapped_page) {
1302 if (kmapped_page) 1320 flush_kernel_dcache_page(kmapped_page);
1303 kunmap(kmapped_page); 1321 kunmap(kmapped_page);
1322 put_page(kmapped_page);
1323 }
1304 kmapped_page = page; 1324 kmapped_page = page;
1305 kaddr = kmap(kmapped_page); 1325 kaddr = kmap(kmapped_page);
1326 kpos = pos & PAGE_MASK;
1327 flush_cache_page(bprm->vma, kpos,
1328 page_to_pfn(kmapped_page));
1306 } 1329 }
1307 if (new && offset) 1330 if (copy_from_user(kaddr+offset, compat_ptr(str),
1308 memset(kaddr, 0, offset); 1331 bytes_to_copy)) {
1309 bytes_to_copy = PAGE_SIZE - offset;
1310 if (bytes_to_copy > len) {
1311 bytes_to_copy = len;
1312 if (new)
1313 memset(kaddr+offset+len, 0,
1314 PAGE_SIZE-offset-len);
1315 }
1316 err = copy_from_user(kaddr+offset, compat_ptr(str),
1317 bytes_to_copy);
1318 if (err) {
1319 ret = -EFAULT; 1332 ret = -EFAULT;
1320 goto out; 1333 goto out;
1321 } 1334 }
1322
1323 pos += bytes_to_copy;
1324 str += bytes_to_copy;
1325 len -= bytes_to_copy;
1326 } 1335 }
1327 } 1336 }
1328 ret = 0; 1337 ret = 0;
1329out: 1338out:
1330 if (kmapped_page) 1339 if (kmapped_page) {
1340 flush_kernel_dcache_page(kmapped_page);
1331 kunmap(kmapped_page); 1341 kunmap(kmapped_page);
1332 return ret; 1342 put_page(kmapped_page);
1333}
1334
1335#ifdef CONFIG_MMU
1336
1337#define free_arg_pages(bprm) do { } while (0)
1338
1339#else
1340
1341static inline void free_arg_pages(struct linux_binprm *bprm)
1342{
1343 int i;
1344
1345 for (i = 0; i < MAX_ARG_PAGES; i++) {
1346 if (bprm->page[i])
1347 __free_page(bprm->page[i]);
1348 bprm->page[i] = NULL;
1349 } 1343 }
1344 return ret;
1350} 1345}
1351 1346
1352#endif /* CONFIG_MMU */
1353
1354/* 1347/*
1355 * compat_do_execve() is mostly a copy of do_execve(), with the exception 1348 * compat_do_execve() is mostly a copy of do_execve(), with the exception
1356 * that it processes 32 bit argv and envp pointers. 1349 * that it processes 32 bit argv and envp pointers.
@@ -1363,7 +1356,6 @@ int compat_do_execve(char * filename,
1363 struct linux_binprm *bprm; 1356 struct linux_binprm *bprm;
1364 struct file *file; 1357 struct file *file;
1365 int retval; 1358 int retval;
1366 int i;
1367 1359
1368 retval = -ENOMEM; 1360 retval = -ENOMEM;
1369 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); 1361 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1377,24 +1369,19 @@ int compat_do_execve(char * filename,
1377 1369
1378 sched_exec(); 1370 sched_exec();
1379 1371
1380 bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
1381 bprm->file = file; 1372 bprm->file = file;
1382 bprm->filename = filename; 1373 bprm->filename = filename;
1383 bprm->interp = filename; 1374 bprm->interp = filename;
1384 bprm->mm = mm_alloc();
1385 retval = -ENOMEM;
1386 if (!bprm->mm)
1387 goto out_file;
1388 1375
1389 retval = init_new_context(current, bprm->mm); 1376 retval = bprm_mm_init(bprm);
1390 if (retval < 0) 1377 if (retval)
1391 goto out_mm; 1378 goto out_file;
1392 1379
1393 bprm->argc = compat_count(argv, bprm->p / sizeof(compat_uptr_t)); 1380 bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
1394 if ((retval = bprm->argc) < 0) 1381 if ((retval = bprm->argc) < 0)
1395 goto out_mm; 1382 goto out_mm;
1396 1383
1397 bprm->envc = compat_count(envp, bprm->p / sizeof(compat_uptr_t)); 1384 bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
1398 if ((retval = bprm->envc) < 0) 1385 if ((retval = bprm->envc) < 0)
1399 goto out_mm; 1386 goto out_mm;
1400 1387
@@ -1421,8 +1408,6 @@ int compat_do_execve(char * filename,
1421 1408
1422 retval = search_binary_handler(bprm, regs); 1409 retval = search_binary_handler(bprm, regs);
1423 if (retval >= 0) { 1410 if (retval >= 0) {
1424 free_arg_pages(bprm);
1425
1426 /* execve success */ 1411 /* execve success */
1427 security_bprm_free(bprm); 1412 security_bprm_free(bprm);
1428 acct_update_integrals(current); 1413 acct_update_integrals(current);
@@ -1431,19 +1416,12 @@ int compat_do_execve(char * filename,
1431 } 1416 }
1432 1417
1433out: 1418out:
1434 /* Something went wrong, return the inode and free the argument pages*/
1435 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
1436 struct page * page = bprm->page[i];
1437 if (page)
1438 __free_page(page);
1439 }
1440
1441 if (bprm->security) 1419 if (bprm->security)
1442 security_bprm_free(bprm); 1420 security_bprm_free(bprm);
1443 1421
1444out_mm: 1422out_mm:
1445 if (bprm->mm) 1423 if (bprm->mm)
1446 mmdrop(bprm->mm); 1424 mmput(bprm->mm);
1447 1425
1448out_file: 1426out_file:
1449 if (bprm->file) { 1427 if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index 2e3f7950c185..498f2b3dca20 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -54,6 +54,7 @@
54 54
55#include <asm/uaccess.h> 55#include <asm/uaccess.h>
56#include <asm/mmu_context.h> 56#include <asm/mmu_context.h>
57#include <asm/tlb.h>
57 58
58#ifdef CONFIG_KMOD 59#ifdef CONFIG_KMOD
59#include <linux/kmod.h> 60#include <linux/kmod.h>
@@ -178,6 +179,207 @@ exit:
178 goto out; 179 goto out;
179} 180}
180 181
182#ifdef CONFIG_MMU
183
184static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
185 int write)
186{
187 struct page *page;
188 int ret;
189
190#ifdef CONFIG_STACK_GROWSUP
191 if (write) {
192 ret = expand_stack_downwards(bprm->vma, pos);
193 if (ret < 0)
194 return NULL;
195 }
196#endif
197 ret = get_user_pages(current, bprm->mm, pos,
198 1, write, 1, &page, NULL);
199 if (ret <= 0)
200 return NULL;
201
202 if (write) {
203 struct rlimit *rlim = current->signal->rlim;
204 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
205
206 /*
207 * Limit to 1/4-th the stack size for the argv+env strings.
208 * This ensures that:
209 * - the remaining binfmt code will not run out of stack space,
210 * - the program will have a reasonable amount of stack left
211 * to work from.
212 */
213 if (size > rlim[RLIMIT_STACK].rlim_cur / 4) {
214 put_page(page);
215 return NULL;
216 }
217 }
218
219 return page;
220}
221
222static void put_arg_page(struct page *page)
223{
224 put_page(page);
225}
226
227static void free_arg_page(struct linux_binprm *bprm, int i)
228{
229}
230
231static void free_arg_pages(struct linux_binprm *bprm)
232{
233}
234
235static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
236 struct page *page)
237{
238 flush_cache_page(bprm->vma, pos, page_to_pfn(page));
239}
240
241static int __bprm_mm_init(struct linux_binprm *bprm)
242{
243 int err = -ENOMEM;
244 struct vm_area_struct *vma = NULL;
245 struct mm_struct *mm = bprm->mm;
246
247 bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
248 if (!vma)
249 goto err;
250
251 down_write(&mm->mmap_sem);
252 vma->vm_mm = mm;
253
254 /*
255 * Place the stack at the largest stack address the architecture
256 * supports. Later, we'll move this to an appropriate place. We don't
257 * use STACK_TOP because that can depend on attributes which aren't
258 * configured yet.
259 */
260 vma->vm_end = STACK_TOP_MAX;
261 vma->vm_start = vma->vm_end - PAGE_SIZE;
262
263 vma->vm_flags = VM_STACK_FLAGS;
264 vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
265 err = insert_vm_struct(mm, vma);
266 if (err) {
267 up_write(&mm->mmap_sem);
268 goto err;
269 }
270
271 mm->stack_vm = mm->total_vm = 1;
272 up_write(&mm->mmap_sem);
273
274 bprm->p = vma->vm_end - sizeof(void *);
275
276 return 0;
277
278err:
279 if (vma) {
280 bprm->vma = NULL;
281 kmem_cache_free(vm_area_cachep, vma);
282 }
283
284 return err;
285}
286
287static bool valid_arg_len(struct linux_binprm *bprm, long len)
288{
289 return len <= MAX_ARG_STRLEN;
290}
291
292#else
293
294static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
295 int write)
296{
297 struct page *page;
298
299 page = bprm->page[pos / PAGE_SIZE];
300 if (!page && write) {
301 page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
302 if (!page)
303 return NULL;
304 bprm->page[pos / PAGE_SIZE] = page;
305 }
306
307 return page;
308}
309
310static void put_arg_page(struct page *page)
311{
312}
313
314static void free_arg_page(struct linux_binprm *bprm, int i)
315{
316 if (bprm->page[i]) {
317 __free_page(bprm->page[i]);
318 bprm->page[i] = NULL;
319 }
320}
321
322static void free_arg_pages(struct linux_binprm *bprm)
323{
324 int i;
325
326 for (i = 0; i < MAX_ARG_PAGES; i++)
327 free_arg_page(bprm, i);
328}
329
330static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
331 struct page *page)
332{
333}
334
335static int __bprm_mm_init(struct linux_binprm *bprm)
336{
337 bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
338 return 0;
339}
340
341static bool valid_arg_len(struct linux_binprm *bprm, long len)
342{
343 return len <= bprm->p;
344}
345
346#endif /* CONFIG_MMU */
347
348/*
349 * Create a new mm_struct and populate it with a temporary stack
350 * vm_area_struct. We don't have enough context at this point to set the stack
351 * flags, permissions, and offset, so we use temporary values. We'll update
352 * them later in setup_arg_pages().
353 */
354int bprm_mm_init(struct linux_binprm *bprm)
355{
356 int err;
357 struct mm_struct *mm = NULL;
358
359 bprm->mm = mm = mm_alloc();
360 err = -ENOMEM;
361 if (!mm)
362 goto err;
363
364 err = init_new_context(current, mm);
365 if (err)
366 goto err;
367
368 err = __bprm_mm_init(bprm);
369 if (err)
370 goto err;
371
372 return 0;
373
374err:
375 if (mm) {
376 bprm->mm = NULL;
377 mmdrop(mm);
378 }
379
380 return err;
381}
382
181/* 383/*
182 * count() counts the number of strings in array ARGV. 384 * count() counts the number of strings in array ARGV.
183 */ 385 */
@@ -203,15 +405,16 @@ static int count(char __user * __user * argv, int max)
203} 405}
204 406
205/* 407/*
206 * 'copy_strings()' copies argument/environment strings from user 408 * 'copy_strings()' copies argument/environment strings from the old
207 * memory to free pages in kernel mem. These are in a format ready 409 * processes's memory to the new process's stack. The call to get_user_pages()
208 * to be put directly into the top of new user memory. 410 * ensures the destination page is created and not swapped out.
209 */ 411 */
210static int copy_strings(int argc, char __user * __user * argv, 412static int copy_strings(int argc, char __user * __user * argv,
211 struct linux_binprm *bprm) 413 struct linux_binprm *bprm)
212{ 414{
213 struct page *kmapped_page = NULL; 415 struct page *kmapped_page = NULL;
214 char *kaddr = NULL; 416 char *kaddr = NULL;
417 unsigned long kpos = 0;
215 int ret; 418 int ret;
216 419
217 while (argc-- > 0) { 420 while (argc-- > 0) {
@@ -220,69 +423,69 @@ static int copy_strings(int argc, char __user * __user * argv,
220 unsigned long pos; 423 unsigned long pos;
221 424
222 if (get_user(str, argv+argc) || 425 if (get_user(str, argv+argc) ||
223 !(len = strnlen_user(str, bprm->p))) { 426 !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
224 ret = -EFAULT; 427 ret = -EFAULT;
225 goto out; 428 goto out;
226 } 429 }
227 430
228 if (bprm->p < len) { 431 if (!valid_arg_len(bprm, len)) {
229 ret = -E2BIG; 432 ret = -E2BIG;
230 goto out; 433 goto out;
231 } 434 }
232 435
233 bprm->p -= len; 436 /* We're going to work our way backwards. */
234 /* XXX: add architecture specific overflow check here. */
235 pos = bprm->p; 437 pos = bprm->p;
438 str += len;
439 bprm->p -= len;
236 440
237 while (len > 0) { 441 while (len > 0) {
238 int i, new, err;
239 int offset, bytes_to_copy; 442 int offset, bytes_to_copy;
240 struct page *page;
241 443
242 offset = pos % PAGE_SIZE; 444 offset = pos % PAGE_SIZE;
243 i = pos/PAGE_SIZE; 445 if (offset == 0)
244 page = bprm->page[i]; 446 offset = PAGE_SIZE;
245 new = 0; 447
246 if (!page) { 448 bytes_to_copy = offset;
247 page = alloc_page(GFP_HIGHUSER); 449 if (bytes_to_copy > len)
248 bprm->page[i] = page; 450 bytes_to_copy = len;
451
452 offset -= bytes_to_copy;
453 pos -= bytes_to_copy;
454 str -= bytes_to_copy;
455 len -= bytes_to_copy;
456
457 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
458 struct page *page;
459
460 page = get_arg_page(bprm, pos, 1);
249 if (!page) { 461 if (!page) {
250 ret = -ENOMEM; 462 ret = -E2BIG;
251 goto out; 463 goto out;
252 } 464 }
253 new = 1;
254 }
255 465
256 if (page != kmapped_page) { 466 if (kmapped_page) {
257 if (kmapped_page) 467 flush_kernel_dcache_page(kmapped_page);
258 kunmap(kmapped_page); 468 kunmap(kmapped_page);
469 put_arg_page(kmapped_page);
470 }
259 kmapped_page = page; 471 kmapped_page = page;
260 kaddr = kmap(kmapped_page); 472 kaddr = kmap(kmapped_page);
473 kpos = pos & PAGE_MASK;
474 flush_arg_page(bprm, kpos, kmapped_page);
261 } 475 }
262 if (new && offset) 476 if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
263 memset(kaddr, 0, offset);
264 bytes_to_copy = PAGE_SIZE - offset;
265 if (bytes_to_copy > len) {
266 bytes_to_copy = len;
267 if (new)
268 memset(kaddr+offset+len, 0,
269 PAGE_SIZE-offset-len);
270 }
271 err = copy_from_user(kaddr+offset, str, bytes_to_copy);
272 if (err) {
273 ret = -EFAULT; 477 ret = -EFAULT;
274 goto out; 478 goto out;
275 } 479 }
276
277 pos += bytes_to_copy;
278 str += bytes_to_copy;
279 len -= bytes_to_copy;
280 } 480 }
281 } 481 }
282 ret = 0; 482 ret = 0;
283out: 483out:
284 if (kmapped_page) 484 if (kmapped_page) {
485 flush_kernel_dcache_page(kmapped_page);
285 kunmap(kmapped_page); 486 kunmap(kmapped_page);
487 put_arg_page(kmapped_page);
488 }
286 return ret; 489 return ret;
287} 490}
288 491
@@ -298,181 +501,172 @@ int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
298 set_fs(oldfs); 501 set_fs(oldfs);
299 return r; 502 return r;
300} 503}
301
302EXPORT_SYMBOL(copy_strings_kernel); 504EXPORT_SYMBOL(copy_strings_kernel);
303 505
304#ifdef CONFIG_MMU 506#ifdef CONFIG_MMU
507
305/* 508/*
306 * This routine is used to map in a page into an address space: needed by 509 * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
307 * execve() for the initial stack and environment pages. 510 * the binfmt code determines where the new stack should reside, we shift it to
511 * its final location. The process proceeds as follows:
308 * 512 *
309 * vma->vm_mm->mmap_sem is held for writing. 513 * 1) Use shift to calculate the new vma endpoints.
514 * 2) Extend vma to cover both the old and new ranges. This ensures the
515 * arguments passed to subsequent functions are consistent.
516 * 3) Move vma's page tables to the new range.
517 * 4) Free up any cleared pgd range.
518 * 5) Shrink the vma to cover only the new range.
310 */ 519 */
311void install_arg_page(struct vm_area_struct *vma, 520static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
312 struct page *page, unsigned long address)
313{ 521{
314 struct mm_struct *mm = vma->vm_mm; 522 struct mm_struct *mm = vma->vm_mm;
315 pte_t * pte; 523 unsigned long old_start = vma->vm_start;
316 spinlock_t *ptl; 524 unsigned long old_end = vma->vm_end;
525 unsigned long length = old_end - old_start;
526 unsigned long new_start = old_start - shift;
527 unsigned long new_end = old_end - shift;
528 struct mmu_gather *tlb;
317 529
318 if (unlikely(anon_vma_prepare(vma))) 530 BUG_ON(new_start > new_end);
319 goto out;
320 531
321 flush_dcache_page(page); 532 /*
322 pte = get_locked_pte(mm, address, &ptl); 533 * ensure there are no vmas between where we want to go
323 if (!pte) 534 * and where we are
324 goto out; 535 */
325 if (!pte_none(*pte)) { 536 if (vma != find_vma(mm, new_start))
326 pte_unmap_unlock(pte, ptl); 537 return -EFAULT;
327 goto out; 538
539 /*
540 * cover the whole range: [new_start, old_end)
541 */
542 vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL);
543
544 /*
545 * move the page tables downwards, on failure we rely on
546 * process cleanup to remove whatever mess we made.
547 */
548 if (length != move_page_tables(vma, old_start,
549 vma, new_start, length))
550 return -ENOMEM;
551
552 lru_add_drain();
553 tlb = tlb_gather_mmu(mm, 0);
554 if (new_end > old_start) {
555 /*
556 * when the old and new regions overlap clear from new_end.
557 */
558 free_pgd_range(&tlb, new_end, old_end, new_end,
559 vma->vm_next ? vma->vm_next->vm_start : 0);
560 } else {
561 /*
562 * otherwise, clean from old_start; this is done to not touch
563 * the address space in [new_end, old_start) some architectures
564 * have constraints on va-space that make this illegal (IA64) -
565 * for the others its just a little faster.
566 */
567 free_pgd_range(&tlb, old_start, old_end, new_end,
568 vma->vm_next ? vma->vm_next->vm_start : 0);
328 } 569 }
329 inc_mm_counter(mm, anon_rss); 570 tlb_finish_mmu(tlb, new_end, old_end);
330 lru_cache_add_active(page); 571
331 set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( 572 /*
332 page, vma->vm_page_prot)))); 573 * shrink the vma to just the new range.
333 page_add_new_anon_rmap(page, vma, address); 574 */
334 pte_unmap_unlock(pte, ptl); 575 vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
335 576
336 /* no need for flush_tlb */ 577 return 0;
337 return;
338out:
339 __free_page(page);
340 force_sig(SIGKILL, current);
341} 578}
342 579
343#define EXTRA_STACK_VM_PAGES 20 /* random */ 580#define EXTRA_STACK_VM_PAGES 20 /* random */
344 581
582/*
583 * Finalizes the stack vm_area_struct. The flags and permissions are updated,
584 * the stack is optionally relocated, and some extra space is added.
585 */
345int setup_arg_pages(struct linux_binprm *bprm, 586int setup_arg_pages(struct linux_binprm *bprm,
346 unsigned long stack_top, 587 unsigned long stack_top,
347 int executable_stack) 588 int executable_stack)
348{ 589{
349 unsigned long stack_base; 590 unsigned long ret;
350 struct vm_area_struct *mpnt; 591 unsigned long stack_shift;
351 struct mm_struct *mm = current->mm; 592 struct mm_struct *mm = current->mm;
352 int i, ret; 593 struct vm_area_struct *vma = bprm->vma;
353 long arg_size; 594 struct vm_area_struct *prev = NULL;
595 unsigned long vm_flags;
596 unsigned long stack_base;
354 597
355#ifdef CONFIG_STACK_GROWSUP 598#ifdef CONFIG_STACK_GROWSUP
356 /* Move the argument and environment strings to the bottom of the
357 * stack space.
358 */
359 int offset, j;
360 char *to, *from;
361
362 /* Start by shifting all the pages down */
363 i = 0;
364 for (j = 0; j < MAX_ARG_PAGES; j++) {
365 struct page *page = bprm->page[j];
366 if (!page)
367 continue;
368 bprm->page[i++] = page;
369 }
370
371 /* Now move them within their pages */
372 offset = bprm->p % PAGE_SIZE;
373 to = kmap(bprm->page[0]);
374 for (j = 1; j < i; j++) {
375 memmove(to, to + offset, PAGE_SIZE - offset);
376 from = kmap(bprm->page[j]);
377 memcpy(to + PAGE_SIZE - offset, from, offset);
378 kunmap(bprm->page[j - 1]);
379 to = from;
380 }
381 memmove(to, to + offset, PAGE_SIZE - offset);
382 kunmap(bprm->page[j - 1]);
383
384 /* Limit stack size to 1GB */ 599 /* Limit stack size to 1GB */
385 stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max; 600 stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max;
386 if (stack_base > (1 << 30)) 601 if (stack_base > (1 << 30))
387 stack_base = 1 << 30; 602 stack_base = 1 << 30;
388 stack_base = PAGE_ALIGN(stack_top - stack_base);
389 603
390 /* Adjust bprm->p to point to the end of the strings. */ 604 /* Make sure we didn't let the argument array grow too large. */
391 bprm->p = stack_base + PAGE_SIZE * i - offset; 605 if (vma->vm_end - vma->vm_start > stack_base)
606 return -ENOMEM;
392 607
393 mm->arg_start = stack_base; 608 stack_base = PAGE_ALIGN(stack_top - stack_base);
394 arg_size = i << PAGE_SHIFT;
395 609
396 /* zero pages that were copied above */ 610 stack_shift = vma->vm_start - stack_base;
397 while (i < MAX_ARG_PAGES) 611 mm->arg_start = bprm->p - stack_shift;
398 bprm->page[i++] = NULL; 612 bprm->p = vma->vm_end - stack_shift;
399#else 613#else
400 stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*PAGE_SIZE); 614 stack_top = arch_align_stack(stack_top);
401 stack_base = PAGE_ALIGN(stack_base); 615 stack_top = PAGE_ALIGN(stack_top);
402 bprm->p += stack_base; 616 stack_shift = vma->vm_end - stack_top;
617
618 bprm->p -= stack_shift;
403 mm->arg_start = bprm->p; 619 mm->arg_start = bprm->p;
404 arg_size = stack_top - (PAGE_MASK & (unsigned long) mm->arg_start);
405#endif 620#endif
406 621
407 arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE;
408
409 if (bprm->loader) 622 if (bprm->loader)
410 bprm->loader += stack_base; 623 bprm->loader -= stack_shift;
411 bprm->exec += stack_base; 624 bprm->exec -= stack_shift;
412
413 mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
414 if (!mpnt)
415 return -ENOMEM;
416 625
417 down_write(&mm->mmap_sem); 626 down_write(&mm->mmap_sem);
418 { 627 vm_flags = vma->vm_flags;
419 mpnt->vm_mm = mm; 628
420#ifdef CONFIG_STACK_GROWSUP 629 /*
421 mpnt->vm_start = stack_base; 630 * Adjust stack execute permissions; explicitly enable for
422 mpnt->vm_end = stack_base + arg_size; 631 * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
423#else 632 * (arch default) otherwise.
424 mpnt->vm_end = stack_top; 633 */
425 mpnt->vm_start = mpnt->vm_end - arg_size; 634 if (unlikely(executable_stack == EXSTACK_ENABLE_X))
426#endif 635 vm_flags |= VM_EXEC;
427 /* Adjust stack execute permissions; explicitly enable 636 else if (executable_stack == EXSTACK_DISABLE_X)
428 * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X 637 vm_flags &= ~VM_EXEC;
429 * and leave alone (arch default) otherwise. */ 638 vm_flags |= mm->def_flags;
430 if (unlikely(executable_stack == EXSTACK_ENABLE_X)) 639
431 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; 640 ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
432 else if (executable_stack == EXSTACK_DISABLE_X) 641 vm_flags);
433 mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; 642 if (ret)
434 else 643 goto out_unlock;
435 mpnt->vm_flags = VM_STACK_FLAGS; 644 BUG_ON(prev != vma);
436 mpnt->vm_flags |= mm->def_flags; 645
437 mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; 646 /* Move stack pages down in memory. */
438 if ((ret = insert_vm_struct(mm, mpnt))) { 647 if (stack_shift) {
648 ret = shift_arg_pages(vma, stack_shift);
649 if (ret) {
439 up_write(&mm->mmap_sem); 650 up_write(&mm->mmap_sem);
440 kmem_cache_free(vm_area_cachep, mpnt);
441 return ret; 651 return ret;
442 } 652 }
443 mm->stack_vm = mm->total_vm = vma_pages(mpnt);
444 } 653 }
445 654
446 for (i = 0 ; i < MAX_ARG_PAGES ; i++) { 655#ifdef CONFIG_STACK_GROWSUP
447 struct page *page = bprm->page[i]; 656 stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE;
448 if (page) { 657#else
449 bprm->page[i] = NULL; 658 stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE;
450 install_arg_page(mpnt, page, stack_base); 659#endif
451 } 660 ret = expand_stack(vma, stack_base);
452 stack_base += PAGE_SIZE; 661 if (ret)
453 } 662 ret = -EFAULT;
663
664out_unlock:
454 up_write(&mm->mmap_sem); 665 up_write(&mm->mmap_sem);
455
456 return 0; 666 return 0;
457} 667}
458
459EXPORT_SYMBOL(setup_arg_pages); 668EXPORT_SYMBOL(setup_arg_pages);
460 669
461#define free_arg_pages(bprm) do { } while (0)
462
463#else
464
465static inline void free_arg_pages(struct linux_binprm *bprm)
466{
467 int i;
468
469 for (i = 0; i < MAX_ARG_PAGES; i++) {
470 if (bprm->page[i])
471 __free_page(bprm->page[i]);
472 bprm->page[i] = NULL;
473 }
474}
475
476#endif /* CONFIG_MMU */ 670#endif /* CONFIG_MMU */
477 671
478struct file *open_exec(const char *name) 672struct file *open_exec(const char *name)
@@ -1000,43 +1194,42 @@ EXPORT_SYMBOL(compute_creds);
1000 * points to; chop off the first by relocating brpm->p to right after 1194 * points to; chop off the first by relocating brpm->p to right after
1001 * the first '\0' encountered. 1195 * the first '\0' encountered.
1002 */ 1196 */
1003void remove_arg_zero(struct linux_binprm *bprm) 1197int remove_arg_zero(struct linux_binprm *bprm)
1004{ 1198{
1005 if (bprm->argc) { 1199 int ret = 0;
1006 char ch; 1200 unsigned long offset;
1201 char *kaddr;
1202 struct page *page;
1007 1203
1008 do { 1204 if (!bprm->argc)
1009 unsigned long offset; 1205 return 0;
1010 unsigned long index;
1011 char *kaddr;
1012 struct page *page;
1013
1014 offset = bprm->p & ~PAGE_MASK;
1015 index = bprm->p >> PAGE_SHIFT;
1016 1206
1017 page = bprm->page[index]; 1207 do {
1018 kaddr = kmap_atomic(page, KM_USER0); 1208 offset = bprm->p & ~PAGE_MASK;
1209 page = get_arg_page(bprm, bprm->p, 0);
1210 if (!page) {
1211 ret = -EFAULT;
1212 goto out;
1213 }
1214 kaddr = kmap_atomic(page, KM_USER0);
1019 1215
1020 /* run through page until we reach end or find NUL */ 1216 for (; offset < PAGE_SIZE && kaddr[offset];
1021 do { 1217 offset++, bprm->p++)
1022 ch = *(kaddr + offset); 1218 ;
1023 1219
1024 /* discard that character... */ 1220 kunmap_atomic(kaddr, KM_USER0);
1025 bprm->p++; 1221 put_arg_page(page);
1026 offset++;
1027 } while (offset < PAGE_SIZE && ch != '\0');
1028 1222
1029 kunmap_atomic(kaddr, KM_USER0); 1223 if (offset == PAGE_SIZE)
1224 free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1);
1225 } while (offset == PAGE_SIZE);
1030 1226
1031 /* free the old page */ 1227 bprm->p++;
1032 if (offset == PAGE_SIZE) { 1228 bprm->argc--;
1033 __free_page(page); 1229 ret = 0;
1034 bprm->page[index] = NULL;
1035 }
1036 } while (ch != '\0');
1037 1230
1038 bprm->argc--; 1231out:
1039 } 1232 return ret;
1040} 1233}
1041EXPORT_SYMBOL(remove_arg_zero); 1234EXPORT_SYMBOL(remove_arg_zero);
1042 1235
@@ -1062,7 +1255,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1062 fput(bprm->file); 1255 fput(bprm->file);
1063 bprm->file = NULL; 1256 bprm->file = NULL;
1064 1257
1065 loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); 1258 loader = bprm->vma->vm_end - sizeof(void *);
1066 1259
1067 file = open_exec("/sbin/loader"); 1260 file = open_exec("/sbin/loader");
1068 retval = PTR_ERR(file); 1261 retval = PTR_ERR(file);
@@ -1156,7 +1349,6 @@ int do_execve(char * filename,
1156 struct file *file; 1349 struct file *file;
1157 unsigned long env_p; 1350 unsigned long env_p;
1158 int retval; 1351 int retval;
1159 int i;
1160 1352
1161 retval = -ENOMEM; 1353 retval = -ENOMEM;
1162 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); 1354 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
@@ -1170,25 +1362,19 @@ int do_execve(char * filename,
1170 1362
1171 sched_exec(); 1363 sched_exec();
1172 1364
1173 bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
1174
1175 bprm->file = file; 1365 bprm->file = file;
1176 bprm->filename = filename; 1366 bprm->filename = filename;
1177 bprm->interp = filename; 1367 bprm->interp = filename;
1178 bprm->mm = mm_alloc();
1179 retval = -ENOMEM;
1180 if (!bprm->mm)
1181 goto out_file;
1182 1368
1183 retval = init_new_context(current, bprm->mm); 1369 retval = bprm_mm_init(bprm);
1184 if (retval < 0) 1370 if (retval)
1185 goto out_mm; 1371 goto out_file;
1186 1372
1187 bprm->argc = count(argv, bprm->p / sizeof(void *)); 1373 bprm->argc = count(argv, MAX_ARG_STRINGS);
1188 if ((retval = bprm->argc) < 0) 1374 if ((retval = bprm->argc) < 0)
1189 goto out_mm; 1375 goto out_mm;
1190 1376
1191 bprm->envc = count(envp, bprm->p / sizeof(void *)); 1377 bprm->envc = count(envp, MAX_ARG_STRINGS);
1192 if ((retval = bprm->envc) < 0) 1378 if ((retval = bprm->envc) < 0)
1193 goto out_mm; 1379 goto out_mm;
1194 1380
@@ -1217,9 +1403,8 @@ int do_execve(char * filename,
1217 1403
1218 retval = search_binary_handler(bprm,regs); 1404 retval = search_binary_handler(bprm,regs);
1219 if (retval >= 0) { 1405 if (retval >= 0) {
1220 free_arg_pages(bprm);
1221
1222 /* execve success */ 1406 /* execve success */
1407 free_arg_pages(bprm);
1223 security_bprm_free(bprm); 1408 security_bprm_free(bprm);
1224 acct_update_integrals(current); 1409 acct_update_integrals(current);
1225 kfree(bprm); 1410 kfree(bprm);
@@ -1227,26 +1412,19 @@ int do_execve(char * filename,
1227 } 1412 }
1228 1413
1229out: 1414out:
1230 /* Something went wrong, return the inode and free the argument pages*/ 1415 free_arg_pages(bprm);
1231 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
1232 struct page * page = bprm->page[i];
1233 if (page)
1234 __free_page(page);
1235 }
1236
1237 if (bprm->security) 1416 if (bprm->security)
1238 security_bprm_free(bprm); 1417 security_bprm_free(bprm);
1239 1418
1240out_mm: 1419out_mm:
1241 if (bprm->mm) 1420 if (bprm->mm)
1242 mmdrop(bprm->mm); 1421 mmput (bprm->mm);
1243 1422
1244out_file: 1423out_file:
1245 if (bprm->file) { 1424 if (bprm->file) {
1246 allow_write_access(bprm->file); 1425 allow_write_access(bprm->file);
1247 fput(bprm->file); 1426 fput(bprm->file);
1248 } 1427 }
1249
1250out_kfree: 1428out_kfree:
1251 kfree(bprm); 1429 kfree(bprm);
1252 1430
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a0b209cd5761..91c8c07fe8b7 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -6,11 +6,13 @@
6struct pt_regs; 6struct pt_regs;
7 7
8/* 8/*
9 * MAX_ARG_PAGES defines the number of pages allocated for arguments 9 * These are the maximum length and maximum number of strings passed to the
10 * and envelope for the new program. 32 should suffice, this gives 10 * execve() system call. MAX_ARG_STRLEN is essentially random but serves to
11 * a maximum env+arg of 128kB w/4KB pages! 11 * prevent the kernel from being unduly impacted by misaddressed pointers.
12 * MAX_ARG_STRINGS is chosen to fit in a signed 32-bit integer.
12 */ 13 */
13#define MAX_ARG_PAGES 32 14#define MAX_ARG_STRLEN (PAGE_SIZE * 32)
15#define MAX_ARG_STRINGS 0x7FFFFFFF
14 16
15/* sizeof(linux_binprm->buf) */ 17/* sizeof(linux_binprm->buf) */
16#define BINPRM_BUF_SIZE 128 18#define BINPRM_BUF_SIZE 128
@@ -24,7 +26,12 @@ struct pt_regs;
24 */ 26 */
25struct linux_binprm{ 27struct linux_binprm{
26 char buf[BINPRM_BUF_SIZE]; 28 char buf[BINPRM_BUF_SIZE];
29#ifdef CONFIG_MMU
30 struct vm_area_struct *vma;
31#else
32# define MAX_ARG_PAGES 32
27 struct page *page[MAX_ARG_PAGES]; 33 struct page *page[MAX_ARG_PAGES];
34#endif
28 struct mm_struct *mm; 35 struct mm_struct *mm;
29 unsigned long p; /* current top of mem */ 36 unsigned long p; /* current top of mem */
30 int sh_bang; 37 int sh_bang;
@@ -69,7 +76,7 @@ extern int register_binfmt(struct linux_binfmt *);
69extern int unregister_binfmt(struct linux_binfmt *); 76extern int unregister_binfmt(struct linux_binfmt *);
70 77
71extern int prepare_binprm(struct linux_binprm *); 78extern int prepare_binprm(struct linux_binprm *);
72extern void remove_arg_zero(struct linux_binprm *); 79extern int __must_check remove_arg_zero(struct linux_binprm *);
73extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); 80extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
74extern int flush_old_exec(struct linux_binprm * bprm); 81extern int flush_old_exec(struct linux_binprm * bprm);
75 82
@@ -86,6 +93,7 @@ extern int suid_dumpable;
86extern int setup_arg_pages(struct linux_binprm * bprm, 93extern int setup_arg_pages(struct linux_binprm * bprm,
87 unsigned long stack_top, 94 unsigned long stack_top,
88 int executable_stack); 95 int executable_stack);
96extern int bprm_mm_init(struct linux_binprm *bprm);
89extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); 97extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
90extern void compute_creds(struct linux_binprm *binprm); 98extern void compute_creds(struct linux_binprm *binprm);
91extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); 99extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 50a0ed1d1806..c456c3a1c28e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -808,7 +808,6 @@ static inline int handle_mm_fault(struct mm_struct *mm,
808 808
809extern int make_pages_present(unsigned long addr, unsigned long end); 809extern int make_pages_present(unsigned long addr, unsigned long end);
810extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); 810extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
811void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
812 811
813int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, 812int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
814 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); 813 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
@@ -825,9 +824,15 @@ int FASTCALL(set_page_dirty(struct page *page));
825int set_page_dirty_lock(struct page *page); 824int set_page_dirty_lock(struct page *page);
826int clear_page_dirty_for_io(struct page *page); 825int clear_page_dirty_for_io(struct page *page);
827 826
827extern unsigned long move_page_tables(struct vm_area_struct *vma,
828 unsigned long old_addr, struct vm_area_struct *new_vma,
829 unsigned long new_addr, unsigned long len);
828extern unsigned long do_mremap(unsigned long addr, 830extern unsigned long do_mremap(unsigned long addr,
829 unsigned long old_len, unsigned long new_len, 831 unsigned long old_len, unsigned long new_len,
830 unsigned long flags, unsigned long new_addr); 832 unsigned long flags, unsigned long new_addr);
833extern int mprotect_fixup(struct vm_area_struct *vma,
834 struct vm_area_struct **pprev, unsigned long start,
835 unsigned long end, unsigned long newflags);
831 836
832/* 837/*
833 * A callback you can register to apply pressure to ageable caches. 838 * A callback you can register to apply pressure to ageable caches.
@@ -1159,6 +1164,8 @@ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
1159#ifdef CONFIG_IA64 1164#ifdef CONFIG_IA64
1160extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); 1165extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
1161#endif 1166#endif
1167extern int expand_stack_downwards(struct vm_area_struct *vma,
1168 unsigned long address);
1162 1169
1163/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 1170/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
1164extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); 1171extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 535586fc498b..145cbb79c4b9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -843,7 +843,7 @@ static void audit_log_execve_info(struct audit_buffer *ab,
843 return; /* execve failed, no additional info */ 843 return; /* execve failed, no additional info */
844 844
845 for (i = 0; i < axi->argc; i++, p += len) { 845 for (i = 0; i < axi->argc; i++, p += len) {
846 len = strnlen_user(p, MAX_ARG_PAGES*PAGE_SIZE); 846 len = strnlen_user(p, MAX_ARG_STRLEN);
847 /* 847 /*
848 * We just created this mm, if we can't find the strings 848 * We just created this mm, if we can't find the strings
849 * we just copied into it something is _very_ wrong. Similar 849 * we just copied into it something is _very_ wrong. Similar
diff --git a/mm/mmap.c b/mm/mmap.c
index 724f342bcf89..7afc7a7cec6f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1571,33 +1571,11 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1571} 1571}
1572#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ 1572#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
1573 1573
1574#ifdef CONFIG_STACK_GROWSUP
1575int expand_stack(struct vm_area_struct *vma, unsigned long address)
1576{
1577 return expand_upwards(vma, address);
1578}
1579
1580struct vm_area_struct *
1581find_extend_vma(struct mm_struct *mm, unsigned long addr)
1582{
1583 struct vm_area_struct *vma, *prev;
1584
1585 addr &= PAGE_MASK;
1586 vma = find_vma_prev(mm, addr, &prev);
1587 if (vma && (vma->vm_start <= addr))
1588 return vma;
1589 if (!prev || expand_stack(prev, addr))
1590 return NULL;
1591 if (prev->vm_flags & VM_LOCKED) {
1592 make_pages_present(addr, prev->vm_end);
1593 }
1594 return prev;
1595}
1596#else
1597/* 1574/*
1598 * vma is the first one with address < vma->vm_start. Have to extend vma. 1575 * vma is the first one with address < vma->vm_start. Have to extend vma.
1599 */ 1576 */
1600int expand_stack(struct vm_area_struct *vma, unsigned long address) 1577static inline int expand_downwards(struct vm_area_struct *vma,
1578 unsigned long address)
1601{ 1579{
1602 int error; 1580 int error;
1603 1581
@@ -1634,6 +1612,38 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
1634 return error; 1612 return error;
1635} 1613}
1636 1614
1615int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
1616{
1617 return expand_downwards(vma, address);
1618}
1619
1620#ifdef CONFIG_STACK_GROWSUP
1621int expand_stack(struct vm_area_struct *vma, unsigned long address)
1622{
1623 return expand_upwards(vma, address);
1624}
1625
1626struct vm_area_struct *
1627find_extend_vma(struct mm_struct *mm, unsigned long addr)
1628{
1629 struct vm_area_struct *vma, *prev;
1630
1631 addr &= PAGE_MASK;
1632 vma = find_vma_prev(mm, addr, &prev);
1633 if (vma && (vma->vm_start <= addr))
1634 return vma;
1635 if (!prev || expand_stack(prev, addr))
1636 return NULL;
1637 if (prev->vm_flags & VM_LOCKED)
1638 make_pages_present(addr, prev->vm_end);
1639 return prev;
1640}
1641#else
1642int expand_stack(struct vm_area_struct *vma, unsigned long address)
1643{
1644 return expand_downwards(vma, address);
1645}
1646
1637struct vm_area_struct * 1647struct vm_area_struct *
1638find_extend_vma(struct mm_struct * mm, unsigned long addr) 1648find_extend_vma(struct mm_struct * mm, unsigned long addr)
1639{ 1649{
@@ -1651,9 +1661,8 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
1651 start = vma->vm_start; 1661 start = vma->vm_start;
1652 if (expand_stack(vma, addr)) 1662 if (expand_stack(vma, addr))
1653 return NULL; 1663 return NULL;
1654 if (vma->vm_flags & VM_LOCKED) { 1664 if (vma->vm_flags & VM_LOCKED)
1655 make_pages_present(addr, start); 1665 make_pages_present(addr, start);
1656 }
1657 return vma; 1666 return vma;
1658} 1667}
1659#endif 1668#endif
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 3b8f3c0c63f3..e8346c30abec 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -128,7 +128,7 @@ static void change_protection(struct vm_area_struct *vma,
128 flush_tlb_range(vma, start, end); 128 flush_tlb_range(vma, start, end);
129} 129}
130 130
131static int 131int
132mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, 132mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
133 unsigned long start, unsigned long end, unsigned long newflags) 133 unsigned long start, unsigned long end, unsigned long newflags)
134{ 134{
diff --git a/mm/mremap.c b/mm/mremap.c
index bc7c52efc71b..8ea5c2412c6e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -120,7 +120,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
120 120
121#define LATENCY_LIMIT (64 * PAGE_SIZE) 121#define LATENCY_LIMIT (64 * PAGE_SIZE)
122 122
123static unsigned long move_page_tables(struct vm_area_struct *vma, 123unsigned long move_page_tables(struct vm_area_struct *vma,
124 unsigned long old_addr, struct vm_area_struct *new_vma, 124 unsigned long old_addr, struct vm_area_struct *new_vma,
125 unsigned long new_addr, unsigned long len) 125 unsigned long new_addr, unsigned long len)
126{ 126{