aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2010-11-30 14:55:34 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-11-30 20:56:37 -0500
commit3c77f845722158206a7209c45ccddc264d19319c (patch)
tree9eace97a8b88eb68b7d5d3127041b14c202421ae /fs
parent37a09f07459753e7c98d4e21f1c61e8756923f81 (diff)
exec: make argv/envp memory visible to oom-killer
Brad Spengler published a local memory-allocation DoS that evades the OOM-killer (though not the virtual memory RLIMIT): http://www.grsecurity.net/~spender/64bit_dos.c execve()->copy_strings() can allocate a lot of memory, but this is not visible to the oom-killer: nobody can see the nascent bprm->mm and take it into account. With this patch get_arg_page() increments current's MM_ANONPAGES counter every time we allocate a new page for argv/envp. When do_execve() succeeds or fails, we change this counter back. Technically this is not 100% correct: we can't know whether the new page has been swapped out and so turn MM_ANONPAGES into MM_SWAPENTS, but I don't think this really matters, and everything becomes correct once exec changes ->mm or fails. Reported-by: Brad Spengler <spender@grsecurity.net> Reviewed-and-discussed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Oleg Nesterov <oleg@redhat.com> Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/exec.c32
1 files changed, 30 insertions, 2 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 99d33a1371e9..4303b9035fe7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -164,6 +164,25 @@ out:
164 164
165#ifdef CONFIG_MMU 165#ifdef CONFIG_MMU
166 166
/*
 * Account the argv/envp pages of the exec in progress against current->mm,
 * so that memory held by the not-yet-installed bprm->mm is visible to the
 * OOM killer.
 *
 * @bprm:  exec state; bprm->vma_pages remembers how many pages have been
 *         charged so far.
 * @pages: new total number of pages to have charged. Only the difference
 *         from bprm->vma_pages is applied, so passing 0 removes everything
 *         previously charged.
 */
167static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
168{
169 struct mm_struct *mm = current->mm;
/* Signed delta: negative when the requested total is below the charged one. */
170 long diff = (long)(pages - bprm->vma_pages);
171
/* Nothing to do when current has no mm or the total is unchanged. */
172 if (!mm || !diff)
173 return;
174
/* Record the new total before adjusting the counter. */
175 bprm->vma_pages = pages;
176
/*
 * The counter is updated without page_table_lock when SPLIT_RSS_COUNTING
 * is defined, and under it otherwise.
 * NOTE(review): presumably add_mm_counter() needs no external locking in
 * the SPLIT_RSS_COUNTING configuration - confirm against its definition.
 */
177#ifdef SPLIT_RSS_COUNTING
178 add_mm_counter(mm, MM_ANONPAGES, diff);
179#else
180 spin_lock(&mm->page_table_lock);
181 add_mm_counter(mm, MM_ANONPAGES, diff);
182 spin_unlock(&mm->page_table_lock);
183#endif
184}
185
167static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 186static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
168 int write) 187 int write)
169{ 188{
@@ -186,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
186 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; 205 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
187 struct rlimit *rlim; 206 struct rlimit *rlim;
188 207
208 acct_arg_size(bprm, size / PAGE_SIZE);
209
189 /* 210 /*
190 * We've historically supported up to 32 pages (ARG_MAX) 211 * We've historically supported up to 32 pages (ARG_MAX)
191 * of argument strings even with small stacks 212 * of argument strings even with small stacks
@@ -276,6 +297,10 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
276 297
277#else 298#else
278 299
/*
 * !CONFIG_MMU variant: intentionally a no-op, so callers can invoke
 * acct_arg_size() unconditionally; no argument pages are accounted here.
 */
300static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
301{
302}
303
279static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 304static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
280 int write) 305 int write)
281{ 306{
@@ -1003,6 +1028,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1003 /* 1028 /*
1004 * Release all of the old mmap stuff 1029 * Release all of the old mmap stuff
1005 */ 1030 */
1031 acct_arg_size(bprm, 0);
1006 retval = exec_mmap(bprm->mm); 1032 retval = exec_mmap(bprm->mm);
1007 if (retval) 1033 if (retval)
1008 goto out; 1034 goto out;
@@ -1426,8 +1452,10 @@ int do_execve(const char * filename,
1426 return retval; 1452 return retval;
1427 1453
1428out: 1454out:
1429 if (bprm->mm) 1455 if (bprm->mm) {
1430 mmput (bprm->mm); 1456 acct_arg_size(bprm, 0);
1457 mmput(bprm->mm);
1458 }
1431 1459
1432out_file: 1460out_file:
1433 if (bprm->file) { 1461 if (bprm->file) {