diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /mm/oom_kill.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 167 |
1 files changed, 104 insertions, 63 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 4029583a1024..e4b0991ca351 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -31,12 +31,40 @@ | |||
31 | #include <linux/memcontrol.h> | 31 | #include <linux/memcontrol.h> |
32 | #include <linux/mempolicy.h> | 32 | #include <linux/mempolicy.h> |
33 | #include <linux/security.h> | 33 | #include <linux/security.h> |
34 | #include <linux/ptrace.h> | ||
34 | 35 | ||
35 | int sysctl_panic_on_oom; | 36 | int sysctl_panic_on_oom; |
36 | int sysctl_oom_kill_allocating_task; | 37 | int sysctl_oom_kill_allocating_task; |
37 | int sysctl_oom_dump_tasks = 1; | 38 | int sysctl_oom_dump_tasks = 1; |
38 | static DEFINE_SPINLOCK(zone_scan_lock); | 39 | static DEFINE_SPINLOCK(zone_scan_lock); |
39 | 40 | ||
41 | /** | ||
42 | * test_set_oom_score_adj() - set current's oom_score_adj and return old value | ||
43 | * @new_val: new oom_score_adj value | ||
44 | * | ||
45 | * Sets the oom_score_adj value for current to @new_val with proper | ||
46 | * synchronization and returns the old value. Usually used to temporarily | ||
47 | * set a value, save the old value in the caller, and then reinstate it later. | ||
48 | */ | ||
49 | int test_set_oom_score_adj(int new_val) | ||
50 | { | ||
51 | struct sighand_struct *sighand = current->sighand; | ||
52 | int old_val; | ||
53 | |||
54 | spin_lock_irq(&sighand->siglock); | ||
55 | old_val = current->signal->oom_score_adj; | ||
56 | if (new_val != old_val) { | ||
57 | if (new_val == OOM_SCORE_ADJ_MIN) | ||
58 | atomic_inc(&current->mm->oom_disable_count); | ||
59 | else if (old_val == OOM_SCORE_ADJ_MIN) | ||
60 | atomic_dec(&current->mm->oom_disable_count); | ||
61 | current->signal->oom_score_adj = new_val; | ||
62 | } | ||
63 | spin_unlock_irq(&sighand->siglock); | ||
64 | |||
65 | return old_val; | ||
66 | } | ||
67 | |||
40 | #ifdef CONFIG_NUMA | 68 | #ifdef CONFIG_NUMA |
41 | /** | 69 | /** |
42 | * has_intersects_mems_allowed() - check task eligiblity for kill | 70 | * has_intersects_mems_allowed() - check task eligiblity for kill |
@@ -83,24 +111,6 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk, | |||
83 | #endif /* CONFIG_NUMA */ | 111 | #endif /* CONFIG_NUMA */ |
84 | 112 | ||
85 | /* | 113 | /* |
86 | * If this is a system OOM (not a memcg OOM) and the task selected to be | ||
87 | * killed is not already running at high (RT) priorities, speed up the | ||
88 | * recovery by boosting the dying task to the lowest FIFO priority. | ||
89 | * That helps with the recovery and avoids interfering with RT tasks. | ||
90 | */ | ||
91 | static void boost_dying_task_prio(struct task_struct *p, | ||
92 | struct mem_cgroup *mem) | ||
93 | { | ||
94 | struct sched_param param = { .sched_priority = 1 }; | ||
95 | |||
96 | if (mem) | ||
97 | return; | ||
98 | |||
99 | if (!rt_task(p)) | ||
100 | sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * The process p may have detached its own ->mm while exiting or through | 114 | * The process p may have detached its own ->mm while exiting or through |
105 | * use_mm(), but one or more of its subthreads may still have a valid | 115 | * use_mm(), but one or more of its subthreads may still have a valid |
106 | * pointer. Return p, or any of its subthreads with a valid ->mm, with | 116 | * pointer. Return p, or any of its subthreads with a valid ->mm, with |
@@ -162,24 +172,16 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, | |||
162 | return 0; | 172 | return 0; |
163 | 173 | ||
164 | /* | 174 | /* |
165 | * Shortcut check for OOM_SCORE_ADJ_MIN so the entire heuristic doesn't | 175 | * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN |
166 | * need to be executed for something that cannot be killed. | 176 | * so the entire heuristic doesn't need to be executed for something |
177 | * that cannot be killed. | ||
167 | */ | 178 | */ |
168 | if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | 179 | if (atomic_read(&p->mm->oom_disable_count)) { |
169 | task_unlock(p); | 180 | task_unlock(p); |
170 | return 0; | 181 | return 0; |
171 | } | 182 | } |
172 | 183 | ||
173 | /* | 184 | /* |
174 | * When the PF_OOM_ORIGIN bit is set, it indicates the task should have | ||
175 | * priority for oom killing. | ||
176 | */ | ||
177 | if (p->flags & PF_OOM_ORIGIN) { | ||
178 | task_unlock(p); | ||
179 | return 1000; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * The memory controller may have a limit of 0 bytes, so avoid a divide | 185 | * The memory controller may have a limit of 0 bytes, so avoid a divide |
184 | * by zero, if necessary. | 186 | * by zero, if necessary. |
185 | */ | 187 | */ |
@@ -188,10 +190,13 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, | |||
188 | 190 | ||
189 | /* | 191 | /* |
190 | * The baseline for the badness score is the proportion of RAM that each | 192 | * The baseline for the badness score is the proportion of RAM that each |
191 | * task's rss and swap space use. | 193 | * task's rss, pagetable and swap space use. |
192 | */ | 194 | */ |
193 | points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 / | 195 | points = get_mm_rss(p->mm) + p->mm->nr_ptes; |
194 | totalpages; | 196 | points += get_mm_counter(p->mm, MM_SWAPENTS); |
197 | |||
198 | points *= 1000; | ||
199 | points /= totalpages; | ||
195 | task_unlock(p); | 200 | task_unlock(p); |
196 | 201 | ||
197 | /* | 202 | /* |
@@ -291,13 +296,15 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, | |||
291 | unsigned long totalpages, struct mem_cgroup *mem, | 296 | unsigned long totalpages, struct mem_cgroup *mem, |
292 | const nodemask_t *nodemask) | 297 | const nodemask_t *nodemask) |
293 | { | 298 | { |
294 | struct task_struct *p; | 299 | struct task_struct *g, *p; |
295 | struct task_struct *chosen = NULL; | 300 | struct task_struct *chosen = NULL; |
296 | *ppoints = 0; | 301 | *ppoints = 0; |
297 | 302 | ||
298 | for_each_process(p) { | 303 | do_each_thread(g, p) { |
299 | unsigned int points; | 304 | unsigned int points; |
300 | 305 | ||
306 | if (!p->mm) | ||
307 | continue; | ||
301 | if (oom_unkillable_task(p, mem, nodemask)) | 308 | if (oom_unkillable_task(p, mem, nodemask)) |
302 | continue; | 309 | continue; |
303 | 310 | ||
@@ -313,22 +320,29 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, | |||
313 | if (test_tsk_thread_flag(p, TIF_MEMDIE)) | 320 | if (test_tsk_thread_flag(p, TIF_MEMDIE)) |
314 | return ERR_PTR(-1UL); | 321 | return ERR_PTR(-1UL); |
315 | 322 | ||
316 | /* | 323 | if (p->flags & PF_EXITING) { |
317 | * This is in the process of releasing memory so wait for it | 324 | /* |
318 | * to finish before killing some other task by mistake. | 325 | * If p is the current task and is in the process of |
319 | * | 326 | * releasing memory, we allow the "kill" to set |
320 | * However, if p is the current task, we allow the 'kill' to | 327 | * TIF_MEMDIE, which will allow it to gain access to |
321 | * go ahead if it is exiting: this will simply set TIF_MEMDIE, | 328 | * memory reserves. Otherwise, it may stall forever. |
322 | * which will allow it to gain access to memory reserves in | 329 | * |
323 | * the process of exiting and releasing its resources. | 330 | * The loop isn't broken here, however, in case other |
324 | * Otherwise we could get an easy OOM deadlock. | 331 | * threads are found to have already been oom killed. |
325 | */ | 332 | */ |
326 | if (thread_group_empty(p) && (p->flags & PF_EXITING) && p->mm) { | 333 | if (p == current) { |
327 | if (p != current) | 334 | chosen = p; |
328 | return ERR_PTR(-1UL); | 335 | *ppoints = 1000; |
329 | 336 | } else { | |
330 | chosen = p; | 337 | /* |
331 | *ppoints = 1000; | 338 | * If this task is not being ptraced on exit, |
339 | * then wait for it to finish before killing | ||
340 | * some other task unnecessarily. | ||
341 | */ | ||
342 | if (!(task_ptrace(p->group_leader) & | ||
343 | PT_TRACE_EXIT)) | ||
344 | return ERR_PTR(-1UL); | ||
345 | } | ||
332 | } | 346 | } |
333 | 347 | ||
334 | points = oom_badness(p, mem, nodemask, totalpages); | 348 | points = oom_badness(p, mem, nodemask, totalpages); |
@@ -336,7 +350,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, | |||
336 | chosen = p; | 350 | chosen = p; |
337 | *ppoints = points; | 351 | *ppoints = points; |
338 | } | 352 | } |
339 | } | 353 | } while_each_thread(g, p); |
340 | 354 | ||
341 | return chosen; | 355 | return chosen; |
342 | } | 356 | } |
@@ -395,7 +409,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, | |||
395 | task_unlock(current); | 409 | task_unlock(current); |
396 | dump_stack(); | 410 | dump_stack(); |
397 | mem_cgroup_print_oom_info(mem, p); | 411 | mem_cgroup_print_oom_info(mem, p); |
398 | show_mem(); | 412 | show_mem(SHOW_MEM_FILTER_NODES); |
399 | if (sysctl_oom_dump_tasks) | 413 | if (sysctl_oom_dump_tasks) |
400 | dump_tasks(mem, nodemask); | 414 | dump_tasks(mem, nodemask); |
401 | } | 415 | } |
@@ -403,27 +417,44 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, | |||
403 | #define K(x) ((x) << (PAGE_SHIFT-10)) | 417 | #define K(x) ((x) << (PAGE_SHIFT-10)) |
404 | static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) | 418 | static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) |
405 | { | 419 | { |
420 | struct task_struct *q; | ||
421 | struct mm_struct *mm; | ||
422 | |||
406 | p = find_lock_task_mm(p); | 423 | p = find_lock_task_mm(p); |
407 | if (!p) | 424 | if (!p) |
408 | return 1; | 425 | return 1; |
409 | 426 | ||
427 | /* mm cannot be safely dereferenced after task_unlock(p) */ | ||
428 | mm = p->mm; | ||
429 | |||
410 | pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", | 430 | pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", |
411 | task_pid_nr(p), p->comm, K(p->mm->total_vm), | 431 | task_pid_nr(p), p->comm, K(p->mm->total_vm), |
412 | K(get_mm_counter(p->mm, MM_ANONPAGES)), | 432 | K(get_mm_counter(p->mm, MM_ANONPAGES)), |
413 | K(get_mm_counter(p->mm, MM_FILEPAGES))); | 433 | K(get_mm_counter(p->mm, MM_FILEPAGES))); |
414 | task_unlock(p); | 434 | task_unlock(p); |
415 | 435 | ||
436 | /* | ||
437 | * Kill all processes sharing p->mm in other thread groups, if any. | ||
438 | * They don't get access to memory reserves or a higher scheduler | ||
439 | * priority, though, to avoid depletion of all memory or task | ||
440 | * starvation. This prevents mm->mmap_sem livelock when an oom killed | ||
441 | * task cannot exit because it requires the semaphore and its contended | ||
442 | * by another thread trying to allocate memory itself. That thread will | ||
443 | * now get access to memory reserves since it has a pending fatal | ||
444 | * signal. | ||
445 | */ | ||
446 | for_each_process(q) | ||
447 | if (q->mm == mm && !same_thread_group(q, p)) { | ||
448 | task_lock(q); /* Protect ->comm from prctl() */ | ||
449 | pr_err("Kill process %d (%s) sharing same memory\n", | ||
450 | task_pid_nr(q), q->comm); | ||
451 | task_unlock(q); | ||
452 | force_sig(SIGKILL, q); | ||
453 | } | ||
416 | 454 | ||
417 | set_tsk_thread_flag(p, TIF_MEMDIE); | 455 | set_tsk_thread_flag(p, TIF_MEMDIE); |
418 | force_sig(SIGKILL, p); | 456 | force_sig(SIGKILL, p); |
419 | 457 | ||
420 | /* | ||
421 | * We give our sacrificial lamb high priority and access to | ||
422 | * all the memory it needs. That way it should be able to | ||
423 | * exit() and clear out its resources quickly... | ||
424 | */ | ||
425 | boost_dying_task_prio(p, mem); | ||
426 | |||
427 | return 0; | 458 | return 0; |
428 | } | 459 | } |
429 | #undef K | 460 | #undef K |
@@ -447,7 +478,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
447 | */ | 478 | */ |
448 | if (p->flags & PF_EXITING) { | 479 | if (p->flags & PF_EXITING) { |
449 | set_tsk_thread_flag(p, TIF_MEMDIE); | 480 | set_tsk_thread_flag(p, TIF_MEMDIE); |
450 | boost_dying_task_prio(p, mem); | ||
451 | return 0; | 481 | return 0; |
452 | } | 482 | } |
453 | 483 | ||
@@ -466,6 +496,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
466 | list_for_each_entry(child, &t->children, sibling) { | 496 | list_for_each_entry(child, &t->children, sibling) { |
467 | unsigned int child_points; | 497 | unsigned int child_points; |
468 | 498 | ||
499 | if (child->mm == p->mm) | ||
500 | continue; | ||
469 | /* | 501 | /* |
470 | * oom_badness() returns 0 if the thread is unkillable | 502 | * oom_badness() returns 0 if the thread is unkillable |
471 | */ | 503 | */ |
@@ -512,6 +544,16 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) | |||
512 | unsigned int points = 0; | 544 | unsigned int points = 0; |
513 | struct task_struct *p; | 545 | struct task_struct *p; |
514 | 546 | ||
547 | /* | ||
548 | * If current has a pending SIGKILL, then automatically select it. The | ||
549 | * goal is to allow it to allocate so that it may quickly exit and free | ||
550 | * its memory. | ||
551 | */ | ||
552 | if (fatal_signal_pending(current)) { | ||
553 | set_thread_flag(TIF_MEMDIE); | ||
554 | return; | ||
555 | } | ||
556 | |||
515 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL); | 557 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL); |
516 | limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; | 558 | limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; |
517 | read_lock(&tasklist_lock); | 559 | read_lock(&tasklist_lock); |
@@ -664,7 +706,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
664 | */ | 706 | */ |
665 | if (fatal_signal_pending(current)) { | 707 | if (fatal_signal_pending(current)) { |
666 | set_thread_flag(TIF_MEMDIE); | 708 | set_thread_flag(TIF_MEMDIE); |
667 | boost_dying_task_prio(current, NULL); | ||
668 | return; | 709 | return; |
669 | } | 710 | } |
670 | 711 | ||
@@ -680,7 +721,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
680 | read_lock(&tasklist_lock); | 721 | read_lock(&tasklist_lock); |
681 | if (sysctl_oom_kill_allocating_task && | 722 | if (sysctl_oom_kill_allocating_task && |
682 | !oom_unkillable_task(current, NULL, nodemask) && | 723 | !oom_unkillable_task(current, NULL, nodemask) && |
683 | (current->signal->oom_adj != OOM_DISABLE)) { | 724 | current->mm && !atomic_read(&current->mm->oom_disable_count)) { |
684 | /* | 725 | /* |
685 | * oom_kill_process() needs tasklist_lock held. If it returns | 726 | * oom_kill_process() needs tasklist_lock held. If it returns |
686 | * non-zero, current could not be killed so we must fallback to | 727 | * non-zero, current could not be killed so we must fallback to |