Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	 18
-rw-r--r--	mm/oom_kill.c	259
2 files changed, 129 insertions(+), 148 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 31abd1c2c0c5..de54ea0094a1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1127,6 +1127,24 @@ static int mem_cgroup_count_children(struct mem_cgroup *mem)
 }
 
 /*
+ * Return the memory (and swap, if configured) limit for a memcg.
+ */
+u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
+{
+	u64 limit;
+	u64 memsw;
+
+	limit = res_counter_read_u64(&memcg->res, RES_LIMIT) +
+			total_swap_pages;
+	memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+	/*
+	 * If memsw is finite and limits the amount of swap space available
+	 * to this memcg, return that limit.
+	 */
+	return min(limit, memsw);
+}
+
+/*
  * Visit the first child (need not be the first child as per the ordering
  * of the cgroup list, since we track last_scanned_child) of @mem and use
  * that to reclaim free pages from.
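
A hypothetical walk-through of mem_cgroup_get_limit() above (values invented,
units elided for clarity): with the memcg's memory limit at 512MB, 2GB of
total swap, and a memory+swap limit of 1GB,

	limit = 512MB + 2GB = 2.5GB
	memsw = 1GB
	min(limit, memsw) = 1GB

so the group can never use more than 1GB of memory plus swap, and that is the
total the oom killer will normalize badness scores against.
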
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0a4ca8a0234b..d3def05a33d9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -4,6 +4,8 @@
  *  Copyright (C)  1998,2000  Rik van Riel
  *	Thanks go out to Claus Fischer for some serious inspiration and
  *	for goading me into coding this file...
+ *  Copyright (C)  2010  Google, Inc.
+ *	Rewritten by David Rientjes
  *
  *  The routines in this file are used to kill a process when
  *  we're seriously out of memory. This gets called from __alloc_pages()
@@ -34,7 +36,6 @@ int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
 static DEFINE_SPINLOCK(zone_scan_lock);
-/* #define DEBUG */
 
 #ifdef CONFIG_NUMA
 /**
@@ -140,137 +141,76 @@ static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *mem,
 }
 
 /**
- * badness - calculate a numeric value for how bad this task has been
+ * oom_badness - heuristic function to determine which candidate task to kill
  * @p: task struct of which task we should calculate
- * @uptime: current uptime in seconds
+ * @totalpages: total present RAM allowed for page allocation
  *
- * The formula used is relatively simple and documented inline in the
- * function. The main rationale is that we want to select a good task
- * to kill when we run out of memory.
- *
- * Good in this context means that:
- * 1) we lose the minimum amount of work done
- * 2) we recover a large amount of memory
- * 3) we don't kill anything innocent of eating tons of memory
- * 4) we want to kill the minimum amount of processes (one)
- * 5) we try to kill the process the user expects us to kill, this
- *    algorithm has been meticulously tuned to meet the principle
- *    of least surprise ... (be careful when you change it)
+ * The heuristic for determining which task to kill is made to be as simple and
+ * predictable as possible.  The goal is to return the highest value for the
+ * task consuming the most memory to avoid subsequent oom failures.
  */
-unsigned long badness(struct task_struct *p, struct mem_cgroup *mem,
-		      const nodemask_t *nodemask, unsigned long uptime)
+unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
+		      const nodemask_t *nodemask, unsigned long totalpages)
 {
-	unsigned long points, cpu_time, run_time;
-	struct task_struct *child;
-	struct task_struct *c, *t;
-	int oom_adj = p->signal->oom_adj;
-	struct task_cputime task_time;
-	unsigned long utime;
-	unsigned long stime;
+	int points;
 
 	if (oom_unkillable_task(p, mem, nodemask))
 		return 0;
-	if (oom_adj == OOM_DISABLE)
-		return 0;
 
 	p = find_lock_task_mm(p);
 	if (!p)
 		return 0;
 
 	/*
-	 * The memory size of the process is the basis for the badness.
-	 */
-	points = p->mm->total_vm;
-	task_unlock(p);
-
-	/*
-	 * swapoff can easily use up all memory, so kill those first.
-	 */
-	if (p->flags & PF_OOM_ORIGIN)
-		return ULONG_MAX;
-
-	/*
-	 * Processes which fork a lot of child processes are likely
-	 * a good choice. We add half the vmsize of the children if they
-	 * have an own mm. This prevents forking servers to flood the
-	 * machine with an endless amount of children. In case a single
-	 * child is eating the vast majority of memory, adding only half
-	 * to the parents will make the child our kill candidate of choice.
+	 * Shortcut check for OOM_SCORE_ADJ_MIN so the entire heuristic doesn't
+	 * need to be executed for something that cannot be killed.
 	 */
-	t = p;
-	do {
-		list_for_each_entry(c, &t->children, sibling) {
-			child = find_lock_task_mm(c);
-			if (child) {
-				if (child->mm != p->mm)
-					points += child->mm->total_vm/2 + 1;
-				task_unlock(child);
-			}
-		}
-	} while_each_thread(p, t);
+	if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+		task_unlock(p);
+		return 0;
+	}
 
 	/*
-	 * CPU time is in tens of seconds and run time is in thousands
-	 * of seconds. There is no particular reason for this other than
-	 * that it turned out to work very well in practice.
+	 * When the PF_OOM_ORIGIN bit is set, it indicates the task should have
+	 * priority for oom killing.
 	 */
-	thread_group_cputime(p, &task_time);
-	utime = cputime_to_jiffies(task_time.utime);
-	stime = cputime_to_jiffies(task_time.stime);
-	cpu_time = (utime + stime) >> (SHIFT_HZ + 3);
-
-
-	if (uptime >= p->start_time.tv_sec)
-		run_time = (uptime - p->start_time.tv_sec) >> 10;
-	else
-		run_time = 0;
-
-	if (cpu_time)
-		points /= int_sqrt(cpu_time);
-	if (run_time)
-		points /= int_sqrt(int_sqrt(run_time));
+	if (p->flags & PF_OOM_ORIGIN) {
+		task_unlock(p);
+		return 1000;
+	}
 
 	/*
-	 * Niced processes are most likely less important, so double
-	 * their badness points.
+	 * The memory controller may have a limit of 0 bytes, so avoid a divide
+	 * by zero, if necessary.
 	 */
-	if (task_nice(p) > 0)
-		points *= 2;
+	if (!totalpages)
+		totalpages = 1;
 
 	/*
-	 * Superuser processes are usually more important, so we make it
-	 * less likely that we kill those.
+	 * The baseline for the badness score is the proportion of RAM that each
+	 * task's rss and swap space use.
 	 */
-	if (has_capability_noaudit(p, CAP_SYS_ADMIN) ||
-	    has_capability_noaudit(p, CAP_SYS_RESOURCE))
-		points /= 4;
+	points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 /
+			totalpages;
+	task_unlock(p);
 
 	/*
-	 * We don't want to kill a process with direct hardware access.
-	 * Not only could that mess up the hardware, but usually users
-	 * tend to only have this flag set on applications they think
-	 * of as important.
+	 * Root processes get 3% bonus, just like the __vm_enough_memory()
+	 * implementation used by LSMs.
 	 */
-	if (has_capability_noaudit(p, CAP_SYS_RAWIO))
-		points /= 4;
+	if (has_capability_noaudit(p, CAP_SYS_ADMIN))
+		points -= 30;
 
 	/*
-	 * Adjust the score by oom_adj.
+	 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
+	 * either completely disable oom killing or always prefer a certain
+	 * task.
 	 */
-	if (oom_adj) {
-		if (oom_adj > 0) {
-			if (!points)
-				points = 1;
-			points <<= oom_adj;
-		} else
-			points >>= -(oom_adj);
-	}
+	points += p->signal->oom_score_adj;
 
-#ifdef DEBUG
-	printk(KERN_DEBUG "OOMkill: task %d (%s) got %lu points\n",
-	p->pid, p->comm, points);
-#endif
-	return points;
+	if (points < 0)
+		return 0;
+	return (points < 1000) ? points : 1000;
 }
 
 /*
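
To make the new 0-1000 scale concrete, a hypothetical walk-through of
oom_badness() above (numbers invented for illustration): on a system where
totalpages is 1,000,000 pages of RAM plus swap, a task whose rss and swap
entries total 250,000 pages scores

	points = 250000 * 1000 / 1000000 = 250	/* 25.0% of allowed memory */

With CAP_SYS_ADMIN the 3% bonus lowers that to 220; an oom_score_adj of +300
then yields 520, while -600 would drive it to -380, which is clamped to 0.
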
@@ -278,12 +218,20 @@ unsigned long badness(struct task_struct *p, struct mem_cgroup *mem,
  */
 #ifdef CONFIG_NUMA
 static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask)
+				gfp_t gfp_mask, nodemask_t *nodemask,
+				unsigned long *totalpages)
 {
 	struct zone *zone;
 	struct zoneref *z;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	bool cpuset_limited = false;
+	int nid;
 
+	/* Default to all available memory */
+	*totalpages = totalram_pages + total_swap_pages;
+
+	if (!zonelist)
+		return CONSTRAINT_NONE;
 	/*
 	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
 	 * to kill current.We have to random task kill in this case.
@@ -293,26 +241,37 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 		return CONSTRAINT_NONE;
 
 	/*
-	 * The nodemask here is a nodemask passed to alloc_pages(). Now,
-	 * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy
-	 * feature. mempolicy is an only user of nodemask here.
-	 * check mempolicy's nodemask contains all N_HIGH_MEMORY
+	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
+	 * the page allocator means a mempolicy is in effect.  Cpuset policy
+	 * is enforced in get_page_from_freelist().
 	 */
-	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask))
+	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
+		*totalpages = total_swap_pages;
+		for_each_node_mask(nid, *nodemask)
+			*totalpages += node_spanned_pages(nid);
 		return CONSTRAINT_MEMORY_POLICY;
+	}
 
 	/* Check this allocation failure is caused by cpuset's wall function */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 			high_zoneidx, nodemask)
 		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
-			return CONSTRAINT_CPUSET;
+			cpuset_limited = true;
 
+	if (cpuset_limited) {
+		*totalpages = total_swap_pages;
+		for_each_node_mask(nid, cpuset_current_mems_allowed)
+			*totalpages += node_spanned_pages(nid);
+		return CONSTRAINT_CPUSET;
+	}
 	return CONSTRAINT_NONE;
 }
 #else
 static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask)
+				gfp_t gfp_mask, nodemask_t *nodemask,
+				unsigned long *totalpages)
 {
+	*totalpages = totalram_pages + total_swap_pages;
 	return CONSTRAINT_NONE;
 }
 #endif
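
For example (hypothetical topology): if a mempolicy binds the allocation to
nodes 0 and 1, each spanning 1,048,576 pages, and the machine has 524,288
pages of swap, constrained_alloc() reports CONSTRAINT_MEMORY_POLICY with

	*totalpages = 524288 + 2 * 1048576 = 2,621,440 pages

so each badness point now represents roughly 2,621 pages of the memory the
allocation can actually use, rather than of the whole machine.
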
@@ -323,17 +282,16 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
  *
  * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct *select_bad_process(unsigned long *ppoints,
-		struct mem_cgroup *mem, const nodemask_t *nodemask)
+static struct task_struct *select_bad_process(unsigned int *ppoints,
+		unsigned long totalpages, struct mem_cgroup *mem,
+		const nodemask_t *nodemask)
 {
 	struct task_struct *p;
 	struct task_struct *chosen = NULL;
-	struct timespec uptime;
 	*ppoints = 0;
 
-	do_posix_clock_monotonic_gettime(&uptime);
 	for_each_process(p) {
-		unsigned long points;
+		unsigned int points;
 
 		if (oom_unkillable_task(p, mem, nodemask))
 			continue;
@@ -365,11 +323,11 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 				return ERR_PTR(-1UL);
 
 			chosen = p;
-			*ppoints = ULONG_MAX;
+			*ppoints = 1000;
 		}
 
-		points = badness(p, mem, nodemask, uptime.tv_sec);
-		if (points > *ppoints || !chosen) {
+		points = oom_badness(p, mem, nodemask, totalpages);
+		if (points > *ppoints) {
 			chosen = p;
 			*ppoints = points;
 		}
@@ -384,7 +342,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
  *
  * Dumps the current memory state of all system tasks, excluding kernel threads.
  * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj
- * score, and name.
+ * value, oom_score_adj value, and name.
  *
  * If the actual is non-NULL, only tasks that are a member of the mem_cgroup are
  * shown.
@@ -396,8 +354,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
 	struct task_struct *p;
 	struct task_struct *task;
 
-	printk(KERN_INFO "[ pid ]   uid  tgid total_vm      rss cpu oom_adj "
-	       "name\n");
+	pr_info("[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name\n");
 	for_each_process(p) {
 		if (p->flags & PF_KTHREAD)
 			continue;
@@ -414,10 +371,11 @@ static void dump_tasks(const struct mem_cgroup *mem)
 			continue;
 		}
 
-		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3u     %3d %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %3u     %3d         %5d %s\n",
 			task->pid, __task_cred(task)->uid, task->tgid,
 			task->mm->total_vm, get_mm_rss(task->mm),
-			task_cpu(task), task->signal->oom_adj, task->comm);
+			task_cpu(task), task->signal->oom_adj,
+			task->signal->oom_score_adj, task->comm);
 		task_unlock(task);
 	}
 }
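
Read against the format string above, a line of the extended dump might look
roughly like this (task and values invented):

	[ 1342]  1000  1342   234567    45678   3       0             0 mysqld
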
@@ -427,8 +385,9 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
-		"oom_adj=%d\n",
-		current->comm, gfp_mask, order, current->signal->oom_adj);
+		"oom_adj=%d, oom_score_adj=%d\n",
+		current->comm, gfp_mask, order, current->signal->oom_adj,
+		current->signal->oom_score_adj);
 	cpuset_print_task_mems_allowed(current);
 	task_unlock(current);
 	dump_stack();
@@ -468,14 +427,14 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
 #undef K
 
 static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
-			    unsigned long points, struct mem_cgroup *mem,
-			    nodemask_t *nodemask, const char *message)
+			    unsigned int points, unsigned long totalpages,
+			    struct mem_cgroup *mem, nodemask_t *nodemask,
+			    const char *message)
 {
 	struct task_struct *victim = p;
 	struct task_struct *child;
 	struct task_struct *t = p;
-	unsigned long victim_points = 0;
-	struct timespec uptime;
+	unsigned int victim_points = 0;
 
 	if (printk_ratelimit())
 		dump_header(p, gfp_mask, order, mem);
@@ -491,7 +450,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	}
 
 	task_lock(p);
-	pr_err("%s: Kill process %d (%s) score %lu or sacrifice child\n",
+	pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n",
 		message, task_pid_nr(p), p->comm, points);
 	task_unlock(p);
 
@@ -501,14 +460,15 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	 * parent.  This attempts to lose the minimal amount of work done while
 	 * still freeing memory.
 	 */
-	do_posix_clock_monotonic_gettime(&uptime);
 	do {
 		list_for_each_entry(child, &t->children, sibling) {
-			unsigned long child_points;
+			unsigned int child_points;
 
-			/* badness() returns 0 if the thread is unkillable */
-			child_points = badness(child, mem, nodemask,
-							uptime.tv_sec);
+			/*
+			 * oom_badness() returns 0 if the thread is unkillable
+			 */
+			child_points = oom_badness(child, mem, nodemask,
+								totalpages);
 			if (child_points > victim_points) {
 				victim = child;
 				victim_points = child_points;
@@ -546,17 +506,19 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
 {
-	unsigned long points = 0;
+	unsigned long limit;
+	unsigned int points = 0;
 	struct task_struct *p;
 
 	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0);
+	limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;
 	read_lock(&tasklist_lock);
 retry:
-	p = select_bad_process(&points, mem, NULL);
+	p = select_bad_process(&points, limit, mem, NULL);
 	if (!p || PTR_ERR(p) == -1UL)
 		goto out;
 
-	if (oom_kill_process(p, gfp_mask, 0, points, mem, NULL,
+	if (oom_kill_process(p, gfp_mask, 0, points, limit, mem, NULL,
 				"Memory cgroup out of memory"))
 		goto retry;
 out:
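
A quick unit check on the memcg path (hypothetical limit): a group whose
effective limit is 1GB gives limit = (1 << 30) >> PAGE_SHIFT = 262,144 pages
with 4KB pages, so a task charging 131,072 pages of rss plus swap to the
group would score 131072 * 1000 / 262144 = 500.
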
@@ -681,8 +643,9 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *nodemask)
 {
 	struct task_struct *p;
+	unsigned long totalpages;
 	unsigned long freed = 0;
-	unsigned long points;
+	unsigned int points;
 	enum oom_constraint constraint = CONSTRAINT_NONE;
 
 	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
@@ -705,8 +668,8 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA) that may require different handling.
 	 */
-	if (zonelist)
-		constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
+	constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
+						&totalpages);
 	check_panic_on_oom(constraint, gfp_mask, order);
 
 	read_lock(&tasklist_lock);
@@ -718,14 +681,14 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		 * non-zero, current could not be killed so we must fallback to
 		 * the tasklist scan.
 		 */
-		if (!oom_kill_process(current, gfp_mask, order, 0, NULL,
-				nodemask,
+		if (!oom_kill_process(current, gfp_mask, order, 0, totalpages,
+				NULL, nodemask,
 				"Out of memory (oom_kill_allocating_task)"))
 			return;
 	}
 
 retry:
-	p = select_bad_process(&points, NULL,
+	p = select_bad_process(&points, totalpages, NULL,
 			constraint == CONSTRAINT_MEMORY_POLICY ? nodemask :
 								 NULL);
 	if (PTR_ERR(p) == -1UL)
@@ -738,8 +701,8 @@ retry:
 		panic("Out of memory and no killable processes...\n");
 	}
 
-	if (oom_kill_process(p, gfp_mask, order, points, NULL, nodemask,
-				"Out of memory"))
+	if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
+				nodemask, "Out of memory"))
 		goto retry;
 	read_unlock(&tasklist_lock);
 
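
Taken together, the rewrite threads the allowed-memory total through every
oom path; a rough sketch of the resulting call flow (functions as named in
this patch):

	out_of_memory()
	    constrained_alloc(zonelist, gfp_mask, nodemask, &totalpages)
	    select_bad_process(&points, totalpages, mem, nodemask)
	        oom_badness(p, mem, nodemask, totalpages)	/* 0..1000 */
	    oom_kill_process(p, ..., points, totalpages, ...)

so badness is always proportional to the memory available to the failing
allocation rather than to the machine as a whole.
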