diff options
-rw-r--r-- | include/linux/memcontrol.h | 15 | ||||
-rw-r--r-- | include/linux/oom.h | 43 | ||||
-rw-r--r-- | mm/memcontrol.c | 114 | ||||
-rw-r--r-- | mm/oom_kill.c | 200 |
4 files changed, 167 insertions, 205 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5d8ca6e02e39..0710143723bc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -366,6 +366,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, | |||
366 | struct mem_cgroup *, | 366 | struct mem_cgroup *, |
367 | struct mem_cgroup_reclaim_cookie *); | 367 | struct mem_cgroup_reclaim_cookie *); |
368 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); | 368 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); |
369 | int mem_cgroup_scan_tasks(struct mem_cgroup *, | ||
370 | int (*)(struct task_struct *, void *), void *); | ||
369 | 371 | ||
370 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) | 372 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) |
371 | { | 373 | { |
@@ -446,6 +448,8 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) | |||
446 | 448 | ||
447 | void mem_cgroup_handle_over_high(void); | 449 | void mem_cgroup_handle_over_high(void); |
448 | 450 | ||
451 | unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg); | ||
452 | |||
449 | void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, | 453 | void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, |
450 | struct task_struct *p); | 454 | struct task_struct *p); |
451 | 455 | ||
@@ -639,6 +643,12 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root, | |||
639 | { | 643 | { |
640 | } | 644 | } |
641 | 645 | ||
646 | static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, | ||
647 | int (*fn)(struct task_struct *, void *), void *arg) | ||
648 | { | ||
649 | return 0; | ||
650 | } | ||
651 | |||
642 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) | 652 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) |
643 | { | 653 | { |
644 | return 0; | 654 | return 0; |
@@ -669,6 +679,11 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, | |||
669 | return 0; | 679 | return 0; |
670 | } | 680 | } |
671 | 681 | ||
682 | static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) | ||
683 | { | ||
684 | return 0; | ||
685 | } | ||
686 | |||
672 | static inline void | 687 | static inline void |
673 | mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | 688 | mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) |
674 | { | 689 | { |
diff --git a/include/linux/oom.h b/include/linux/oom.h index 5bc0457ee3a8..17946e5121b6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h | |||
@@ -34,23 +34,11 @@ struct oom_control { | |||
34 | * for display purposes. | 34 | * for display purposes. |
35 | */ | 35 | */ |
36 | const int order; | 36 | const int order; |
37 | }; | ||
38 | 37 | ||
39 | /* | 38 | /* Used by oom implementation, do not set */ |
40 | * Types of limitations to the nodes from which allocations may occur | 39 | unsigned long totalpages; |
41 | */ | 40 | struct task_struct *chosen; |
42 | enum oom_constraint { | 41 | unsigned long chosen_points; |
43 | CONSTRAINT_NONE, | ||
44 | CONSTRAINT_CPUSET, | ||
45 | CONSTRAINT_MEMORY_POLICY, | ||
46 | CONSTRAINT_MEMCG, | ||
47 | }; | ||
48 | |||
49 | enum oom_scan_t { | ||
50 | OOM_SCAN_OK, /* scan thread and find its badness */ | ||
51 | OOM_SCAN_CONTINUE, /* do not consider thread for oom kill */ | ||
52 | OOM_SCAN_ABORT, /* abort the iteration and return */ | ||
53 | OOM_SCAN_SELECT, /* always select this thread first */ | ||
54 | }; | 42 | }; |
55 | 43 | ||
56 | extern struct mutex oom_lock; | 44 | extern struct mutex oom_lock; |
@@ -70,30 +58,10 @@ static inline bool oom_task_origin(const struct task_struct *p) | |||
70 | return p->signal->oom_flag_origin; | 58 | return p->signal->oom_flag_origin; |
71 | } | 59 | } |
72 | 60 | ||
73 | extern void mark_oom_victim(struct task_struct *tsk); | ||
74 | |||
75 | #ifdef CONFIG_MMU | ||
76 | extern void wake_oom_reaper(struct task_struct *tsk); | ||
77 | #else | ||
78 | static inline void wake_oom_reaper(struct task_struct *tsk) | ||
79 | { | ||
80 | } | ||
81 | #endif | ||
82 | |||
83 | extern unsigned long oom_badness(struct task_struct *p, | 61 | extern unsigned long oom_badness(struct task_struct *p, |
84 | struct mem_cgroup *memcg, const nodemask_t *nodemask, | 62 | struct mem_cgroup *memcg, const nodemask_t *nodemask, |
85 | unsigned long totalpages); | 63 | unsigned long totalpages); |
86 | 64 | ||
87 | extern void oom_kill_process(struct oom_control *oc, struct task_struct *p, | ||
88 | unsigned int points, unsigned long totalpages, | ||
89 | const char *message); | ||
90 | |||
91 | extern void check_panic_on_oom(struct oom_control *oc, | ||
92 | enum oom_constraint constraint); | ||
93 | |||
94 | extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, | ||
95 | struct task_struct *task); | ||
96 | |||
97 | extern bool out_of_memory(struct oom_control *oc); | 65 | extern bool out_of_memory(struct oom_control *oc); |
98 | 66 | ||
99 | extern void exit_oom_victim(struct task_struct *tsk); | 67 | extern void exit_oom_victim(struct task_struct *tsk); |
@@ -101,14 +69,11 @@ extern void exit_oom_victim(struct task_struct *tsk); | |||
101 | extern int register_oom_notifier(struct notifier_block *nb); | 69 | extern int register_oom_notifier(struct notifier_block *nb); |
102 | extern int unregister_oom_notifier(struct notifier_block *nb); | 70 | extern int unregister_oom_notifier(struct notifier_block *nb); |
103 | 71 | ||
104 | extern bool oom_killer_disabled; | ||
105 | extern bool oom_killer_disable(void); | 72 | extern bool oom_killer_disable(void); |
106 | extern void oom_killer_enable(void); | 73 | extern void oom_killer_enable(void); |
107 | 74 | ||
108 | extern struct task_struct *find_lock_task_mm(struct task_struct *p); | 75 | extern struct task_struct *find_lock_task_mm(struct task_struct *p); |
109 | 76 | ||
110 | bool task_will_free_mem(struct task_struct *task); | ||
111 | |||
112 | /* sysctls */ | 77 | /* sysctls */ |
113 | extern int sysctl_oom_dump_tasks; | 78 | extern int sysctl_oom_dump_tasks; |
114 | extern int sysctl_oom_kill_allocating_task; | 79 | extern int sysctl_oom_kill_allocating_task; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4be518d4e68a..48747ef5b88f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -921,6 +921,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) | |||
921 | iter = mem_cgroup_iter(NULL, iter, NULL)) | 921 | iter = mem_cgroup_iter(NULL, iter, NULL)) |
922 | 922 | ||
923 | /** | 923 | /** |
924 | * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy | ||
925 | * @memcg: hierarchy root | ||
926 | * @fn: function to call for each task | ||
927 | * @arg: argument passed to @fn | ||
928 | * | ||
929 | * This function iterates over tasks attached to @memcg or to any of its | ||
930 | * descendants and calls @fn for each task. If @fn returns a non-zero | ||
931 | * value, the function breaks the iteration loop and returns the value. | ||
932 | * Otherwise, it will iterate over all tasks and return 0. | ||
933 | * | ||
934 | * This function must not be called for the root memory cgroup. | ||
935 | */ | ||
936 | int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, | ||
937 | int (*fn)(struct task_struct *, void *), void *arg) | ||
938 | { | ||
939 | struct mem_cgroup *iter; | ||
940 | int ret = 0; | ||
941 | |||
942 | BUG_ON(memcg == root_mem_cgroup); | ||
943 | |||
944 | for_each_mem_cgroup_tree(iter, memcg) { | ||
945 | struct css_task_iter it; | ||
946 | struct task_struct *task; | ||
947 | |||
948 | css_task_iter_start(&iter->css, &it); | ||
949 | while (!ret && (task = css_task_iter_next(&it))) | ||
950 | ret = fn(task, arg); | ||
951 | css_task_iter_end(&it); | ||
952 | if (ret) { | ||
953 | mem_cgroup_iter_break(memcg, iter); | ||
954 | break; | ||
955 | } | ||
956 | } | ||
957 | return ret; | ||
958 | } | ||
959 | |||
960 | /** | ||
924 | * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page | 961 | * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page |
925 | * @page: the page | 962 | * @page: the page |
926 | * @zone: zone of the page | 963 | * @zone: zone of the page |
@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg) | |||
1178 | /* | 1215 | /* |
1179 | * Return the memory (and swap, if configured) limit for a memcg. | 1216 | * Return the memory (and swap, if configured) limit for a memcg. |
1180 | */ | 1217 | */ |
1181 | static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) | 1218 | unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) |
1182 | { | 1219 | { |
1183 | unsigned long limit; | 1220 | unsigned long limit; |
1184 | 1221 | ||
@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1205 | .gfp_mask = gfp_mask, | 1242 | .gfp_mask = gfp_mask, |
1206 | .order = order, | 1243 | .order = order, |
1207 | }; | 1244 | }; |
1208 | struct mem_cgroup *iter; | 1245 | bool ret; |
1209 | unsigned long chosen_points = 0; | ||
1210 | unsigned long totalpages; | ||
1211 | unsigned int points = 0; | ||
1212 | struct task_struct *chosen = NULL; | ||
1213 | 1246 | ||
1214 | mutex_lock(&oom_lock); | 1247 | mutex_lock(&oom_lock); |
1215 | 1248 | ret = out_of_memory(&oc); | |
1216 | /* | ||
1217 | * If current has a pending SIGKILL or is exiting, then automatically | ||
1218 | * select it. The goal is to allow it to allocate so that it may | ||
1219 | * quickly exit and free its memory. | ||
1220 | */ | ||
1221 | if (task_will_free_mem(current)) { | ||
1222 | mark_oom_victim(current); | ||
1223 | wake_oom_reaper(current); | ||
1224 | goto unlock; | ||
1225 | } | ||
1226 | |||
1227 | check_panic_on_oom(&oc, CONSTRAINT_MEMCG); | ||
1228 | totalpages = mem_cgroup_get_limit(memcg) ? : 1; | ||
1229 | for_each_mem_cgroup_tree(iter, memcg) { | ||
1230 | struct css_task_iter it; | ||
1231 | struct task_struct *task; | ||
1232 | |||
1233 | css_task_iter_start(&iter->css, &it); | ||
1234 | while ((task = css_task_iter_next(&it))) { | ||
1235 | switch (oom_scan_process_thread(&oc, task)) { | ||
1236 | case OOM_SCAN_SELECT: | ||
1237 | if (chosen) | ||
1238 | put_task_struct(chosen); | ||
1239 | chosen = task; | ||
1240 | chosen_points = ULONG_MAX; | ||
1241 | get_task_struct(chosen); | ||
1242 | /* fall through */ | ||
1243 | case OOM_SCAN_CONTINUE: | ||
1244 | continue; | ||
1245 | case OOM_SCAN_ABORT: | ||
1246 | css_task_iter_end(&it); | ||
1247 | mem_cgroup_iter_break(memcg, iter); | ||
1248 | if (chosen) | ||
1249 | put_task_struct(chosen); | ||
1250 | /* Set a dummy value to return "true". */ | ||
1251 | chosen = (void *) 1; | ||
1252 | goto unlock; | ||
1253 | case OOM_SCAN_OK: | ||
1254 | break; | ||
1255 | }; | ||
1256 | points = oom_badness(task, memcg, NULL, totalpages); | ||
1257 | if (!points || points < chosen_points) | ||
1258 | continue; | ||
1259 | /* Prefer thread group leaders for display purposes */ | ||
1260 | if (points == chosen_points && | ||
1261 | thread_group_leader(chosen)) | ||
1262 | continue; | ||
1263 | |||
1264 | if (chosen) | ||
1265 | put_task_struct(chosen); | ||
1266 | chosen = task; | ||
1267 | chosen_points = points; | ||
1268 | get_task_struct(chosen); | ||
1269 | } | ||
1270 | css_task_iter_end(&it); | ||
1271 | } | ||
1272 | |||
1273 | if (chosen) { | ||
1274 | points = chosen_points * 1000 / totalpages; | ||
1275 | oom_kill_process(&oc, chosen, points, totalpages, | ||
1276 | "Memory cgroup out of memory"); | ||
1277 | } | ||
1278 | unlock: | ||
1279 | mutex_unlock(&oom_lock); | 1249 | mutex_unlock(&oom_lock); |
1280 | return chosen; | 1250 | return ret; |
1281 | } | 1251 | } |
1282 | 1252 | ||
1283 | #if MAX_NUMNODES > 1 | 1253 | #if MAX_NUMNODES > 1 |
@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle) | |||
1600 | if (!memcg) | 1570 | if (!memcg) |
1601 | return false; | 1571 | return false; |
1602 | 1572 | ||
1603 | if (!handle || oom_killer_disabled) | 1573 | if (!handle) |
1604 | goto cleanup; | 1574 | goto cleanup; |
1605 | 1575 | ||
1606 | owait.memcg = memcg; | 1576 | owait.memcg = memcg; |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d53a9aa00977..ef175518f05f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -132,6 +132,11 @@ static inline bool is_sysrq_oom(struct oom_control *oc) | |||
132 | return oc->order == -1; | 132 | return oc->order == -1; |
133 | } | 133 | } |
134 | 134 | ||
135 | static inline bool is_memcg_oom(struct oom_control *oc) | ||
136 | { | ||
137 | return oc->memcg != NULL; | ||
138 | } | ||
139 | |||
135 | /* return true if the task is not adequate as candidate victim task. */ | 140 | /* return true if the task is not adequate as candidate victim task. */ |
136 | static bool oom_unkillable_task(struct task_struct *p, | 141 | static bool oom_unkillable_task(struct task_struct *p, |
137 | struct mem_cgroup *memcg, const nodemask_t *nodemask) | 142 | struct mem_cgroup *memcg, const nodemask_t *nodemask) |
@@ -213,12 +218,17 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, | |||
213 | return points > 0 ? points : 1; | 218 | return points > 0 ? points : 1; |
214 | } | 219 | } |
215 | 220 | ||
221 | enum oom_constraint { | ||
222 | CONSTRAINT_NONE, | ||
223 | CONSTRAINT_CPUSET, | ||
224 | CONSTRAINT_MEMORY_POLICY, | ||
225 | CONSTRAINT_MEMCG, | ||
226 | }; | ||
227 | |||
216 | /* | 228 | /* |
217 | * Determine the type of allocation constraint. | 229 | * Determine the type of allocation constraint. |
218 | */ | 230 | */ |
219 | #ifdef CONFIG_NUMA | 231 | static enum oom_constraint constrained_alloc(struct oom_control *oc) |
220 | static enum oom_constraint constrained_alloc(struct oom_control *oc, | ||
221 | unsigned long *totalpages) | ||
222 | { | 232 | { |
223 | struct zone *zone; | 233 | struct zone *zone; |
224 | struct zoneref *z; | 234 | struct zoneref *z; |
@@ -226,8 +236,16 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc, | |||
226 | bool cpuset_limited = false; | 236 | bool cpuset_limited = false; |
227 | int nid; | 237 | int nid; |
228 | 238 | ||
239 | if (is_memcg_oom(oc)) { | ||
240 | oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1; | ||
241 | return CONSTRAINT_MEMCG; | ||
242 | } | ||
243 | |||
229 | /* Default to all available memory */ | 244 | /* Default to all available memory */ |
230 | *totalpages = totalram_pages + total_swap_pages; | 245 | oc->totalpages = totalram_pages + total_swap_pages; |
246 | |||
247 | if (!IS_ENABLED(CONFIG_NUMA)) | ||
248 | return CONSTRAINT_NONE; | ||
231 | 249 | ||
232 | if (!oc->zonelist) | 250 | if (!oc->zonelist) |
233 | return CONSTRAINT_NONE; | 251 | return CONSTRAINT_NONE; |
@@ -246,9 +264,9 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc, | |||
246 | */ | 264 | */ |
247 | if (oc->nodemask && | 265 | if (oc->nodemask && |
248 | !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { | 266 | !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { |
249 | *totalpages = total_swap_pages; | 267 | oc->totalpages = total_swap_pages; |
250 | for_each_node_mask(nid, *oc->nodemask) | 268 | for_each_node_mask(nid, *oc->nodemask) |
251 | *totalpages += node_spanned_pages(nid); | 269 | oc->totalpages += node_spanned_pages(nid); |
252 | return CONSTRAINT_MEMORY_POLICY; | 270 | return CONSTRAINT_MEMORY_POLICY; |
253 | } | 271 | } |
254 | 272 | ||
@@ -259,27 +277,21 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc, | |||
259 | cpuset_limited = true; | 277 | cpuset_limited = true; |
260 | 278 | ||
261 | if (cpuset_limited) { | 279 | if (cpuset_limited) { |
262 | *totalpages = total_swap_pages; | 280 | oc->totalpages = total_swap_pages; |
263 | for_each_node_mask(nid, cpuset_current_mems_allowed) | 281 | for_each_node_mask(nid, cpuset_current_mems_allowed) |
264 | *totalpages += node_spanned_pages(nid); | 282 | oc->totalpages += node_spanned_pages(nid); |
265 | return CONSTRAINT_CPUSET; | 283 | return CONSTRAINT_CPUSET; |
266 | } | 284 | } |
267 | return CONSTRAINT_NONE; | 285 | return CONSTRAINT_NONE; |
268 | } | 286 | } |
269 | #else | ||
270 | static enum oom_constraint constrained_alloc(struct oom_control *oc, | ||
271 | unsigned long *totalpages) | ||
272 | { | ||
273 | *totalpages = totalram_pages + total_swap_pages; | ||
274 | return CONSTRAINT_NONE; | ||
275 | } | ||
276 | #endif | ||
277 | 287 | ||
278 | enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, | 288 | static int oom_evaluate_task(struct task_struct *task, void *arg) |
279 | struct task_struct *task) | ||
280 | { | 289 | { |
290 | struct oom_control *oc = arg; | ||
291 | unsigned long points; | ||
292 | |||
281 | if (oom_unkillable_task(task, NULL, oc->nodemask)) | 293 | if (oom_unkillable_task(task, NULL, oc->nodemask)) |
282 | return OOM_SCAN_CONTINUE; | 294 | goto next; |
283 | 295 | ||
284 | /* | 296 | /* |
285 | * This task already has access to memory reserves and is being killed. | 297 | * This task already has access to memory reserves and is being killed. |
@@ -289,68 +301,67 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, | |||
289 | */ | 301 | */ |
290 | if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) { | 302 | if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) { |
291 | struct task_struct *p = find_lock_task_mm(task); | 303 | struct task_struct *p = find_lock_task_mm(task); |
292 | enum oom_scan_t ret = OOM_SCAN_ABORT; | 304 | bool reaped = false; |
293 | 305 | ||
294 | if (p) { | 306 | if (p) { |
295 | if (test_bit(MMF_OOM_REAPED, &p->mm->flags)) | 307 | reaped = test_bit(MMF_OOM_REAPED, &p->mm->flags); |
296 | ret = OOM_SCAN_CONTINUE; | ||
297 | task_unlock(p); | 308 | task_unlock(p); |
298 | } | 309 | } |
299 | 310 | if (reaped) | |
300 | return ret; | 311 | goto next; |
312 | goto abort; | ||
301 | } | 313 | } |
302 | 314 | ||
303 | /* | 315 | /* |
304 | * If task is allocating a lot of memory and has been marked to be | 316 | * If task is allocating a lot of memory and has been marked to be |
305 | * killed first if it triggers an oom, then select it. | 317 | * killed first if it triggers an oom, then select it. |
306 | */ | 318 | */ |
307 | if (oom_task_origin(task)) | 319 | if (oom_task_origin(task)) { |
308 | return OOM_SCAN_SELECT; | 320 | points = ULONG_MAX; |
321 | goto select; | ||
322 | } | ||
309 | 323 | ||
310 | return OOM_SCAN_OK; | 324 | points = oom_badness(task, NULL, oc->nodemask, oc->totalpages); |
325 | if (!points || points < oc->chosen_points) | ||
326 | goto next; | ||
327 | |||
328 | /* Prefer thread group leaders for display purposes */ | ||
329 | if (points == oc->chosen_points && thread_group_leader(oc->chosen)) | ||
330 | goto next; | ||
331 | select: | ||
332 | if (oc->chosen) | ||
333 | put_task_struct(oc->chosen); | ||
334 | get_task_struct(task); | ||
335 | oc->chosen = task; | ||
336 | oc->chosen_points = points; | ||
337 | next: | ||
338 | return 0; | ||
339 | abort: | ||
340 | if (oc->chosen) | ||
341 | put_task_struct(oc->chosen); | ||
342 | oc->chosen = (void *)-1UL; | ||
343 | return 1; | ||
311 | } | 344 | } |
312 | 345 | ||
313 | /* | 346 | /* |
314 | * Simple selection loop. We chose the process with the highest | 347 | * Simple selection loop. We choose the process with the highest number of |
315 | * number of 'points'. Returns -1 on scan abort. | 348 | * 'points'. In case scan was aborted, oc->chosen is set to -1. |
316 | */ | 349 | */ |
317 | static struct task_struct *select_bad_process(struct oom_control *oc, | 350 | static void select_bad_process(struct oom_control *oc) |
318 | unsigned int *ppoints, unsigned long totalpages) | ||
319 | { | 351 | { |
320 | struct task_struct *p; | 352 | if (is_memcg_oom(oc)) |
321 | struct task_struct *chosen = NULL; | 353 | mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); |
322 | unsigned long chosen_points = 0; | 354 | else { |
323 | 355 | struct task_struct *p; | |
324 | rcu_read_lock(); | ||
325 | for_each_process(p) { | ||
326 | unsigned int points; | ||
327 | |||
328 | switch (oom_scan_process_thread(oc, p)) { | ||
329 | case OOM_SCAN_SELECT: | ||
330 | chosen = p; | ||
331 | chosen_points = ULONG_MAX; | ||
332 | /* fall through */ | ||
333 | case OOM_SCAN_CONTINUE: | ||
334 | continue; | ||
335 | case OOM_SCAN_ABORT: | ||
336 | rcu_read_unlock(); | ||
337 | return (struct task_struct *)(-1UL); | ||
338 | case OOM_SCAN_OK: | ||
339 | break; | ||
340 | }; | ||
341 | points = oom_badness(p, NULL, oc->nodemask, totalpages); | ||
342 | if (!points || points < chosen_points) | ||
343 | continue; | ||
344 | 356 | ||
345 | chosen = p; | 357 | rcu_read_lock(); |
346 | chosen_points = points; | 358 | for_each_process(p) |
359 | if (oom_evaluate_task(p, oc)) | ||
360 | break; | ||
361 | rcu_read_unlock(); | ||
347 | } | 362 | } |
348 | if (chosen) | ||
349 | get_task_struct(chosen); | ||
350 | rcu_read_unlock(); | ||
351 | 363 | ||
352 | *ppoints = chosen_points * 1000 / totalpages; | 364 | oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages; |
353 | return chosen; | ||
354 | } | 365 | } |
355 | 366 | ||
356 | /** | 367 | /** |
@@ -419,7 +430,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p) | |||
419 | static atomic_t oom_victims = ATOMIC_INIT(0); | 430 | static atomic_t oom_victims = ATOMIC_INIT(0); |
420 | static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); | 431 | static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); |
421 | 432 | ||
422 | bool oom_killer_disabled __read_mostly; | 433 | static bool oom_killer_disabled __read_mostly; |
423 | 434 | ||
424 | #define K(x) ((x) << (PAGE_SHIFT-10)) | 435 | #define K(x) ((x) << (PAGE_SHIFT-10)) |
425 | 436 | ||
@@ -627,7 +638,7 @@ static int oom_reaper(void *unused) | |||
627 | return 0; | 638 | return 0; |
628 | } | 639 | } |
629 | 640 | ||
630 | void wake_oom_reaper(struct task_struct *tsk) | 641 | static void wake_oom_reaper(struct task_struct *tsk) |
631 | { | 642 | { |
632 | if (!oom_reaper_th) | 643 | if (!oom_reaper_th) |
633 | return; | 644 | return; |
@@ -656,7 +667,11 @@ static int __init oom_init(void) | |||
656 | return 0; | 667 | return 0; |
657 | } | 668 | } |
658 | subsys_initcall(oom_init) | 669 | subsys_initcall(oom_init) |
659 | #endif | 670 | #else |
671 | static inline void wake_oom_reaper(struct task_struct *tsk) | ||
672 | { | ||
673 | } | ||
674 | #endif /* CONFIG_MMU */ | ||
660 | 675 | ||
661 | /** | 676 | /** |
662 | * mark_oom_victim - mark the given task as OOM victim | 677 | * mark_oom_victim - mark the given task as OOM victim |
@@ -665,7 +680,7 @@ subsys_initcall(oom_init) | |||
665 | * Has to be called with oom_lock held and never after | 680 | * Has to be called with oom_lock held and never after |
666 | * oom has been disabled already. | 681 | * oom has been disabled already. |
667 | */ | 682 | */ |
668 | void mark_oom_victim(struct task_struct *tsk) | 683 | static void mark_oom_victim(struct task_struct *tsk) |
669 | { | 684 | { |
670 | WARN_ON(oom_killer_disabled); | 685 | WARN_ON(oom_killer_disabled); |
671 | /* OOM killer might race with memcg OOM */ | 686 | /* OOM killer might race with memcg OOM */ |
@@ -760,7 +775,7 @@ static inline bool __task_will_free_mem(struct task_struct *task) | |||
760 | * Caller has to make sure that task->mm is stable (hold task_lock or | 775 | * Caller has to make sure that task->mm is stable (hold task_lock or |
761 | * it operates on the current). | 776 | * it operates on the current). |
762 | */ | 777 | */ |
763 | bool task_will_free_mem(struct task_struct *task) | 778 | static bool task_will_free_mem(struct task_struct *task) |
764 | { | 779 | { |
765 | struct mm_struct *mm = task->mm; | 780 | struct mm_struct *mm = task->mm; |
766 | struct task_struct *p; | 781 | struct task_struct *p; |
@@ -806,14 +821,10 @@ bool task_will_free_mem(struct task_struct *task) | |||
806 | return ret; | 821 | return ret; |
807 | } | 822 | } |
808 | 823 | ||
809 | /* | 824 | static void oom_kill_process(struct oom_control *oc, const char *message) |
810 | * Must be called while holding a reference to p, which will be released upon | ||
811 | * returning. | ||
812 | */ | ||
813 | void oom_kill_process(struct oom_control *oc, struct task_struct *p, | ||
814 | unsigned int points, unsigned long totalpages, | ||
815 | const char *message) | ||
816 | { | 825 | { |
826 | struct task_struct *p = oc->chosen; | ||
827 | unsigned int points = oc->chosen_points; | ||
817 | struct task_struct *victim = p; | 828 | struct task_struct *victim = p; |
818 | struct task_struct *child; | 829 | struct task_struct *child; |
819 | struct task_struct *t; | 830 | struct task_struct *t; |
@@ -860,7 +871,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, | |||
860 | * oom_badness() returns 0 if the thread is unkillable | 871 | * oom_badness() returns 0 if the thread is unkillable |
861 | */ | 872 | */ |
862 | child_points = oom_badness(child, | 873 | child_points = oom_badness(child, |
863 | oc->memcg, oc->nodemask, totalpages); | 874 | oc->memcg, oc->nodemask, oc->totalpages); |
864 | if (child_points > victim_points) { | 875 | if (child_points > victim_points) { |
865 | put_task_struct(victim); | 876 | put_task_struct(victim); |
866 | victim = child; | 877 | victim = child; |
@@ -942,7 +953,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, | |||
942 | /* | 953 | /* |
943 | * Determines whether the kernel must panic because of the panic_on_oom sysctl. | 954 | * Determines whether the kernel must panic because of the panic_on_oom sysctl. |
944 | */ | 955 | */ |
945 | void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint) | 956 | static void check_panic_on_oom(struct oom_control *oc, |
957 | enum oom_constraint constraint) | ||
946 | { | 958 | { |
947 | if (likely(!sysctl_panic_on_oom)) | 959 | if (likely(!sysctl_panic_on_oom)) |
948 | return; | 960 | return; |
@@ -988,19 +1000,18 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); | |||
988 | */ | 1000 | */ |
989 | bool out_of_memory(struct oom_control *oc) | 1001 | bool out_of_memory(struct oom_control *oc) |
990 | { | 1002 | { |
991 | struct task_struct *p; | ||
992 | unsigned long totalpages; | ||
993 | unsigned long freed = 0; | 1003 | unsigned long freed = 0; |
994 | unsigned int uninitialized_var(points); | ||
995 | enum oom_constraint constraint = CONSTRAINT_NONE; | 1004 | enum oom_constraint constraint = CONSTRAINT_NONE; |
996 | 1005 | ||
997 | if (oom_killer_disabled) | 1006 | if (oom_killer_disabled) |
998 | return false; | 1007 | return false; |
999 | 1008 | ||
1000 | blocking_notifier_call_chain(&oom_notify_list, 0, &freed); | 1009 | if (!is_memcg_oom(oc)) { |
1001 | if (freed > 0) | 1010 | blocking_notifier_call_chain(&oom_notify_list, 0, &freed); |
1002 | /* Got some memory back in the last second. */ | 1011 | if (freed > 0) |
1003 | return true; | 1012 | /* Got some memory back in the last second. */ |
1013 | return true; | ||
1014 | } | ||
1004 | 1015 | ||
1005 | /* | 1016 | /* |
1006 | * If current has a pending SIGKILL or is exiting, then automatically | 1017 | * If current has a pending SIGKILL or is exiting, then automatically |
@@ -1024,37 +1035,38 @@ bool out_of_memory(struct oom_control *oc) | |||
1024 | 1035 | ||
1025 | /* | 1036 | /* |
1026 | * Check if there were limitations on the allocation (only relevant for | 1037 | * Check if there were limitations on the allocation (only relevant for |
1027 | * NUMA) that may require different handling. | 1038 | * NUMA and memcg) that may require different handling. |
1028 | */ | 1039 | */ |
1029 | constraint = constrained_alloc(oc, &totalpages); | 1040 | constraint = constrained_alloc(oc); |
1030 | if (constraint != CONSTRAINT_MEMORY_POLICY) | 1041 | if (constraint != CONSTRAINT_MEMORY_POLICY) |
1031 | oc->nodemask = NULL; | 1042 | oc->nodemask = NULL; |
1032 | check_panic_on_oom(oc, constraint); | 1043 | check_panic_on_oom(oc, constraint); |
1033 | 1044 | ||
1034 | if (sysctl_oom_kill_allocating_task && current->mm && | 1045 | if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && |
1035 | !oom_unkillable_task(current, NULL, oc->nodemask) && | 1046 | current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) && |
1036 | current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { | 1047 | current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { |
1037 | get_task_struct(current); | 1048 | get_task_struct(current); |
1038 | oom_kill_process(oc, current, 0, totalpages, | 1049 | oc->chosen = current; |
1039 | "Out of memory (oom_kill_allocating_task)"); | 1050 | oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)"); |
1040 | return true; | 1051 | return true; |
1041 | } | 1052 | } |
1042 | 1053 | ||
1043 | p = select_bad_process(oc, &points, totalpages); | 1054 | select_bad_process(oc); |
1044 | /* Found nothing?!?! Either we hang forever, or we panic. */ | 1055 | /* Found nothing?!?! Either we hang forever, or we panic. */ |
1045 | if (!p && !is_sysrq_oom(oc)) { | 1056 | if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) { |
1046 | dump_header(oc, NULL); | 1057 | dump_header(oc, NULL); |
1047 | panic("Out of memory and no killable processes...\n"); | 1058 | panic("Out of memory and no killable processes...\n"); |
1048 | } | 1059 | } |
1049 | if (p && p != (void *)-1UL) { | 1060 | if (oc->chosen && oc->chosen != (void *)-1UL) { |
1050 | oom_kill_process(oc, p, points, totalpages, "Out of memory"); | 1061 | oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" : |
1062 | "Memory cgroup out of memory"); | ||
1051 | /* | 1063 | /* |
1052 | * Give the killed process a good chance to exit before trying | 1064 | * Give the killed process a good chance to exit before trying |
1053 | * to allocate memory again. | 1065 | * to allocate memory again. |
1054 | */ | 1066 | */ |
1055 | schedule_timeout_killable(1); | 1067 | schedule_timeout_killable(1); |
1056 | } | 1068 | } |
1057 | return true; | 1069 | return !!oc->chosen; |
1058 | } | 1070 | } |
1059 | 1071 | ||
1060 | /* | 1072 | /* |