diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2006-09-26 02:31:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-26 11:48:47 -0400 |
commit | 8bc719d3cab8414938f9ea6e33b58d8810d18068 (patch) | |
tree | 1afd4ce7865466bf9578ca746c63c1d351f07cdc | |
parent | 19655d3487001d7df0e10e9cbfc27c758b77c2b5 (diff) |
[PATCH] out of memory notifier
Add a notifier chain to the out of memory killer. If one of the registered
callbacks could release some memory, do not kill the process but return and
retry the allocation that forced the oom killer to run.
The purpose of the notifier is to add a safety net in the presence of
memory ballooners. If the resource manager inflated the balloon to a size
where memory allocations cannot be satisfied anymore, it is better to
deflate the balloon a bit instead of killing processes.
The implementation for the s390 ballooner is included.
[akpm@osdl.org: cleanups]
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/s390/mm/cmm.c | 155 | ||||
-rw-r--r-- | include/linux/swap.h | 4 | ||||
-rw-r--r-- | mm/oom_kill.c | 22 |
3 files changed, 121 insertions, 60 deletions
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 786a44dba5bf..f2e00df99bae 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/sysctl.h> | 16 | #include <linux/sysctl.h> |
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/swap.h> | ||
18 | 19 | ||
19 | #include <asm/pgalloc.h> | 20 | #include <asm/pgalloc.h> |
20 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
@@ -34,17 +35,18 @@ struct cmm_page_array { | |||
34 | unsigned long pages[CMM_NR_PAGES]; | 35 | unsigned long pages[CMM_NR_PAGES]; |
35 | }; | 36 | }; |
36 | 37 | ||
37 | static long cmm_pages = 0; | 38 | static long cmm_pages; |
38 | static long cmm_timed_pages = 0; | 39 | static long cmm_timed_pages; |
39 | static volatile long cmm_pages_target = 0; | 40 | static volatile long cmm_pages_target; |
40 | static volatile long cmm_timed_pages_target = 0; | 41 | static volatile long cmm_timed_pages_target; |
41 | static long cmm_timeout_pages = 0; | 42 | static long cmm_timeout_pages; |
42 | static long cmm_timeout_seconds = 0; | 43 | static long cmm_timeout_seconds; |
43 | 44 | ||
44 | static struct cmm_page_array *cmm_page_list = NULL; | 45 | static struct cmm_page_array *cmm_page_list; |
45 | static struct cmm_page_array *cmm_timed_page_list = NULL; | 46 | static struct cmm_page_array *cmm_timed_page_list; |
47 | static DEFINE_SPINLOCK(cmm_lock); | ||
46 | 48 | ||
47 | static unsigned long cmm_thread_active = 0; | 49 | static unsigned long cmm_thread_active; |
48 | static struct work_struct cmm_thread_starter; | 50 | static struct work_struct cmm_thread_starter; |
49 | static wait_queue_head_t cmm_thread_wait; | 51 | static wait_queue_head_t cmm_thread_wait; |
50 | static struct timer_list cmm_timer; | 52 | static struct timer_list cmm_timer; |
@@ -53,58 +55,89 @@ static void cmm_timer_fn(unsigned long); | |||
53 | static void cmm_set_timer(void); | 55 | static void cmm_set_timer(void); |
54 | 56 | ||
55 | static long | 57 | static long |
56 | cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) | 58 | cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list) |
57 | { | 59 | { |
58 | struct cmm_page_array *pa; | 60 | struct cmm_page_array *pa, *npa; |
59 | unsigned long page; | 61 | unsigned long addr; |
60 | 62 | ||
61 | pa = *list; | 63 | while (nr) { |
62 | while (pages) { | 64 | addr = __get_free_page(GFP_NOIO); |
63 | page = __get_free_page(GFP_NOIO); | 65 | if (!addr) |
64 | if (!page) | ||
65 | break; | 66 | break; |
67 | spin_lock(&cmm_lock); | ||
68 | pa = *list; | ||
66 | if (!pa || pa->index >= CMM_NR_PAGES) { | 69 | if (!pa || pa->index >= CMM_NR_PAGES) { |
67 | /* Need a new page for the page list. */ | 70 | /* Need a new page for the page list. */ |
68 | pa = (struct cmm_page_array *) | 71 | spin_unlock(&cmm_lock); |
72 | npa = (struct cmm_page_array *) | ||
69 | __get_free_page(GFP_NOIO); | 73 | __get_free_page(GFP_NOIO); |
70 | if (!pa) { | 74 | if (!npa) { |
71 | free_page(page); | 75 | free_page(addr); |
72 | break; | 76 | break; |
73 | } | 77 | } |
74 | pa->next = *list; | 78 | spin_lock(&cmm_lock); |
75 | pa->index = 0; | 79 | pa = *list; |
76 | *list = pa; | 80 | if (!pa || pa->index >= CMM_NR_PAGES) { |
81 | npa->next = pa; | ||
82 | npa->index = 0; | ||
83 | pa = npa; | ||
84 | *list = pa; | ||
85 | } else | ||
86 | free_page((unsigned long) npa); | ||
77 | } | 87 | } |
78 | diag10(page); | 88 | diag10(addr); |
79 | pa->pages[pa->index++] = page; | 89 | pa->pages[pa->index++] = addr; |
80 | (*counter)++; | 90 | (*counter)++; |
81 | pages--; | 91 | spin_unlock(&cmm_lock); |
92 | nr--; | ||
82 | } | 93 | } |
83 | return pages; | 94 | return nr; |
84 | } | 95 | } |
85 | 96 | ||
86 | static void | 97 | static long |
87 | cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) | 98 | cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) |
88 | { | 99 | { |
89 | struct cmm_page_array *pa; | 100 | struct cmm_page_array *pa; |
90 | unsigned long page; | 101 | unsigned long addr; |
91 | 102 | ||
103 | spin_lock(&cmm_lock); | ||
92 | pa = *list; | 104 | pa = *list; |
93 | while (pages) { | 105 | while (nr) { |
94 | if (!pa || pa->index <= 0) | 106 | if (!pa || pa->index <= 0) |
95 | break; | 107 | break; |
96 | page = pa->pages[--pa->index]; | 108 | addr = pa->pages[--pa->index]; |
97 | if (pa->index == 0) { | 109 | if (pa->index == 0) { |
98 | pa = pa->next; | 110 | pa = pa->next; |
99 | free_page((unsigned long) *list); | 111 | free_page((unsigned long) *list); |
100 | *list = pa; | 112 | *list = pa; |
101 | } | 113 | } |
102 | free_page(page); | 114 | free_page(addr); |
103 | (*counter)--; | 115 | (*counter)--; |
104 | pages--; | 116 | nr--; |
105 | } | 117 | } |
118 | spin_unlock(&cmm_lock); | ||
119 | return nr; | ||
106 | } | 120 | } |
107 | 121 | ||
122 | static int cmm_oom_notify(struct notifier_block *self, | ||
123 | unsigned long dummy, void *parm) | ||
124 | { | ||
125 | unsigned long *freed = parm; | ||
126 | long nr = 256; | ||
127 | |||
128 | nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list); | ||
129 | if (nr > 0) | ||
130 | nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list); | ||
131 | cmm_pages_target = cmm_pages; | ||
132 | cmm_timed_pages_target = cmm_timed_pages; | ||
133 | *freed += 256 - nr; | ||
134 | return NOTIFY_OK; | ||
135 | } | ||
136 | |||
137 | static struct notifier_block cmm_oom_nb = { | ||
138 | .notifier_call = cmm_oom_notify | ||
139 | }; | ||
140 | |||
108 | static int | 141 | static int |
109 | cmm_thread(void *dummy) | 142 | cmm_thread(void *dummy) |
110 | { | 143 | { |
@@ -177,21 +210,21 @@ cmm_set_timer(void) | |||
177 | static void | 210 | static void |
178 | cmm_timer_fn(unsigned long ignored) | 211 | cmm_timer_fn(unsigned long ignored) |
179 | { | 212 | { |
180 | long pages; | 213 | long nr; |
181 | 214 | ||
182 | pages = cmm_timed_pages_target - cmm_timeout_pages; | 215 | nr = cmm_timed_pages_target - cmm_timeout_pages; |
183 | if (pages < 0) | 216 | if (nr < 0) |
184 | cmm_timed_pages_target = 0; | 217 | cmm_timed_pages_target = 0; |
185 | else | 218 | else |
186 | cmm_timed_pages_target = pages; | 219 | cmm_timed_pages_target = nr; |
187 | cmm_kick_thread(); | 220 | cmm_kick_thread(); |
188 | cmm_set_timer(); | 221 | cmm_set_timer(); |
189 | } | 222 | } |
190 | 223 | ||
191 | void | 224 | void |
192 | cmm_set_pages(long pages) | 225 | cmm_set_pages(long nr) |
193 | { | 226 | { |
194 | cmm_pages_target = pages; | 227 | cmm_pages_target = nr; |
195 | cmm_kick_thread(); | 228 | cmm_kick_thread(); |
196 | } | 229 | } |
197 | 230 | ||
@@ -202,9 +235,9 @@ cmm_get_pages(void) | |||
202 | } | 235 | } |
203 | 236 | ||
204 | void | 237 | void |
205 | cmm_add_timed_pages(long pages) | 238 | cmm_add_timed_pages(long nr) |
206 | { | 239 | { |
207 | cmm_timed_pages_target += pages; | 240 | cmm_timed_pages_target += nr; |
208 | cmm_kick_thread(); | 241 | cmm_kick_thread(); |
209 | } | 242 | } |
210 | 243 | ||
@@ -215,9 +248,9 @@ cmm_get_timed_pages(void) | |||
215 | } | 248 | } |
216 | 249 | ||
217 | void | 250 | void |
218 | cmm_set_timeout(long pages, long seconds) | 251 | cmm_set_timeout(long nr, long seconds) |
219 | { | 252 | { |
220 | cmm_timeout_pages = pages; | 253 | cmm_timeout_pages = nr; |
221 | cmm_timeout_seconds = seconds; | 254 | cmm_timeout_seconds = seconds; |
222 | cmm_set_timer(); | 255 | cmm_set_timer(); |
223 | } | 256 | } |
@@ -245,7 +278,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, | |||
245 | void __user *buffer, size_t *lenp, loff_t *ppos) | 278 | void __user *buffer, size_t *lenp, loff_t *ppos) |
246 | { | 279 | { |
247 | char buf[16], *p; | 280 | char buf[16], *p; |
248 | long pages; | 281 | long nr; |
249 | int len; | 282 | int len; |
250 | 283 | ||
251 | if (!*lenp || (*ppos && !write)) { | 284 | if (!*lenp || (*ppos && !write)) { |
@@ -260,17 +293,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, | |||
260 | return -EFAULT; | 293 | return -EFAULT; |
261 | buf[sizeof(buf) - 1] = '\0'; | 294 | buf[sizeof(buf) - 1] = '\0'; |
262 | cmm_skip_blanks(buf, &p); | 295 | cmm_skip_blanks(buf, &p); |
263 | pages = simple_strtoul(p, &p, 0); | 296 | nr = simple_strtoul(p, &p, 0); |
264 | if (ctl == &cmm_table[0]) | 297 | if (ctl == &cmm_table[0]) |
265 | cmm_set_pages(pages); | 298 | cmm_set_pages(nr); |
266 | else | 299 | else |
267 | cmm_add_timed_pages(pages); | 300 | cmm_add_timed_pages(nr); |
268 | } else { | 301 | } else { |
269 | if (ctl == &cmm_table[0]) | 302 | if (ctl == &cmm_table[0]) |
270 | pages = cmm_get_pages(); | 303 | nr = cmm_get_pages(); |
271 | else | 304 | else |
272 | pages = cmm_get_timed_pages(); | 305 | nr = cmm_get_timed_pages(); |
273 | len = sprintf(buf, "%ld\n", pages); | 306 | len = sprintf(buf, "%ld\n", nr); |
274 | if (len > *lenp) | 307 | if (len > *lenp) |
275 | len = *lenp; | 308 | len = *lenp; |
276 | if (copy_to_user(buffer, buf, len)) | 309 | if (copy_to_user(buffer, buf, len)) |
@@ -286,7 +319,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, | |||
286 | void __user *buffer, size_t *lenp, loff_t *ppos) | 319 | void __user *buffer, size_t *lenp, loff_t *ppos) |
287 | { | 320 | { |
288 | char buf[64], *p; | 321 | char buf[64], *p; |
289 | long pages, seconds; | 322 | long nr, seconds; |
290 | int len; | 323 | int len; |
291 | 324 | ||
292 | if (!*lenp || (*ppos && !write)) { | 325 | if (!*lenp || (*ppos && !write)) { |
@@ -301,10 +334,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, | |||
301 | return -EFAULT; | 334 | return -EFAULT; |
302 | buf[sizeof(buf) - 1] = '\0'; | 335 | buf[sizeof(buf) - 1] = '\0'; |
303 | cmm_skip_blanks(buf, &p); | 336 | cmm_skip_blanks(buf, &p); |
304 | pages = simple_strtoul(p, &p, 0); | 337 | nr = simple_strtoul(p, &p, 0); |
305 | cmm_skip_blanks(p, &p); | 338 | cmm_skip_blanks(p, &p); |
306 | seconds = simple_strtoul(p, &p, 0); | 339 | seconds = simple_strtoul(p, &p, 0); |
307 | cmm_set_timeout(pages, seconds); | 340 | cmm_set_timeout(nr, seconds); |
308 | } else { | 341 | } else { |
309 | len = sprintf(buf, "%ld %ld\n", | 342 | len = sprintf(buf, "%ld %ld\n", |
310 | cmm_timeout_pages, cmm_timeout_seconds); | 343 | cmm_timeout_pages, cmm_timeout_seconds); |
@@ -357,7 +390,7 @@ static struct ctl_table cmm_dir_table[] = { | |||
357 | static void | 390 | static void |
358 | cmm_smsg_target(char *from, char *msg) | 391 | cmm_smsg_target(char *from, char *msg) |
359 | { | 392 | { |
360 | long pages, seconds; | 393 | long nr, seconds; |
361 | 394 | ||
362 | if (strlen(sender) > 0 && strcmp(from, sender) != 0) | 395 | if (strlen(sender) > 0 && strcmp(from, sender) != 0) |
363 | return; | 396 | return; |
@@ -366,27 +399,27 @@ cmm_smsg_target(char *from, char *msg) | |||
366 | if (strncmp(msg, "SHRINK", 6) == 0) { | 399 | if (strncmp(msg, "SHRINK", 6) == 0) { |
367 | if (!cmm_skip_blanks(msg + 6, &msg)) | 400 | if (!cmm_skip_blanks(msg + 6, &msg)) |
368 | return; | 401 | return; |
369 | pages = simple_strtoul(msg, &msg, 0); | 402 | nr = simple_strtoul(msg, &msg, 0); |
370 | cmm_skip_blanks(msg, &msg); | 403 | cmm_skip_blanks(msg, &msg); |
371 | if (*msg == '\0') | 404 | if (*msg == '\0') |
372 | cmm_set_pages(pages); | 405 | cmm_set_pages(nr); |
373 | } else if (strncmp(msg, "RELEASE", 7) == 0) { | 406 | } else if (strncmp(msg, "RELEASE", 7) == 0) { |
374 | if (!cmm_skip_blanks(msg + 7, &msg)) | 407 | if (!cmm_skip_blanks(msg + 7, &msg)) |
375 | return; | 408 | return; |
376 | pages = simple_strtoul(msg, &msg, 0); | 409 | nr = simple_strtoul(msg, &msg, 0); |
377 | cmm_skip_blanks(msg, &msg); | 410 | cmm_skip_blanks(msg, &msg); |
378 | if (*msg == '\0') | 411 | if (*msg == '\0') |
379 | cmm_add_timed_pages(pages); | 412 | cmm_add_timed_pages(nr); |
380 | } else if (strncmp(msg, "REUSE", 5) == 0) { | 413 | } else if (strncmp(msg, "REUSE", 5) == 0) { |
381 | if (!cmm_skip_blanks(msg + 5, &msg)) | 414 | if (!cmm_skip_blanks(msg + 5, &msg)) |
382 | return; | 415 | return; |
383 | pages = simple_strtoul(msg, &msg, 0); | 416 | nr = simple_strtoul(msg, &msg, 0); |
384 | if (!cmm_skip_blanks(msg, &msg)) | 417 | if (!cmm_skip_blanks(msg, &msg)) |
385 | return; | 418 | return; |
386 | seconds = simple_strtoul(msg, &msg, 0); | 419 | seconds = simple_strtoul(msg, &msg, 0); |
387 | cmm_skip_blanks(msg, &msg); | 420 | cmm_skip_blanks(msg, &msg); |
388 | if (*msg == '\0') | 421 | if (*msg == '\0') |
389 | cmm_set_timeout(pages, seconds); | 422 | cmm_set_timeout(nr, seconds); |
390 | } | 423 | } |
391 | } | 424 | } |
392 | #endif | 425 | #endif |
@@ -402,6 +435,7 @@ cmm_init (void) | |||
402 | #ifdef CONFIG_CMM_IUCV | 435 | #ifdef CONFIG_CMM_IUCV |
403 | smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); | 436 | smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); |
404 | #endif | 437 | #endif |
438 | register_oom_notifier(&cmm_oom_nb); | ||
405 | INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL); | 439 | INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL); |
406 | init_waitqueue_head(&cmm_thread_wait); | 440 | init_waitqueue_head(&cmm_thread_wait); |
407 | init_timer(&cmm_timer); | 441 | init_timer(&cmm_timer); |
@@ -411,6 +445,7 @@ cmm_init (void) | |||
411 | static void | 445 | static void |
412 | cmm_exit(void) | 446 | cmm_exit(void) |
413 | { | 447 | { |
448 | unregister_oom_notifier(&cmm_oom_nb); | ||
414 | cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); | 449 | cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); |
415 | cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); | 450 | cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); |
416 | #ifdef CONFIG_CMM_PROC | 451 | #ifdef CONFIG_CMM_PROC |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 34a6bc3e6cf3..32db06c8ffe0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -10,6 +10,8 @@ | |||
10 | #include <asm/atomic.h> | 10 | #include <asm/atomic.h> |
11 | #include <asm/page.h> | 11 | #include <asm/page.h> |
12 | 12 | ||
13 | struct notifier_block; | ||
14 | |||
13 | #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ | 15 | #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ |
14 | #define SWAP_FLAG_PRIO_MASK 0x7fff | 16 | #define SWAP_FLAG_PRIO_MASK 0x7fff |
15 | #define SWAP_FLAG_PRIO_SHIFT 0 | 17 | #define SWAP_FLAG_PRIO_SHIFT 0 |
@@ -156,6 +158,8 @@ struct swap_list_t { | |||
156 | 158 | ||
157 | /* linux/mm/oom_kill.c */ | 159 | /* linux/mm/oom_kill.c */ |
158 | extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); | 160 | extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); |
161 | extern int register_oom_notifier(struct notifier_block *nb); | ||
162 | extern int unregister_oom_notifier(struct notifier_block *nb); | ||
159 | 163 | ||
160 | /* linux/mm/memory.c */ | 164 | /* linux/mm/memory.c */ |
161 | extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); | 165 | extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b9af136e5cfa..7d056843fa2d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/timex.h> | 21 | #include <linux/timex.h> |
22 | #include <linux/jiffies.h> | 22 | #include <linux/jiffies.h> |
23 | #include <linux/cpuset.h> | 23 | #include <linux/cpuset.h> |
24 | #include <linux/module.h> | ||
25 | #include <linux/notifier.h> | ||
24 | 26 | ||
25 | int sysctl_panic_on_oom; | 27 | int sysctl_panic_on_oom; |
26 | /* #define DEBUG */ | 28 | /* #define DEBUG */ |
@@ -306,6 +308,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, | |||
306 | return oom_kill_task(p, message); | 308 | return oom_kill_task(p, message); |
307 | } | 309 | } |
308 | 310 | ||
311 | static BLOCKING_NOTIFIER_HEAD(oom_notify_list); | ||
312 | |||
313 | int register_oom_notifier(struct notifier_block *nb) | ||
314 | { | ||
315 | return blocking_notifier_chain_register(&oom_notify_list, nb); | ||
316 | } | ||
317 | EXPORT_SYMBOL_GPL(register_oom_notifier); | ||
318 | |||
319 | int unregister_oom_notifier(struct notifier_block *nb) | ||
320 | { | ||
321 | return blocking_notifier_chain_unregister(&oom_notify_list, nb); | ||
322 | } | ||
323 | EXPORT_SYMBOL_GPL(unregister_oom_notifier); | ||
324 | |||
309 | /** | 325 | /** |
310 | * out_of_memory - kill the "best" process when we run out of memory | 326 | * out_of_memory - kill the "best" process when we run out of memory |
311 | * | 327 | * |
@@ -318,6 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | |||
318 | { | 334 | { |
319 | struct task_struct *p; | 335 | struct task_struct *p; |
320 | unsigned long points = 0; | 336 | unsigned long points = 0; |
337 | unsigned long freed = 0; | ||
338 | |||
339 | blocking_notifier_call_chain(&oom_notify_list, 0, &freed); | ||
340 | if (freed > 0) | ||
341 | /* Got some memory back in the last second. */ | ||
342 | return; | ||
321 | 343 | ||
322 | if (printk_ratelimit()) { | 344 | if (printk_ratelimit()) { |
323 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", | 345 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", |