aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Schwidefsky <schwidefsky@de.ibm.com> 2006-09-26 02:31:20 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-26 11:48:47 -0400
commit 8bc719d3cab8414938f9ea6e33b58d8810d18068 (patch)
tree 1afd4ce7865466bf9578ca746c63c1d351f07cdc
parent 19655d3487001d7df0e10e9cbfc27c758b77c2b5 (diff)
[PATCH] out of memory notifier
Add a notifier chain to the out-of-memory killer. If one of the registered callbacks could release some memory, do not kill the process but return and retry the allocation that forced the oom killer to run. The purpose of the notifier is to add a safety net in the presence of memory ballooners. If the resource manager inflated the balloon to a size where memory allocations cannot be satisfied anymore, it is better to deflate the balloon a bit instead of killing processes. The implementation for the s390 ballooner is included. [akpm@osdl.org: cleanups] Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- arch/s390/mm/cmm.c 155
-rw-r--r-- include/linux/swap.h 4
-rw-r--r-- mm/oom_kill.c 22
3 files changed, 121 insertions, 60 deletions
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 786a44dba5bf..f2e00df99bae 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -15,6 +15,7 @@
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/sysctl.h> 16#include <linux/sysctl.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/swap.h>
18 19
19#include <asm/pgalloc.h> 20#include <asm/pgalloc.h>
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
@@ -34,17 +35,18 @@ struct cmm_page_array {
34 unsigned long pages[CMM_NR_PAGES]; 35 unsigned long pages[CMM_NR_PAGES];
35}; 36};
36 37
37static long cmm_pages = 0; 38static long cmm_pages;
38static long cmm_timed_pages = 0; 39static long cmm_timed_pages;
39static volatile long cmm_pages_target = 0; 40static volatile long cmm_pages_target;
40static volatile long cmm_timed_pages_target = 0; 41static volatile long cmm_timed_pages_target;
41static long cmm_timeout_pages = 0; 42static long cmm_timeout_pages;
42static long cmm_timeout_seconds = 0; 43static long cmm_timeout_seconds;
43 44
44static struct cmm_page_array *cmm_page_list = NULL; 45static struct cmm_page_array *cmm_page_list;
45static struct cmm_page_array *cmm_timed_page_list = NULL; 46static struct cmm_page_array *cmm_timed_page_list;
47static DEFINE_SPINLOCK(cmm_lock);
46 48
47static unsigned long cmm_thread_active = 0; 49static unsigned long cmm_thread_active;
48static struct work_struct cmm_thread_starter; 50static struct work_struct cmm_thread_starter;
49static wait_queue_head_t cmm_thread_wait; 51static wait_queue_head_t cmm_thread_wait;
50static struct timer_list cmm_timer; 52static struct timer_list cmm_timer;
@@ -53,58 +55,89 @@ static void cmm_timer_fn(unsigned long);
53static void cmm_set_timer(void); 55static void cmm_set_timer(void);
54 56
55static long 57static long
56cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) 58cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list)
57{ 59{
58 struct cmm_page_array *pa; 60 struct cmm_page_array *pa, *npa;
59 unsigned long page; 61 unsigned long addr;
60 62
61 pa = *list; 63 while (nr) {
62 while (pages) { 64 addr = __get_free_page(GFP_NOIO);
63 page = __get_free_page(GFP_NOIO); 65 if (!addr)
64 if (!page)
65 break; 66 break;
67 spin_lock(&cmm_lock);
68 pa = *list;
66 if (!pa || pa->index >= CMM_NR_PAGES) { 69 if (!pa || pa->index >= CMM_NR_PAGES) {
67 /* Need a new page for the page list. */ 70 /* Need a new page for the page list. */
68 pa = (struct cmm_page_array *) 71 spin_unlock(&cmm_lock);
72 npa = (struct cmm_page_array *)
69 __get_free_page(GFP_NOIO); 73 __get_free_page(GFP_NOIO);
70 if (!pa) { 74 if (!npa) {
71 free_page(page); 75 free_page(addr);
72 break; 76 break;
73 } 77 }
74 pa->next = *list; 78 spin_lock(&cmm_lock);
75 pa->index = 0; 79 pa = *list;
76 *list = pa; 80 if (!pa || pa->index >= CMM_NR_PAGES) {
81 npa->next = pa;
82 npa->index = 0;
83 pa = npa;
84 *list = pa;
85 } else
86 free_page((unsigned long) npa);
77 } 87 }
78 diag10(page); 88 diag10(addr);
79 pa->pages[pa->index++] = page; 89 pa->pages[pa->index++] = addr;
80 (*counter)++; 90 (*counter)++;
81 pages--; 91 spin_unlock(&cmm_lock);
92 nr--;
82 } 93 }
83 return pages; 94 return nr;
84} 95}
85 96
86static void 97static long
87cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) 98cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
88{ 99{
89 struct cmm_page_array *pa; 100 struct cmm_page_array *pa;
90 unsigned long page; 101 unsigned long addr;
91 102
103 spin_lock(&cmm_lock);
92 pa = *list; 104 pa = *list;
93 while (pages) { 105 while (nr) {
94 if (!pa || pa->index <= 0) 106 if (!pa || pa->index <= 0)
95 break; 107 break;
96 page = pa->pages[--pa->index]; 108 addr = pa->pages[--pa->index];
97 if (pa->index == 0) { 109 if (pa->index == 0) {
98 pa = pa->next; 110 pa = pa->next;
99 free_page((unsigned long) *list); 111 free_page((unsigned long) *list);
100 *list = pa; 112 *list = pa;
101 } 113 }
102 free_page(page); 114 free_page(addr);
103 (*counter)--; 115 (*counter)--;
104 pages--; 116 nr--;
105 } 117 }
118 spin_unlock(&cmm_lock);
119 return nr;
106} 120}
107 121
122static int cmm_oom_notify(struct notifier_block *self,
123 unsigned long dummy, void *parm)
124{
125 unsigned long *freed = parm;
126 long nr = 256;
127
128 nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list);
129 if (nr > 0)
130 nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list);
131 cmm_pages_target = cmm_pages;
132 cmm_timed_pages_target = cmm_timed_pages;
133 *freed += 256 - nr;
134 return NOTIFY_OK;
135}
136
137static struct notifier_block cmm_oom_nb = {
138 .notifier_call = cmm_oom_notify
139};
140
108static int 141static int
109cmm_thread(void *dummy) 142cmm_thread(void *dummy)
110{ 143{
@@ -177,21 +210,21 @@ cmm_set_timer(void)
177static void 210static void
178cmm_timer_fn(unsigned long ignored) 211cmm_timer_fn(unsigned long ignored)
179{ 212{
180 long pages; 213 long nr;
181 214
182 pages = cmm_timed_pages_target - cmm_timeout_pages; 215 nr = cmm_timed_pages_target - cmm_timeout_pages;
183 if (pages < 0) 216 if (nr < 0)
184 cmm_timed_pages_target = 0; 217 cmm_timed_pages_target = 0;
185 else 218 else
186 cmm_timed_pages_target = pages; 219 cmm_timed_pages_target = nr;
187 cmm_kick_thread(); 220 cmm_kick_thread();
188 cmm_set_timer(); 221 cmm_set_timer();
189} 222}
190 223
191void 224void
192cmm_set_pages(long pages) 225cmm_set_pages(long nr)
193{ 226{
194 cmm_pages_target = pages; 227 cmm_pages_target = nr;
195 cmm_kick_thread(); 228 cmm_kick_thread();
196} 229}
197 230
@@ -202,9 +235,9 @@ cmm_get_pages(void)
202} 235}
203 236
204void 237void
205cmm_add_timed_pages(long pages) 238cmm_add_timed_pages(long nr)
206{ 239{
207 cmm_timed_pages_target += pages; 240 cmm_timed_pages_target += nr;
208 cmm_kick_thread(); 241 cmm_kick_thread();
209} 242}
210 243
@@ -215,9 +248,9 @@ cmm_get_timed_pages(void)
215} 248}
216 249
217void 250void
218cmm_set_timeout(long pages, long seconds) 251cmm_set_timeout(long nr, long seconds)
219{ 252{
220 cmm_timeout_pages = pages; 253 cmm_timeout_pages = nr;
221 cmm_timeout_seconds = seconds; 254 cmm_timeout_seconds = seconds;
222 cmm_set_timer(); 255 cmm_set_timer();
223} 256}
@@ -245,7 +278,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
245 void __user *buffer, size_t *lenp, loff_t *ppos) 278 void __user *buffer, size_t *lenp, loff_t *ppos)
246{ 279{
247 char buf[16], *p; 280 char buf[16], *p;
248 long pages; 281 long nr;
249 int len; 282 int len;
250 283
251 if (!*lenp || (*ppos && !write)) { 284 if (!*lenp || (*ppos && !write)) {
@@ -260,17 +293,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
260 return -EFAULT; 293 return -EFAULT;
261 buf[sizeof(buf) - 1] = '\0'; 294 buf[sizeof(buf) - 1] = '\0';
262 cmm_skip_blanks(buf, &p); 295 cmm_skip_blanks(buf, &p);
263 pages = simple_strtoul(p, &p, 0); 296 nr = simple_strtoul(p, &p, 0);
264 if (ctl == &cmm_table[0]) 297 if (ctl == &cmm_table[0])
265 cmm_set_pages(pages); 298 cmm_set_pages(nr);
266 else 299 else
267 cmm_add_timed_pages(pages); 300 cmm_add_timed_pages(nr);
268 } else { 301 } else {
269 if (ctl == &cmm_table[0]) 302 if (ctl == &cmm_table[0])
270 pages = cmm_get_pages(); 303 nr = cmm_get_pages();
271 else 304 else
272 pages = cmm_get_timed_pages(); 305 nr = cmm_get_timed_pages();
273 len = sprintf(buf, "%ld\n", pages); 306 len = sprintf(buf, "%ld\n", nr);
274 if (len > *lenp) 307 if (len > *lenp)
275 len = *lenp; 308 len = *lenp;
276 if (copy_to_user(buffer, buf, len)) 309 if (copy_to_user(buffer, buf, len))
@@ -286,7 +319,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
286 void __user *buffer, size_t *lenp, loff_t *ppos) 319 void __user *buffer, size_t *lenp, loff_t *ppos)
287{ 320{
288 char buf[64], *p; 321 char buf[64], *p;
289 long pages, seconds; 322 long nr, seconds;
290 int len; 323 int len;
291 324
292 if (!*lenp || (*ppos && !write)) { 325 if (!*lenp || (*ppos && !write)) {
@@ -301,10 +334,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
301 return -EFAULT; 334 return -EFAULT;
302 buf[sizeof(buf) - 1] = '\0'; 335 buf[sizeof(buf) - 1] = '\0';
303 cmm_skip_blanks(buf, &p); 336 cmm_skip_blanks(buf, &p);
304 pages = simple_strtoul(p, &p, 0); 337 nr = simple_strtoul(p, &p, 0);
305 cmm_skip_blanks(p, &p); 338 cmm_skip_blanks(p, &p);
306 seconds = simple_strtoul(p, &p, 0); 339 seconds = simple_strtoul(p, &p, 0);
307 cmm_set_timeout(pages, seconds); 340 cmm_set_timeout(nr, seconds);
308 } else { 341 } else {
309 len = sprintf(buf, "%ld %ld\n", 342 len = sprintf(buf, "%ld %ld\n",
310 cmm_timeout_pages, cmm_timeout_seconds); 343 cmm_timeout_pages, cmm_timeout_seconds);
@@ -357,7 +390,7 @@ static struct ctl_table cmm_dir_table[] = {
357static void 390static void
358cmm_smsg_target(char *from, char *msg) 391cmm_smsg_target(char *from, char *msg)
359{ 392{
360 long pages, seconds; 393 long nr, seconds;
361 394
362 if (strlen(sender) > 0 && strcmp(from, sender) != 0) 395 if (strlen(sender) > 0 && strcmp(from, sender) != 0)
363 return; 396 return;
@@ -366,27 +399,27 @@ cmm_smsg_target(char *from, char *msg)
366 if (strncmp(msg, "SHRINK", 6) == 0) { 399 if (strncmp(msg, "SHRINK", 6) == 0) {
367 if (!cmm_skip_blanks(msg + 6, &msg)) 400 if (!cmm_skip_blanks(msg + 6, &msg))
368 return; 401 return;
369 pages = simple_strtoul(msg, &msg, 0); 402 nr = simple_strtoul(msg, &msg, 0);
370 cmm_skip_blanks(msg, &msg); 403 cmm_skip_blanks(msg, &msg);
371 if (*msg == '\0') 404 if (*msg == '\0')
372 cmm_set_pages(pages); 405 cmm_set_pages(nr);
373 } else if (strncmp(msg, "RELEASE", 7) == 0) { 406 } else if (strncmp(msg, "RELEASE", 7) == 0) {
374 if (!cmm_skip_blanks(msg + 7, &msg)) 407 if (!cmm_skip_blanks(msg + 7, &msg))
375 return; 408 return;
376 pages = simple_strtoul(msg, &msg, 0); 409 nr = simple_strtoul(msg, &msg, 0);
377 cmm_skip_blanks(msg, &msg); 410 cmm_skip_blanks(msg, &msg);
378 if (*msg == '\0') 411 if (*msg == '\0')
379 cmm_add_timed_pages(pages); 412 cmm_add_timed_pages(nr);
380 } else if (strncmp(msg, "REUSE", 5) == 0) { 413 } else if (strncmp(msg, "REUSE", 5) == 0) {
381 if (!cmm_skip_blanks(msg + 5, &msg)) 414 if (!cmm_skip_blanks(msg + 5, &msg))
382 return; 415 return;
383 pages = simple_strtoul(msg, &msg, 0); 416 nr = simple_strtoul(msg, &msg, 0);
384 if (!cmm_skip_blanks(msg, &msg)) 417 if (!cmm_skip_blanks(msg, &msg))
385 return; 418 return;
386 seconds = simple_strtoul(msg, &msg, 0); 419 seconds = simple_strtoul(msg, &msg, 0);
387 cmm_skip_blanks(msg, &msg); 420 cmm_skip_blanks(msg, &msg);
388 if (*msg == '\0') 421 if (*msg == '\0')
389 cmm_set_timeout(pages, seconds); 422 cmm_set_timeout(nr, seconds);
390 } 423 }
391} 424}
392#endif 425#endif
@@ -402,6 +435,7 @@ cmm_init (void)
402#ifdef CONFIG_CMM_IUCV 435#ifdef CONFIG_CMM_IUCV
403 smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); 436 smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
404#endif 437#endif
438 register_oom_notifier(&cmm_oom_nb);
405 INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL); 439 INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL);
406 init_waitqueue_head(&cmm_thread_wait); 440 init_waitqueue_head(&cmm_thread_wait);
407 init_timer(&cmm_timer); 441 init_timer(&cmm_timer);
@@ -411,6 +445,7 @@ cmm_init (void)
411static void 445static void
412cmm_exit(void) 446cmm_exit(void)
413{ 447{
448 unregister_oom_notifier(&cmm_oom_nb);
414 cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); 449 cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
415 cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); 450 cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
416#ifdef CONFIG_CMM_PROC 451#ifdef CONFIG_CMM_PROC
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 34a6bc3e6cf3..32db06c8ffe0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,8 @@
10#include <asm/atomic.h> 10#include <asm/atomic.h>
11#include <asm/page.h> 11#include <asm/page.h>
12 12
13struct notifier_block;
14
13#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ 15#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
14#define SWAP_FLAG_PRIO_MASK 0x7fff 16#define SWAP_FLAG_PRIO_MASK 0x7fff
15#define SWAP_FLAG_PRIO_SHIFT 0 17#define SWAP_FLAG_PRIO_SHIFT 0
@@ -156,6 +158,8 @@ struct swap_list_t {
156 158
157/* linux/mm/oom_kill.c */ 159/* linux/mm/oom_kill.c */
158extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); 160extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
161extern int register_oom_notifier(struct notifier_block *nb);
162extern int unregister_oom_notifier(struct notifier_block *nb);
159 163
160/* linux/mm/memory.c */ 164/* linux/mm/memory.c */
161extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); 165extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b9af136e5cfa..7d056843fa2d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -21,6 +21,8 @@
21#include <linux/timex.h> 21#include <linux/timex.h>
22#include <linux/jiffies.h> 22#include <linux/jiffies.h>
23#include <linux/cpuset.h> 23#include <linux/cpuset.h>
24#include <linux/module.h>
25#include <linux/notifier.h>
24 26
25int sysctl_panic_on_oom; 27int sysctl_panic_on_oom;
26/* #define DEBUG */ 28/* #define DEBUG */
@@ -306,6 +308,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
306 return oom_kill_task(p, message); 308 return oom_kill_task(p, message);
307} 309}
308 310
311static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
312
313int register_oom_notifier(struct notifier_block *nb)
314{
315 return blocking_notifier_chain_register(&oom_notify_list, nb);
316}
317EXPORT_SYMBOL_GPL(register_oom_notifier);
318
319int unregister_oom_notifier(struct notifier_block *nb)
320{
321 return blocking_notifier_chain_unregister(&oom_notify_list, nb);
322}
323EXPORT_SYMBOL_GPL(unregister_oom_notifier);
324
309/** 325/**
310 * out_of_memory - kill the "best" process when we run out of memory 326 * out_of_memory - kill the "best" process when we run out of memory
311 * 327 *
@@ -318,6 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
318{ 334{
319 struct task_struct *p; 335 struct task_struct *p;
320 unsigned long points = 0; 336 unsigned long points = 0;
337 unsigned long freed = 0;
338
339 blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
340 if (freed > 0)
341 /* Got some memory back in the last second. */
342 return;
321 343
322 if (printk_ratelimit()) { 344 if (printk_ratelimit()) {
323 printk("oom-killer: gfp_mask=0x%x, order=%d\n", 345 printk("oom-killer: gfp_mask=0x%x, order=%d\n",