author    Steven Whitehouse <swhiteho@redhat.com>    2006-04-21 12:52:36 -0400
committer Steven Whitehouse <swhiteho@redhat.com>    2006-04-21 12:52:36 -0400
commit    a748422ee45725e04e1d3792fa19dfa90ddfd116 (patch)
tree      978e12895468baaa9f7ab2747b9f7d50beaf1717 /mm
parent    c63e31c2cc1ec67372920b5e1aff8204d04dd172 (diff)
parent    f4ffaa452e71495a06376f12f772342bc57051fc (diff)
Merge branch 'master'
Diffstat (limited to 'mm')
-rw-r--r--   mm/bootmem.c          9
-rw-r--r--   mm/madvise.c          3
-rw-r--r--   mm/mempolicy.c        1
-rw-r--r--   mm/migrate.c          8
-rw-r--r--   mm/mmap.c            34
-rw-r--r--   mm/nommu.c           18
-rw-r--r--   mm/oom_kill.c        71
-rw-r--r--   mm/page-writeback.c   5
-rw-r--r--   mm/page_alloc.c      80
-rw-r--r--   mm/slab.c            30
-rw-r--r--   mm/slob.c            10
11 files changed, 186 insertions, 83 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d3e3bd2ffcea..d213feded10d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -401,7 +401,7 @@ unsigned long __init free_all_bootmem (void)
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
 {
 	bootmem_data_t *bdata;
 	void *ptr;
@@ -409,7 +409,14 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
 	list_for_each_entry(bdata, &bdata_list, list)
 		if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
 			return(ptr);
+	return NULL;
+}
 
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+	void *mem = __alloc_bootmem_nopanic(size,align,goal);
+	if (mem)
+		return mem;
 	/*
 	 * Whoops, we cannot satisfy the allocation request.
 	 */
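The hunk above splits the allocator into a fallible variant (__alloc_bootmem_nopanic returns NULL) and the original entry point, which keeps its panic-on-failure contract by wrapping it. A minimal userspace sketch of the same wrapper pattern, with hypothetical names (xalloc/xalloc_nopanic), not the kernel code itself:

#include <stdio.h>
#include <stdlib.h>

/* Fallible variant: returns NULL when the request cannot be satisfied. */
static void *xalloc_nopanic(size_t size)
{
	return malloc(size);
}

/* Original entry point keeps its never-returns-NULL contract on top of it. */
static void *xalloc(size_t size)
{
	void *mem = xalloc_nopanic(size);

	if (mem)
		return mem;
	fprintf(stderr, "xalloc: cannot satisfy %zu byte request\n", size);
	abort();
}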
diff --git a/mm/madvise.c b/mm/madvise.c
index af3d573b0141..4e196155a0c3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_area_struct *vma,
 		return -EINVAL;
 	}
 
+	if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
+		return -EACCES;
+
 	mapping = vma->vm_file->f_mapping;
 
 	offset = (loff_t)(start - vma->vm_start)
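The new check requires both VM_SHARED and VM_WRITE at once; masking with the OR of the required bits and comparing against that same mask is the usual "all of these bits set" test. A small illustration with made-up flag values (not the kernel's):

#include <stdio.h>

#define F_SHARED 0x1
#define F_WRITE  0x2

/* Returns 1 only when every bit in `required` is set in `flags`. */
static int has_all(unsigned long flags, unsigned long required)
{
	return (flags & required) == required;
}

int main(void)
{
	printf("%d\n", has_all(F_SHARED | F_WRITE, F_SHARED | F_WRITE)); /* 1 */
	printf("%d\n", has_all(F_SHARED, F_SHARED | F_WRITE));           /* 0 */
	return 0;
}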
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dec8249e972d..8778f58880c4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1761,7 +1761,6 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
 		md->mapcount_max = count;
 
 	md->node[page_to_nid(page)]++;
-	cond_resched();
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/migrate.c b/mm/migrate.c
index 09f6e4aa87fc..d444229f2599 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -16,8 +16,7 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>	/* for try_to_release_page(),
-					buffer_heads_over_limit */
+#include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
 #include <linux/pagevec.h>
 #include <linux/rmap.h>
@@ -28,8 +27,6 @@
 
 #include "internal.h"
 
-#include "internal.h"
-
 /* The maximum number of pages to take off the LRU for migration */
 #define MIGRATE_CHUNK_SIZE 256
 
@@ -176,7 +173,6 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
-EXPORT_SYMBOL(swap_page);
 
 /*
  * Remove references for a page and establish the new page with the correct
@@ -234,7 +230,7 @@ int migrate_page_remove_references(struct page *newpage,
 	if (!page_mapping(page) || page_count(page) != nr_refs ||
 			*radix_pointer != page) {
 		write_unlock_irq(&mapping->tree_lock);
-		return 1;
+		return -EAGAIN;
 	}
 
 	/*
diff --git a/mm/mmap.c b/mm/mmap.c
index e780d19aa214..e6ee12344b13 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -121,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 * only call if we're about to fail.
 	 */
 	n = nr_free_pages();
+
+	/*
+	 * Leave reserved pages. The pages are not for anonymous pages.
+	 */
+	if (n <= totalreserve_pages)
+		goto error;
+	else
+		n -= totalreserve_pages;
+
+	/*
+	 * Leave the last 3% for root
+	 */
 	if (!cap_sys_admin)
 		n -= n / 32;
 	free += n;
 
 	if (free > pages)
 		return 0;
-	vm_unacct_memory(pages);
-	return -ENOMEM;
+
+	goto error;
 	}
 
 	allowed = (totalram_pages - hugetlb_total_pages())
@@ -150,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 */
 	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
-
+error:
 	vm_unacct_memory(pages);
 
 	return -ENOMEM;
@@ -220,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 
 	if (brk < mm->end_code)
 		goto out;
+
+	/*
+	 * Check against rlimit here. If this check is done later after the test
+	 * of oldbrk with newbrk then it can escape the test and let the data
+	 * segment grow beyond its set limit the in case where the limit is
+	 * not page aligned -Ram Gupta
+	 */
+	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+		goto out;
+
 	newbrk = PAGE_ALIGN(brk);
 	oldbrk = PAGE_ALIGN(mm->brk);
 	if (oldbrk == newbrk)
@@ -232,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 		goto out;
 	}
 
-	/* Check against rlimit.. */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
-		goto out;
-
 	/* Check against existing mmap mappings. */
 	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
 		goto out;
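In the OVERCOMMIT_GUESS path above, free pages are first reduced by totalreserve_pages and then by roughly 3% (n/32) unless the caller has CAP_SYS_ADMIN, before being compared against the request. A rough standalone sketch of that arithmetic with made-up numbers, only to show the order of the deductions:

#include <stdio.h>

/* Hypothetical figures, in pages; the real values come from the zone counters. */
static long heuristic_free(long free_pages, long reserve_pages, int cap_sys_admin)
{
	long n = free_pages;

	if (n <= reserve_pages)
		return 0;		/* nothing usable beyond the reserves */
	n -= reserve_pages;		/* keep the per-zone reserves untouched */
	if (!cap_sys_admin)
		n -= n / 32;		/* leave roughly the last 3% for root */
	return n;
}

int main(void)
{
	/* e.g. 10000 free pages, 1200 reserved: 8800 - 275 = 8525 usable */
	printf("%ld\n", heuristic_free(10000, 1200, 0));
	return 0;
}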
diff --git a/mm/nommu.c b/mm/nommu.c
index db45efac17cc..029fadac0fb5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1147,14 +1147,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 * only call if we're about to fail.
 	 */
 	n = nr_free_pages();
+
+	/*
+	 * Leave reserved pages. The pages are not for anonymous pages.
+	 */
+	if (n <= totalreserve_pages)
+		goto error;
+	else
+		n -= totalreserve_pages;
+
+	/*
+	 * Leave the last 3% for root
+	 */
 	if (!cap_sys_admin)
 		n -= n / 32;
 	free += n;
 
 	if (free > pages)
 		return 0;
-	vm_unacct_memory(pages);
-	return -ENOMEM;
+
+	goto error;
 	}
 
 	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
@@ -1175,7 +1187,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	 */
 	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
-
+error:
 	vm_unacct_memory(pages);
 
 	return -ENOMEM;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 78747afad6b0..042e6436c3ee 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -46,15 +46,25 @@
 unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
 	unsigned long points, cpu_time, run_time, s;
-	struct list_head *tsk;
+	struct mm_struct *mm;
+	struct task_struct *child;
 
-	if (!p->mm)
+	task_lock(p);
+	mm = p->mm;
+	if (!mm) {
+		task_unlock(p);
 		return 0;
+	}
 
 	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
-	points = p->mm->total_vm;
+	points = mm->total_vm;
+
+	/*
+	 * After this unlock we can no longer dereference local variable `mm'
+	 */
+	task_unlock(p);
 
 	/*
 	 * Processes which fork a lot of child processes are likely
@@ -64,11 +74,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * child is eating the vast majority of memory, adding only half
 	 * to the parents will make the child our kill candidate of choice.
 	 */
-	list_for_each(tsk, &p->children) {
-		struct task_struct *chld;
-		chld = list_entry(tsk, struct task_struct, sibling);
-		if (chld->mm != p->mm && chld->mm)
-			points += chld->mm->total_vm/2 + 1;
+	list_for_each_entry(child, &p->children, sibling) {
+		task_lock(child);
+		if (child->mm != mm && child->mm)
+			points += child->mm->total_vm/2 + 1;
+		task_unlock(child);
 	}
 
 	/*
@@ -244,17 +254,24 @@ static void __oom_kill_task(task_t *p, const char *message)
 	force_sig(SIGKILL, p);
 }
 
-static struct mm_struct *oom_kill_task(task_t *p, const char *message)
+static int oom_kill_task(task_t *p, const char *message)
 {
-	struct mm_struct *mm = get_task_mm(p);
+	struct mm_struct *mm;
 	task_t * g, * q;
 
-	if (!mm)
-		return NULL;
-	if (mm == &init_mm) {
-		mmput(mm);
-		return NULL;
-	}
+	mm = p->mm;
+
+	/* WARNING: mm may not be dereferenced since we did not obtain its
+	 * value from get_task_mm(p). This is OK since all we need to do is
+	 * compare mm to q->mm below.
+	 *
+	 * Furthermore, even if mm contains a non-NULL value, p->mm may
+	 * change to NULL at any time since we do not hold task_lock(p).
+	 * However, this is of no concern to us.
+	 */
+
+	if (mm == NULL || mm == &init_mm)
+		return 1;
 
 	__oom_kill_task(p, message);
 	/*
@@ -266,13 +283,12 @@ static struct mm_struct *oom_kill_task(task_t *p, const char *message)
 			__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
-	return mm;
+	return 0;
 }
 
-static struct mm_struct *oom_kill_process(struct task_struct *p,
-				unsigned long points, const char *message)
+static int oom_kill_process(struct task_struct *p, unsigned long points,
+						const char *message)
 {
-	struct mm_struct *mm;
 	struct task_struct *c;
 	struct list_head *tsk;
 
@@ -283,9 +299,8 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
 		c = list_entry(tsk, struct task_struct, sibling);
 		if (c->mm == p->mm)
 			continue;
-		mm = oom_kill_task(c, message);
-		if (mm)
-			return mm;
+		if (!oom_kill_task(c, message))
+			return 0;
 	}
 	return oom_kill_task(p, message);
 }
@@ -300,7 +315,6 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct mm_struct *mm = NULL;
 	task_t *p;
 	unsigned long points = 0;
 
@@ -320,12 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory in cpuset");
 		break;
 
@@ -347,8 +361,7 @@ retry:
 		panic("Out of memory and no killable processes...\n");
 	}
 
-	mm = oom_kill_process(p, points, "Out of memory");
-	if (!mm)
+	if (oom_kill_process(p, points, "Out of memory"))
 		goto retry;
 
 	break;
@@ -357,8 +370,6 @@ retry:
 out:
 	read_unlock(&tasklist_lock);
 	cpuset_unlock();
-	if (mm)
-		mmput(mm);
 
 	/*
 	 * Give "p" a good chance of killing itself before we
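badness() now snapshots p->mm under task_lock(), copies the field it needs, and drops the lock before doing anything else, so the mm is only dereferenced while it cannot be torn down underneath it. The same take-lock, copy, unlock, then work pattern in a plain pthreads sketch (an analogy with hypothetical types, not kernel code):

#include <pthread.h>
#include <stddef.h>

struct mm {
	unsigned long total_vm;
};

struct task {
	pthread_mutex_t lock;
	struct mm *mm;		/* may be set to NULL by another thread */
};

/* Read the fields we need while holding the lock, then work on the copies. */
static unsigned long badness_points(struct task *t)
{
	unsigned long points;
	struct mm *mm;

	pthread_mutex_lock(&t->lock);
	mm = t->mm;
	if (!mm) {
		pthread_mutex_unlock(&t->lock);
		return 0;
	}
	points = mm->total_vm;
	pthread_mutex_unlock(&t->lock);
	/* mm must not be dereferenced past this point */
	return points;
}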
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6dcce3a4bbdc..75d7f48b79bb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -72,13 +72,12 @@ int dirty_background_ratio = 10;
 int vm_dirty_ratio = 40;
 
 /*
- * The interval between `kupdate'-style writebacks, in centiseconds
- * (hundredths of a second)
+ * The interval between `kupdate'-style writebacks, in jiffies
  */
 int dirty_writeback_interval = 5 * HZ;
 
 /*
- * The longest number of centiseconds for which data is allowed to remain dirty
+ * The longest number of jiffies for which data is allowed to remain dirty
  */
 int dirty_expire_interval = 30 * HZ;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270d..123c60586740 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -151,7 +152,8 @@ static void bad_page(struct page *page)
 			1 << PG_reclaim	|
 			1 << PG_slab	|
 			1 << PG_swapcache |
-			1 << PG_writeback );
+			1 << PG_writeback |
+			1 << PG_buddy );
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -230,18 +232,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
  * zone->lock is already acquired when we use these.
  * So, we don't need atomic page->flags operations here.
  */
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
 	return page_private(page);
 }
 
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
 	set_page_private(page, order);
-	__SetPagePrivate(page);
+	__SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-	__ClearPagePrivate(page);
+	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 }
 
@@ -280,11 +284,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
  * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
  *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
@@ -293,11 +299,11 @@ static inline int page_is_buddy(struct page *page, int order)
 		return 0;
 #endif
 
-	if (PagePrivate(page) &&
-	    (page_order(page) == order) &&
-	     page_count(page) == 0)
-		return 1;
+	if (PageBuddy(page) && page_order(page) == order) {
+		BUG_ON(page_count(page) != 0);
+		return 1;
+	}
 	return 0;
 }
 
 /*
@@ -313,7 +319,7 @@ static inline int page_is_buddy(struct page *page, int order)
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
  * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other. That is, if we allocate a small block, and both were
@@ -376,7 +382,8 @@ static inline int free_pages_check(struct page *page)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -524,7 +531,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 
 	/*
@@ -2472,6 +2480,38 @@ void __init page_alloc_init(void)
 }
 
 /*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *	or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+	struct pglist_data *pgdat;
+	unsigned long reserve_pages = 0;
+	int i, j;
+
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+			unsigned long max = 0;
+
+			/* Find valid and maximum lowmem_reserve in the zone */
+			for (j = i; j < MAX_NR_ZONES; j++) {
+				if (zone->lowmem_reserve[j] > max)
+					max = zone->lowmem_reserve[j];
+			}
+
+			/* we treat pages_high as reserved pages. */
+			max += zone->pages_high;
+
+			if (max > zone->present_pages)
+				max = zone->present_pages;
+			reserve_pages += max;
+		}
+	}
+	totalreserve_pages = reserve_pages;
+}
+
+/*
  * setup_per_zone_lowmem_reserve - called whenever
  *	sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
  *	has a correct pages reserved value, so an adequate number of
@@ -2502,6 +2542,9 @@ static void setup_per_zone_lowmem_reserve(void)
 			}
 		}
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
@@ -2556,6 +2599,9 @@ void setup_per_zone_pages_min(void)
 		zone->pages_high = zone->pages_min + tmp / 2;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
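calculate_totalreserve_pages() takes, for every zone, the largest lowmem_reserve entry plus pages_high, clamps the result to the zone size, and sums those values across zones. A toy recomputation of that formula over hypothetical zone figures (not the kernel's data structures):

#include <stdio.h>

#define NR_ZONES 3

struct fake_zone {
	unsigned long lowmem_reserve[NR_ZONES];
	unsigned long pages_high;
	unsigned long present_pages;
};

static unsigned long total_reserve(const struct fake_zone *zones)
{
	unsigned long reserve = 0;
	int i, j;

	for (i = 0; i < NR_ZONES; i++) {
		unsigned long max = 0;

		/* largest lowmem_reserve entry for this zone */
		for (j = i; j < NR_ZONES; j++)
			if (zones[i].lowmem_reserve[j] > max)
				max = zones[i].lowmem_reserve[j];

		max += zones[i].pages_high;		/* treat pages_high as reserved */
		if (max > zones[i].present_pages)
			max = zones[i].present_pages;	/* clamp to zone size */
		reserve += max;
	}
	return reserve;
}

int main(void)
{
	/* made-up numbers for three zones */
	struct fake_zone zones[NR_ZONES] = {
		{ { 0, 768, 784 }, 32,  4096 },
		{ { 0, 0,   100 }, 512, 225280 },
		{ { 0, 0,   0   }, 128, 32768 },
	};

	printf("%lu\n", total_reserve(zones));	/* (784+32) + (100+512) + (0+128) = 1556 */
	return 0;
}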
diff --git a/mm/slab.c b/mm/slab.c
index f055c1420216..e6ef9bd52335 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -420,6 +420,7 @@ struct kmem_cache {
 	unsigned long max_freeable;
 	unsigned long node_allocs;
 	unsigned long node_frees;
+	unsigned long node_overflow;
 	atomic_t allochit;
 	atomic_t allocmiss;
 	atomic_t freehit;
@@ -465,6 +466,7 @@ struct kmem_cache {
 #define	STATS_INC_ERR(x)	((x)->errors++)
 #define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
 #define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
+#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
 #define	STATS_SET_FREEABLE(x, i) \
 	do { \
 		if ((x)->max_freeable < i) \
@@ -484,6 +486,7 @@ struct kmem_cache {
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
 #define	STATS_INC_NODEFREES(x)	do { } while (0)
+#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
 #define	STATS_SET_FREEABLE(x, i) do { } while (0)
 #define STATS_INC_ALLOCHIT(x)	do { } while (0)
 #define STATS_INC_ALLOCMISS(x)	do { } while (0)
@@ -1453,7 +1456,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	int i;
 
 	flags |= cachep->gfpflags;
+#ifndef CONFIG_MMU
+	/* nommu uses slab's for process anonymous memory allocations, so
+	 * requires __GFP_COMP to properly refcount higher order allocations"
+	 */
+	page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
+#else
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+#endif
 	if (!page)
 		return NULL;
 	addr = page_address(page);
@@ -2318,13 +2328,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
-				   int colour_off, gfp_t local_flags)
+				   int colour_off, gfp_t local_flags,
+				   int nodeid)
 {
 	struct slab *slabp;
 
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+					      local_flags, nodeid);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2334,6 +2346,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
+	slabp->nodeid = nodeid;
 	return slabp;
 }
 
@@ -2519,7 +2532,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		goto failed;
 
 	/* Get slab management. */
-	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
 	if (!slabp)
 		goto opps1;
 
@@ -3080,9 +3093,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 		if (l3->alien && l3->alien[nodeid]) {
 			alien = l3->alien[nodeid];
 			spin_lock(&alien->lock);
-			if (unlikely(alien->avail == alien->limit))
+			if (unlikely(alien->avail == alien->limit)) {
+				STATS_INC_ACOVERFLOW(cachep);
 				__drain_alien_cache(cachep,
 						    alien, nodeid);
+			}
 			alien->entry[alien->avail++] = objp;
 			spin_unlock(&alien->lock);
 		} else {
@@ -3760,7 +3775,7 @@ static void print_slabinfo_header(struct seq_file *m)
 	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
-		 "<error> <maxfreeable> <nodeallocs> <remotefrees>");
+		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
 	seq_putc(m, '\n');
@@ -3874,11 +3889,12 @@ static int s_show(struct seq_file *m, void *p)
 		unsigned long max_freeable = cachep->max_freeable;
 		unsigned long node_allocs = cachep->node_allocs;
 		unsigned long node_frees = cachep->node_frees;
+		unsigned long overflows = cachep->node_overflow;
 
 		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-				%4lu %4lu %4lu %4lu", allocs, high, grown,
+				%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
 				reaped, errors, max_freeable, node_allocs,
-				node_frees);
+				node_frees, overflows);
 	}
 	/* cpu stats */
 	{
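The new STATS_INC_ACOVERFLOW counter follows the file's existing convention: with STATS enabled it increments a field, otherwise it compiles to a do { } while (0) no-op, which still behaves like a single statement when used as the unbraced body of an if. A generic sketch of that idiom with a hypothetical STATS switch and counter name:

#include <stdio.h>

#define STATS 1		/* flip to 0 and the counter compiles away */

struct cache_stats {
	unsigned long overflows;
};

#if STATS
#define STAT_INC_OVERFLOW(s)	((s)->overflows++)
#else
#define STAT_INC_OVERFLOW(s)	do { } while (0)
#endif

int main(void)
{
	struct cache_stats st = { 0 };

	if (st.overflows < 10)
		STAT_INC_OVERFLOW(&st);	/* expands to a single statement either way */

	printf("%lu\n", st.overflows);
	return 0;
}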
diff --git a/mm/slob.c b/mm/slob.c
index 9bcc7e2cabfd..a68255ba4553 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -354,9 +354,7 @@ void *__alloc_percpu(size_t size)
 	if (!pdata)
 		return NULL;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i) {
 		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
@@ -383,11 +381,9 @@ free_percpu(const void *objp)
 	int i;
 	struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i)
 		kfree(p->ptrs[i]);
-	}
+
 	kfree(p);
 }
 EXPORT_SYMBOL(free_percpu);
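The slob change replaces open-coded loops over NR_CPUS that skip impossible CPUs with the for_each_possible_cpu() iterator. A self-contained sketch of how such an iterator macro can be built over a possible-CPU bitmap, with hypothetical names rather than the kernel implementation:

#include <stdio.h>

#define MAX_CPUS 8

/* Hypothetical possible-CPU map: bit i set means CPU i could come online. */
static const unsigned long possible_map = 0x0b;	/* CPUs 0, 1 and 3 */

static int next_possible(int cpu)
{
	for (cpu++; cpu < MAX_CPUS; cpu++)
		if (possible_map & (1UL << cpu))
			return cpu;
	return MAX_CPUS;
}

#define for_each_possible(cpu) \
	for ((cpu) = next_possible(-1); (cpu) < MAX_CPUS; (cpu) = next_possible(cpu))

int main(void)
{
	int cpu;

	for_each_possible(cpu)
		printf("cpu %d\n", cpu);	/* prints 0, 1, 3 */
	return 0;
}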