author		Steven Whitehouse <swhiteho@redhat.com>		2006-04-21 12:52:36 -0400
committer	Steven Whitehouse <swhiteho@redhat.com>		2006-04-21 12:52:36 -0400
commit		a748422ee45725e04e1d3792fa19dfa90ddfd116 (patch)
tree		978e12895468baaa9f7ab2747b9f7d50beaf1717 /mm
parent		c63e31c2cc1ec67372920b5e1aff8204d04dd172 (diff)
parent		f4ffaa452e71495a06376f12f772342bc57051fc (diff)
Merge branch 'master'
Diffstat (limited to 'mm')
-rw-r--r--	mm/bootmem.c		 9
-rw-r--r--	mm/madvise.c		 3
-rw-r--r--	mm/mempolicy.c		 1
-rw-r--r--	mm/migrate.c		 8
-rw-r--r--	mm/mmap.c		34
-rw-r--r--	mm/nommu.c		18
-rw-r--r--	mm/oom_kill.c		71
-rw-r--r--	mm/page-writeback.c	 5
-rw-r--r--	mm/page_alloc.c		80
-rw-r--r--	mm/slab.c		30
-rw-r--r--	mm/slob.c		10
11 files changed, 186 insertions, 83 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d3e3bd2ffcea..d213feded10d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -401,7 +401,7 @@ unsigned long __init free_all_bootmem (void)
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
 {
 	bootmem_data_t *bdata;
 	void *ptr;
@@ -409,7 +409,14 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
 	list_for_each_entry(bdata, &bdata_list, list)
 		if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
 			return(ptr);
+	return NULL;
+}
 
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+	void *mem = __alloc_bootmem_nopanic(size,align,goal);
+	if (mem)
+		return mem;
 	/*
 	 * Whoops, we cannot satisfy the allocation request.
 	 */
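
The bootmem hunks above split the allocator in two: __alloc_bootmem_nopanic() returns NULL when no node can satisfy the request, and __alloc_bootmem() becomes a thin wrapper that keeps the old panic-on-failure behaviour. A minimal user-space sketch of that wrapper-over-nopanic pattern (names and the abort() stand-in are illustrative only, not kernel API):

#include <stdio.h>
#include <stdlib.h>

/* "_nopanic" variant: report failure by returning NULL. */
static void *alloc_nopanic(size_t size)
{
	return malloc(size);
}

/* Strict variant: turn a NULL from the nopanic path into a hard error,
 * the way __alloc_bootmem() falls through to panic() above. */
static void *alloc_or_die(size_t size)
{
	void *mem = alloc_nopanic(size);

	if (mem)
		return mem;
	fprintf(stderr, "allocation of %zu bytes failed\n", size);
	abort();
}

int main(void)
{
	char *buf = alloc_or_die(64);

	free(buf);
	return 0;
}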
diff --git a/mm/madvise.c b/mm/madvise.c
index af3d573b0141..4e196155a0c3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_area_struct *vma,
 		return -EINVAL;
 	}
 
+	if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
+		return -EACCES;
+
 	mapping = vma->vm_file->f_mapping;
 
 	offset = (loff_t)(start - vma->vm_start)
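
For reference, the new check in madvise_remove() refuses the operation unless the mapping is both shared and writable; masking with (VM_SHARED|VM_WRITE) and comparing against the same mask is the "both bits set" test, which a plain logical AND of the flags would not give. A small stand-alone illustration (the flag values below are placeholders, not the kernel's):

#include <stdio.h>

#define VM_SHARED 0x1u
#define VM_WRITE  0x2u

static int shared_and_writable(unsigned int flags)
{
	return (flags & (VM_SHARED | VM_WRITE)) == (VM_SHARED | VM_WRITE);
}

int main(void)
{
	printf("%d %d %d %d\n",
	       shared_and_writable(0),                      /* 0 */
	       shared_and_writable(VM_SHARED),              /* 0 */
	       shared_and_writable(VM_WRITE),               /* 0 */
	       shared_and_writable(VM_SHARED | VM_WRITE));  /* 1 */
	return 0;
}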
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dec8249e972d..8778f58880c4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1761,7 +1761,6 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
 		md->mapcount_max = count;
 
 	md->node[page_to_nid(page)]++;
-	cond_resched();
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/migrate.c b/mm/migrate.c
index 09f6e4aa87fc..d444229f2599 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -16,8 +16,7 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
-#include <linux/buffer_head.h>	/* for try_to_release_page(),
-				   buffer_heads_over_limit */
+#include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
 #include <linux/pagevec.h>
 #include <linux/rmap.h>
@@ -28,8 +27,6 @@
 
 #include "internal.h"
 
-#include "internal.h"
-
 /* The maximum number of pages to take off the LRU for migration */
 #define MIGRATE_CHUNK_SIZE 256
 
@@ -176,7 +173,6 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
-EXPORT_SYMBOL(swap_page);
 
 /*
  * Remove references for a page and establish the new page with the correct
@@ -234,7 +230,7 @@ int migrate_page_remove_references(struct page *newpage,
 	if (!page_mapping(page) || page_count(page) != nr_refs ||
 			*radix_pointer != page) {
 		write_unlock_irq(&mapping->tree_lock);
-		return 1;
+		return -EAGAIN;
 	}
 
 	/*
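
The last migrate.c hunk changes the failure return from a bare 1 to -EAGAIN, so the function follows the usual "zero on success, negative errno on failure" convention. An illustrative (non-kernel) caller pattern for such a return value:

#include <errno.h>
#include <stdio.h>

/* Stand-in for a function that may ask its caller to retry. */
static int try_operation(int busy)
{
	return busy ? -EAGAIN : 0;
}

int main(void)
{
	int rc = try_operation(1);

	if (rc == -EAGAIN)
		printf("busy, retry later\n");
	else if (rc < 0)
		printf("failed: %d\n", rc);
	else
		printf("ok\n");
	return 0;
}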
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -121,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (n <= totalreserve_pages)
+			goto error;
+		else
+			n -= totalreserve_pages;
+
+		/*
+		 * Leave the last 3% for root
+		 */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;
 
		if (free > pages)
			return 0;
-		vm_unacct_memory(pages);
-		return -ENOMEM;
+
+		goto error;
	}
 
	allowed = (totalram_pages - hugetlb_total_pages())
@@ -150,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
	 */
	if (atomic_read(&vm_committed_space) < (long)allowed)
		return 0;
-
+error:
	vm_unacct_memory(pages);
 
	return -ENOMEM;
@@ -220,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 
	if (brk < mm->end_code)
		goto out;
+
+	/*
+	 * Check against rlimit here. If this check is done later after the test
+	 * of oldbrk with newbrk then it can escape the test and let the data
+	 * segment grow beyond its set limit the in case where the limit is
+	 * not page aligned -Ram Gupta
+	 */
+	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+		goto out;
+
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
@@ -232,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
			goto out;
	}
 
-	/* Check against rlimit.. */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
-		goto out;
-
	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;
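
The __vm_enough_memory() change above makes the OVERCOMMIT_GUESS heuristic skip the newly tracked totalreserve_pages before applying the existing "leave 3% for root" rule, and routes both failure paths through a single error label. A simplified user-space sketch of that arithmetic (made-up numbers; it ignores the page-cache and swap terms the kernel also folds into `free`):

#include <stdio.h>

static int enough_memory(long request, long free_pages,
			 long reserve_pages, int cap_sys_admin)
{
	long n = free_pages;

	if (n <= reserve_pages)
		return -1;		/* reserves are off limits */
	n -= reserve_pages;

	if (!cap_sys_admin)
		n -= n / 32;		/* leave roughly 3% for root */

	return n > request ? 0 : -1;
}

int main(void)
{
	printf("%d\n", enough_memory(1000, 5000, 500, 0));	/* 0: fits */
	printf("%d\n", enough_memory(1000, 1200, 500, 0));	/* -1: refused */
	return 0;
}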
diff --git a/mm/nommu.c b/mm/nommu.c
index db45efac17cc..029fadac0fb5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1147,14 +1147,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (n <= totalreserve_pages)
+			goto error;
+		else
+			n -= totalreserve_pages;
+
+		/*
+		 * Leave the last 3% for root
+		 */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;
 
		if (free > pages)
			return 0;
-		vm_unacct_memory(pages);
-		return -ENOMEM;
+
+		goto error;
	}
 
	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
@@ -1175,7 +1187,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
	 */
	if (atomic_read(&vm_committed_space) < (long)allowed)
		return 0;
-
+error:
	vm_unacct_memory(pages);
 
	return -ENOMEM;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 78747afad6b0..042e6436c3ee 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -46,15 +46,25 @@
 unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
 	unsigned long points, cpu_time, run_time, s;
-	struct list_head *tsk;
+	struct mm_struct *mm;
+	struct task_struct *child;
 
-	if (!p->mm)
+	task_lock(p);
+	mm = p->mm;
+	if (!mm) {
+		task_unlock(p);
 		return 0;
+	}
 
 	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
-	points = p->mm->total_vm;
+	points = mm->total_vm;
+
+	/*
+	 * After this unlock we can no longer dereference local variable `mm'
+	 */
+	task_unlock(p);
 
 	/*
 	 * Processes which fork a lot of child processes are likely
@@ -64,11 +74,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * child is eating the vast majority of memory, adding only half
 	 * to the parents will make the child our kill candidate of choice.
 	 */
-	list_for_each(tsk, &p->children) {
-		struct task_struct *chld;
-		chld = list_entry(tsk, struct task_struct, sibling);
-		if (chld->mm != p->mm && chld->mm)
-			points += chld->mm->total_vm/2 + 1;
+	list_for_each_entry(child, &p->children, sibling) {
+		task_lock(child);
+		if (child->mm != mm && child->mm)
+			points += child->mm->total_vm/2 + 1;
+		task_unlock(child);
 	}
 
 	/*
@@ -244,17 +254,24 @@ static void __oom_kill_task(task_t *p, const char *message)
 	force_sig(SIGKILL, p);
 }
 
-static struct mm_struct *oom_kill_task(task_t *p, const char *message)
+static int oom_kill_task(task_t *p, const char *message)
 {
-	struct mm_struct *mm = get_task_mm(p);
+	struct mm_struct *mm;
 	task_t * g, * q;
 
-	if (!mm)
-		return NULL;
-	if (mm == &init_mm) {
-		mmput(mm);
-		return NULL;
-	}
+	mm = p->mm;
+
+	/* WARNING: mm may not be dereferenced since we did not obtain its
+	 * value from get_task_mm(p). This is OK since all we need to do is
+	 * compare mm to q->mm below.
+	 *
+	 * Furthermore, even if mm contains a non-NULL value, p->mm may
+	 * change to NULL at any time since we do not hold task_lock(p).
+	 * However, this is of no concern to us.
+	 */
+
+	if (mm == NULL || mm == &init_mm)
+		return 1;
 
 	__oom_kill_task(p, message);
 	/*
@@ -266,13 +283,12 @@ static struct mm_struct *oom_kill_task(task_t *p, const char *message)
 			__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
-	return mm;
+	return 0;
 }
 
-static struct mm_struct *oom_kill_process(struct task_struct *p,
-				unsigned long points, const char *message)
+static int oom_kill_process(struct task_struct *p, unsigned long points,
+						const char *message)
 {
-	struct mm_struct *mm;
 	struct task_struct *c;
 	struct list_head *tsk;
 
@@ -283,9 +299,8 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
 		c = list_entry(tsk, struct task_struct, sibling);
 		if (c->mm == p->mm)
 			continue;
-		mm = oom_kill_task(c, message);
-		if (mm)
-			return mm;
+		if (!oom_kill_task(c, message))
+			return 0;
 	}
 	return oom_kill_task(p, message);
 }
@@ -300,7 +315,6 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct mm_struct *mm = NULL;
 	task_t *p;
 	unsigned long points = 0;
 
@@ -320,12 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 	 */
 	switch (constrained_alloc(zonelist, gfp_mask)) {
 	case CONSTRAINT_MEMORY_POLICY:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory (MPOL_BIND)");
 		break;
 
 	case CONSTRAINT_CPUSET:
-		mm = oom_kill_process(current, points,
+		oom_kill_process(current, points,
 				"No available memory in cpuset");
 		break;
 
@@ -347,8 +361,7 @@ retry:
 			panic("Out of memory and no killable processes...\n");
 		}
 
-		mm = oom_kill_process(p, points, "Out of memory");
-		if (!mm)
+		if (oom_kill_process(p, points, "Out of memory"))
 			goto retry;
 
 		break;
@@ -357,8 +370,6 @@ retry:
 out:
 	read_unlock(&tasklist_lock);
 	cpuset_unlock();
-	if (mm)
-		mmput(mm);
 
 	/*
 	 * Give "p" a good chance of killing itself before we
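
The badness() change is the classic "snapshot under a lock" pattern: take task_lock(), copy the one value needed from p->mm, drop the lock, and never dereference the mm pointer afterwards. A user-space analogue with a pthread mutex (types and fields invented for the example):

#include <pthread.h>
#include <stdio.h>

struct task {
	pthread_mutex_t lock;
	long total_vm;		/* stands in for p->mm->total_vm */
	int has_mm;		/* stands in for p->mm != NULL */
};

static long badness_points(struct task *p)
{
	long points;

	pthread_mutex_lock(&p->lock);
	if (!p->has_mm) {
		pthread_mutex_unlock(&p->lock);
		return 0;
	}
	points = p->total_vm;			/* snapshot under the lock */
	pthread_mutex_unlock(&p->lock);		/* no dereference after this */

	return points;
}

int main(void)
{
	struct task t = { PTHREAD_MUTEX_INITIALIZER, 1234, 1 };

	printf("points = %ld\n", badness_points(&t));
	return 0;
}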
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6dcce3a4bbdc..75d7f48b79bb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -72,13 +72,12 @@ int dirty_background_ratio = 10;
 int vm_dirty_ratio = 40;
 
 /*
- * The interval between `kupdate'-style writebacks, in centiseconds
- * (hundredths of a second)
+ * The interval between `kupdate'-style writebacks, in jiffies
  */
 int dirty_writeback_interval = 5 * HZ;
 
 /*
- * The longest number of centiseconds for which data is allowed to remain dirty
+ * The longest number of jiffies for which data is allowed to remain dirty
 */
 int dirty_expire_interval = 30 * HZ;
 
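
The comment fix matters because the values are initialised as multiples of HZ, i.e. they are jiffies, not centiseconds; "5 * HZ" is five seconds whatever HZ happens to be. Simple illustrative arithmetic (HZ below is a stand-in value):

#include <stdio.h>

#define HZ 250

int main(void)
{
	int dirty_writeback_interval = 5 * HZ;	/* jiffies */
	int dirty_expire_interval = 30 * HZ;	/* jiffies */

	printf("writeback every %d jiffies = %d s at HZ=%d\n",
	       dirty_writeback_interval, dirty_writeback_interval / HZ, HZ);
	printf("expire after %d jiffies = %d s at HZ=%d\n",
	       dirty_expire_interval, dirty_expire_interval / HZ, HZ);
	return 0;
}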
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270d..123c60586740 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -151,7 +152,8 @@ static void bad_page(struct page *page)
			1 << PG_reclaim |
			1 << PG_slab    |
			1 << PG_swapcache |
-			1 << PG_writeback );
+			1 << PG_writeback |
+			1 << PG_buddy );
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -230,18 +232,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 * zone->lock is already acquired when we use these.
 * So, we don't need atomic page->flags operations here.
 */
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
 	return page_private(page);
 }
 
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
 	set_page_private(page, order);
-	__SetPagePrivate(page);
+	__SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-	__ClearPagePrivate(page);
+	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 }
 
@@ -280,11 +284,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
 * This function checks whether a page is free && is the buddy
 * we can do coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
 *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
 */
 static inline int page_is_buddy(struct page *page, int order)
 {
@@ -293,11 +299,11 @@ static inline int page_is_buddy(struct page *page, int order)
		return 0;
 #endif
 
-	if (PagePrivate(page) &&
-	    (page_order(page) == order) &&
-	     page_count(page) == 0)
-		return 1;
+	if (PageBuddy(page) && page_order(page) == order) {
+		BUG_ON(page_count(page) != 0);
+		return 1;
+	}
	return 0;
 }
 
 /*
@@ -313,7 +319,7 @@ static inline int page_is_buddy(struct page *page, int order)
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
 * order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other. That is, if we allocate a small block, and both were
@@ -376,7 +382,8 @@ static inline int free_pages_check(struct page *page)
			1 << PG_slab    |
			1 << PG_swapcache |
			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -524,7 +531,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
			1 << PG_slab    |
			1 << PG_swapcache |
			1 << PG_writeback |
-			1 << PG_reserved ))))
+			1 << PG_reserved |
+			1 << PG_buddy ))))
 		bad_page(page);
 
 	/*
@@ -2472,6 +2480,38 @@ void __init page_alloc_init(void)
 }
 
 /*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *	or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+	struct pglist_data *pgdat;
+	unsigned long reserve_pages = 0;
+	int i, j;
+
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+			unsigned long max = 0;
+
+			/* Find valid and maximum lowmem_reserve in the zone */
+			for (j = i; j < MAX_NR_ZONES; j++) {
+				if (zone->lowmem_reserve[j] > max)
+					max = zone->lowmem_reserve[j];
+			}
+
+			/* we treat pages_high as reserved pages. */
+			max += zone->pages_high;
+
+			if (max > zone->present_pages)
+				max = zone->present_pages;
+			reserve_pages += max;
+		}
+	}
+	totalreserve_pages = reserve_pages;
+}
+
+/*
 * setup_per_zone_lowmem_reserve - called whenever
 *	sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
 *	has a correct pages reserved value, so an adequate number of
@@ -2502,6 +2542,9 @@ static void setup_per_zone_lowmem_reserve(void)
			}
		}
	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
@@ -2556,6 +2599,9 @@ void setup_per_zone_pages_min(void)
		zone->pages_high = zone->pages_min + tmp / 2;
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
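
calculate_totalreserve_pages() above sums, over every zone, the largest lowmem_reserve entry plus the high watermark, capped at the zone's present pages; __vm_enough_memory() then treats that total as untouchable when guessing whether an allocation can be committed. A self-contained user-space sketch of the same loop (zone data invented for the example):

#include <stdio.h>

#define MAX_NR_ZONES 3

struct zone {
	unsigned long lowmem_reserve[MAX_NR_ZONES];
	unsigned long pages_high;
	unsigned long present_pages;
};

static unsigned long total_reserve(const struct zone *zones, int nr)
{
	unsigned long reserve = 0;

	for (int i = 0; i < nr; i++) {
		unsigned long max = 0;

		/* largest lowmem_reserve entry for this zone */
		for (int j = i; j < MAX_NR_ZONES; j++)
			if (zones[i].lowmem_reserve[j] > max)
				max = zones[i].lowmem_reserve[j];

		/* treat the high watermark as reserved too */
		max += zones[i].pages_high;

		if (max > zones[i].present_pages)
			max = zones[i].present_pages;
		reserve += max;
	}
	return reserve;
}

int main(void)
{
	struct zone zones[MAX_NR_ZONES] = {
		{ { 0, 32, 128 },  64,  4096 },
		{ { 0,  0, 256 }, 512, 65536 },
		{ { 0,  0,   0 }, 256, 32768 },
	};

	printf("totalreserve_pages = %lu\n",
	       total_reserve(zones, MAX_NR_ZONES));
	return 0;
}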
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -420,6 +420,7 @@ struct kmem_cache {
	unsigned long max_freeable;
	unsigned long node_allocs;
	unsigned long node_frees;
+	unsigned long node_overflow;
	atomic_t allochit;
	atomic_t allocmiss;
	atomic_t freehit;
@@ -465,6 +466,7 @@ struct kmem_cache {
 #define STATS_INC_ERR(x)	((x)->errors++)
 #define STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
 #define STATS_INC_NODEFREES(x)	((x)->node_frees++)
+#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
 #define STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < i)				\
@@ -484,6 +486,7 @@ struct kmem_cache {
 #define STATS_INC_ERR(x)	do { } while (0)
 #define STATS_INC_NODEALLOCS(x)	do { } while (0)
 #define STATS_INC_NODEFREES(x)	do { } while (0)
+#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
 #define STATS_SET_FREEABLE(x, i) do { } while (0)
 #define STATS_INC_ALLOCHIT(x)	do { } while (0)
 #define STATS_INC_ALLOCMISS(x)	do { } while (0)
@@ -1453,7 +1456,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
	int i;
 
	flags |= cachep->gfpflags;
+#ifndef CONFIG_MMU
+	/* nommu uses slab's for process anonymous memory allocations, so
+	 * requires __GFP_COMP to properly refcount higher order allocations"
+	 */
+	page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
+#else
	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+#endif
	if (!page)
		return NULL;
	addr = page_address(page);
@@ -2318,13 +2328,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
-				   int colour_off, gfp_t local_flags)
+				   int colour_off, gfp_t local_flags,
+				   int nodeid)
 {
	struct slab *slabp;
 
	if (OFF_SLAB(cachep)) {
		/* Slab management obj is off-slab. */
-		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+					      local_flags, nodeid);
		if (!slabp)
			return NULL;
	} else {
@@ -2334,6 +2346,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
	slabp->inuse = 0;
	slabp->colouroff = colour_off;
	slabp->s_mem = objp + colour_off;
+	slabp->nodeid = nodeid;
	return slabp;
 }
 
@@ -2519,7 +2532,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
		goto failed;
 
	/* Get slab management. */
-	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
	if (!slabp)
		goto opps1;
 
@@ -3080,9 +3093,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
		if (l3->alien && l3->alien[nodeid]) {
			alien = l3->alien[nodeid];
			spin_lock(&alien->lock);
-			if (unlikely(alien->avail == alien->limit))
+			if (unlikely(alien->avail == alien->limit)) {
+				STATS_INC_ACOVERFLOW(cachep);
				__drain_alien_cache(cachep,
						    alien, nodeid);
+			}
			alien->entry[alien->avail++] = objp;
			spin_unlock(&alien->lock);
		} else {
@@ -3760,7 +3775,7 @@ static void print_slabinfo_header(struct seq_file *m)
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
-			"<error> <maxfreeable> <nodeallocs> <remotefrees>");
+			"<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
	seq_putc(m, '\n');
@@ -3874,11 +3889,12 @@ static int s_show(struct seq_file *m, void *p)
		unsigned long max_freeable = cachep->max_freeable;
		unsigned long node_allocs = cachep->node_allocs;
		unsigned long node_frees = cachep->node_frees;
+		unsigned long overflows = cachep->node_overflow;
 
		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-				%4lu %4lu %4lu %4lu", allocs, high, grown,
+				%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
				reaped, errors, max_freeable, node_allocs,
-				node_frees);
+				node_frees, overflows);
	}
	/* cpu stats */
	{
diff --git a/mm/slob.c b/mm/slob.c
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -354,9 +354,7 @@ void *__alloc_percpu(size_t size)
	if (!pdata)
		return NULL;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i) {
		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
		if (!pdata->ptrs[i])
			goto unwind_oom;
@@ -383,11 +381,9 @@ free_percpu(const void *objp)
	int i;
	struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_possible_cpu(i)
		kfree(p->ptrs[i]);
-	}
+
	kfree(p);
 }
 EXPORT_SYMBOL(free_percpu);
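
The slob.c hunks replace the open-coded "skip CPUs that are not possible" loops with for_each_possible_cpu(), so callers stop repeating the cpu_possible() test. A toy user-space version of that iterator pattern (NR_CPUS and the possible map below are stand-ins, and the macro is a simplification of the real one):

#include <stdio.h>

#define NR_CPUS 8
static const int possible_map[NR_CPUS] = { 1, 1, 1, 1, 0, 0, 0, 0 };

/* Toy iterator: visit only the CPUs marked possible. */
#define for_each_possible_cpu(cpu) \
	for ((cpu) = 0; (cpu) < NR_CPUS; (cpu)++) \
		if (possible_map[cpu])

int main(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		printf("allocating per-cpu data for cpu %d\n", cpu);
	return 0;
}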