author     Ingo Molnar <mingo@elte.hu>    2008-09-14 11:26:53 -0400
committer  Ingo Molnar <mingo@elte.hu>    2008-09-14 11:26:53 -0400
commit     f81b691a3df09806385ea413c3a2ee094c705ca3 (patch)
tree       01c0d6d319fcbddc98171d06cfe8e742cd270455 /mm
parent     110e0358e7dfd9cc56d47077068f3680dae10b56 (diff)
parent     adee14b2e1557d0a8559f29681732d05a89dfc35 (diff)
Merge commit 'v2.6.27-rc6' into x86/pat
Diffstat (limited to 'mm')
-rw-r--r--   mm/bootmem.c         |  35
-rw-r--r--   mm/filemap.c         |  11
-rw-r--r--   mm/filemap_xip.c     |  65
-rw-r--r--   mm/mm_init.c         |   2
-rw-r--r--   mm/mmap.c            |   4
-rw-r--r--   mm/page_alloc.c      |   9
-rw-r--r--   mm/page_isolation.c  |   1
-rw-r--r--   mm/quicklist.c       |   9
-rw-r--r--   mm/rmap.c            |  39
-rw-r--r--   mm/slub.c            |   4
-rw-r--r--   mm/swap_state.c      |   2
-rw-r--r--   mm/truncate.c        |   4
-rw-r--r--   mm/vmstat.c          |  19
13 files changed, 157 insertions, 47 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e023c68b0255..ad8eec6e44a8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -405,6 +405,29 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
+			unsigned long step)
+{
+	unsigned long base = bdata->node_min_pfn;
+
+	/*
+	 * Align the index with respect to the node start so that the
+	 * combination of both satisfies the requested alignment.
+	 */
+
+	return ALIGN(base + idx, step) - base;
+}
+
+static unsigned long align_off(struct bootmem_data *bdata, unsigned long off,
+			unsigned long align)
+{
+	unsigned long base = PFN_PHYS(bdata->node_min_pfn);
+
+	/* Same as align_idx for byte offsets */
+
+	return ALIGN(base + off, align) - base;
+}
+
 static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
@@ -441,7 +464,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 	else
 		start = ALIGN(min, step);
 
-	sidx = start - bdata->node_min_pfn;;
+	sidx = start - bdata->node_min_pfn;
 	midx = max - bdata->node_min_pfn;
 
 	if (bdata->hint_idx > sidx) {
@@ -450,7 +473,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		 * catch the fallback below.
 		 */
 		fallback = sidx + 1;
-		sidx = ALIGN(bdata->hint_idx, step);
+		sidx = align_idx(bdata, bdata->hint_idx, step);
 	}
 
 	while (1) {
@@ -459,7 +482,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		unsigned long eidx, i, start_off, end_off;
 find_block:
 		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
-		sidx = ALIGN(sidx, step);
+		sidx = align_idx(bdata, sidx, step);
 		eidx = sidx + PFN_UP(size);
 
 		if (sidx >= midx || eidx > midx)
@@ -467,7 +490,7 @@ find_block:
 
 		for (i = sidx; i < eidx; i++)
 			if (test_bit(i, bdata->node_bootmem_map)) {
-				sidx = ALIGN(i, step);
+				sidx = align_idx(bdata, i, step);
 				if (sidx == i)
 					sidx += step;
 				goto find_block;
@@ -475,7 +498,7 @@ find_block:
 
 		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
 				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
-			start_off = ALIGN(bdata->last_end_off, align);
+			start_off = align_off(bdata, bdata->last_end_off, align);
 		else
 			start_off = PFN_PHYS(sidx);
 
@@ -499,7 +522,7 @@ find_block:
 	}
 
 	if (fallback) {
-		sidx = ALIGN(fallback - 1, step);
+		sidx = align_idx(bdata, fallback - 1, step);
 		fallback = 0;
 		goto find_block;
 	}
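The new align_idx()/align_off() helpers matter because bootmem indices and offsets are relative to the node's first PFN, so aligning the bare index only lands on an aligned physical address when the node itself starts on an aligned boundary. Below is a minimal user-space sketch of that arithmetic; the ALIGN macro and the example numbers are illustrative, not taken from the kernel tree.

#include <stdio.h>

/* power-of-two alignment, same idea as the kernel's ALIGN() */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* align a node-relative index so that base + index is aligned */
static unsigned long align_idx(unsigned long base, unsigned long idx,
			       unsigned long step)
{
	return ALIGN(base + idx, step) - base;
}

int main(void)
{
	unsigned long base = 5, idx = 2, step = 4;	/* node starts at PFN 5 */

	/* Aligning the bare index gives absolute PFN 9, which is not 4-aligned. */
	printf("ALIGN(idx):  idx %lu -> PFN %lu\n",
	       ALIGN(idx, step), base + ALIGN(idx, step));
	/* Aligning relative to the node base gives absolute PFN 8, which is. */
	printf("align_idx(): idx %lu -> PFN %lu\n",
	       align_idx(base, idx, step), base + align_idx(base, idx, step));
	return 0;
}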
diff --git a/mm/filemap.c b/mm/filemap.c
index 54e968650855..876bc595d0f8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2129,13 +2129,20 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	 * After a write we want buffered reads to be sure to go to disk to get
 	 * the new data. We invalidate clean cached page from the region we're
 	 * about to write. We do this *before* the write so that we can return
-	 * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+	 * without clobbering -EIOCBQUEUED from ->direct_IO().
 	 */
 	if (mapping->nrpages) {
 		written = invalidate_inode_pages2_range(mapping,
 					pos >> PAGE_CACHE_SHIFT, end);
-		if (written)
+		/*
+		 * If a page can not be invalidated, return 0 to fall back
+		 * to buffered write.
+		 */
+		if (written) {
+			if (written == -EBUSY)
+				return 0;
 			goto out;
+		}
 	}
 
 	written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 380ab402d711..b5167dfb2f2d 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,6 +15,8 @@
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
@@ -22,22 +24,18 @@
  * We do use our own empty page to avoid interference with other users
  * of ZERO_PAGE(), such as /dev/zero
  */
+static DEFINE_MUTEX(xip_sparse_mutex);
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
 static struct page *__xip_sparse_page;
 
+/* called under xip_sparse_mutex */
 static struct page *xip_sparse_page(void)
 {
 	if (!__xip_sparse_page) {
 		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
 
-		if (page) {
-			static DEFINE_SPINLOCK(xip_alloc_lock);
-			spin_lock(&xip_alloc_lock);
-			if (!__xip_sparse_page)
-				__xip_sparse_page = page;
-			else
-				__free_page(page);
-			spin_unlock(&xip_alloc_lock);
-		}
+		if (page)
+			__xip_sparse_page = page;
 	}
 	return __xip_sparse_page;
 }
@@ -174,18 +172,23 @@ __xip_unmap (struct address_space * mapping,
 	pte_t pteval;
 	spinlock_t *ptl;
 	struct page *page;
+	unsigned count;
+	int locked = 0;
+
+	count = read_seqcount_begin(&xip_sparse_seq);
 
 	page = __xip_sparse_page;
 	if (!page)
 		return;
 
+retry:
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		pte = page_check_address(page, mm, address, &ptl);
+		pte = page_check_address(page, mm, address, &ptl, 1);
 		if (pte) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
 		}
 	}
 	spin_unlock(&mapping->i_mmap_lock);
+
+	if (locked) {
+		mutex_unlock(&xip_sparse_mutex);
+	} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
+		mutex_lock(&xip_sparse_mutex);
+		locked = 1;
+		goto retry;
+	}
 }
 
 /*
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 
 	/* XXX: are VM_FAULT_ codes OK? */
-
+again:
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (vmf->pgoff >= size)
 		return VM_FAULT_SIGBUS;
@@ -237,8 +248,10 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		int err;
 
 		/* maybe shared writable, allocate new block */
+		mutex_lock(&xip_sparse_mutex);
 		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
 							&xip_mem, &xip_pfn);
+		mutex_unlock(&xip_sparse_mutex);
 		if (error)
 			return VM_FAULT_SIGBUS;
 		/* unmap sparse mappings at pgoff from all other vmas */
@@ -252,14 +265,34 @@ found:
 		BUG_ON(err);
 		return VM_FAULT_NOPAGE;
 	} else {
+		int err, ret = VM_FAULT_OOM;
+
+		mutex_lock(&xip_sparse_mutex);
+		write_seqcount_begin(&xip_sparse_seq);
+		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
+							&xip_mem, &xip_pfn);
+		if (unlikely(!error)) {
+			write_seqcount_end(&xip_sparse_seq);
+			mutex_unlock(&xip_sparse_mutex);
+			goto again;
+		}
+		if (error != -ENODATA)
+			goto out;
 		/* not shared and writable, use xip_sparse_page() */
 		page = xip_sparse_page();
 		if (!page)
-			return VM_FAULT_OOM;
+			goto out;
+		err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+							page);
+		if (err == -ENOMEM)
+			goto out;
 
-		page_cache_get(page);
-		vmf->page = page;
-		return 0;
+		ret = VM_FAULT_NOPAGE;
+out:
+		write_seqcount_end(&xip_sparse_seq);
+		mutex_unlock(&xip_sparse_mutex);
+
+		return ret;
 	}
 }
 
@@ -308,8 +341,10 @@ __xip_file_write(struct file *filp, const char __user *buf,
 						&xip_mem, &xip_pfn);
 		if (status == -ENODATA) {
 			/* we allocate a new page unmap it */
+			mutex_lock(&xip_sparse_mutex);
 			status = a_ops->get_xip_mem(mapping, index, 1,
 							&xip_mem, &xip_pfn);
+			mutex_unlock(&xip_sparse_mutex);
 			if (!status)
 				/* unmap page at pgoff from all other vmas */
 				__xip_unmap(mapping, index);
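The __xip_unmap() change above is an instance of an optimistic read pattern: sample a sequence counter, do the unmap scan without taking the sparse-allocation lock, and only if a writer (the fault path's write_seqcount_begin/end section) ran concurrently, repeat the scan while holding xip_sparse_mutex. A rough user-space sketch of the same pattern follows, using C11 atomics and a pthread mutex rather than the kernel's seqcount API; all names below are illustrative.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* even = no writer active, odd = write in progress (loosely like seqcount_t) */
static atomic_uint sparse_seq;
static pthread_mutex_t sparse_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned read_seq_begin(void)
{
	unsigned s;

	while ((s = atomic_load(&sparse_seq)) & 1)
		;	/* writer active, wait for it to finish */
	return s;
}

static int read_seq_retry(unsigned start)
{
	return atomic_load(&sparse_seq) != start;
}

/* fault-path side: update shared sparse state under the mutex */
static void writer_update(void)
{
	pthread_mutex_lock(&sparse_mutex);
	atomic_fetch_add(&sparse_seq, 1);	/* counter goes odd */
	/* ... install the new block / change the sparse mapping ... */
	atomic_fetch_add(&sparse_seq, 1);	/* counter goes even again */
	pthread_mutex_unlock(&sparse_mutex);
}

/* unmap side: optimistic scan, redo under the mutex only on a race */
static void reader_unmap(void)
{
	int locked = 0;
	unsigned count = read_seq_begin();

retry:
	/* ... walk the mappings, as __xip_unmap() does ... */
	if (locked) {
		pthread_mutex_unlock(&sparse_mutex);
	} else if (read_seq_retry(count)) {
		pthread_mutex_lock(&sparse_mutex);	/* writers are excluded now */
		locked = 1;
		goto retry;
	}
}

int main(void)
{
	writer_update();
	reader_unmap();
	puts("done");
	return 0;
}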
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 936ef2efd892..4e0e26591dfa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -12,7 +12,7 @@
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
-int __meminitdata mminit_loglevel;
+int mminit_loglevel;
 
 #ifndef SECTIONS_SHIFT
 #define SECTIONS_SHIFT 0
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1030,6 +1030,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
 	} else {
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
+			/*
+			 * Ignore pgoff.
+			 */
+			pgoff = 0;
 			vm_flags |= VM_SHARED | VM_MAYSHARE;
 			break;
 		case MAP_PRIVATE:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index af982f7cdb2a..e293c58bea58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -694,6 +694,9 @@ static int move_freepages(struct zone *zone,
 #endif
 
 	for (page = start_page; page <= end_page;) {
+		/* Make sure we are not inadvertently changing nodes */
+		VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
+
 		if (!pfn_valid_within(page_to_pfn(page))) {
 			page++;
 			continue;
@@ -2516,6 +2519,10 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 			continue;
 		page = pfn_to_page(pfn);
 
+		/* Watch out for overlapping nodes */
+		if (page_to_nid(page) != zone_to_nid(zone))
+			continue;
+
 		/* Blocks with reserved pages will never free, skip them. */
 		if (PageReserved(page))
 			continue;
@@ -4064,7 +4071,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data contig_page_data = { .bdata = &bootmem_node_data[0] };
+struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] };
 EXPORT_SYMBOL(contig_page_data);
 #endif
 
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 3444b58033c8..c69f84fe038d 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -2,7 +2,6 @@
  * linux/mm/page_isolation.c
  */
 
-#include <stddef.h>
 #include <linux/mm.h>
 #include <linux/page-isolation.h>
 #include <linux/pageblock-flags.h>
diff --git a/mm/quicklist.c b/mm/quicklist.c
index 3f703f7cb398..8dbb6805ef35 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -26,7 +26,10 @@ DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
 static unsigned long max_pages(unsigned long min_pages)
 {
 	unsigned long node_free_pages, max;
-	struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
+	int node = numa_node_id();
+	struct zone *zones = NODE_DATA(node)->node_zones;
+	int num_cpus_on_node;
+	node_to_cpumask_ptr(cpumask_on_node, node);
 
 	node_free_pages =
 #ifdef CONFIG_ZONE_DMA
@@ -38,6 +41,10 @@ static unsigned long max_pages(unsigned long min_pages)
 		zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
 
 	max = node_free_pages / FRACTION_OF_NODE_MEM;
+
+	num_cpus_on_node = cpus_weight_nr(*cpumask_on_node);
+	max /= num_cpus_on_node;
+
 	return max(max, min_pages);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -224,10 +224,14 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 /*
  * Check that @page is mapped at @address into @mm.
  *
+ * If @sync is false, page_check_address may perform a racy check to avoid
+ * the page table lock when the pte is not present (helpful when reclaiming
+ * highly shared pages).
+ *
  * On success returns with pte mapped and locked.
  */
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			  unsigned long address, spinlock_t **ptlp)
+			  unsigned long address, spinlock_t **ptlp, int sync)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -249,7 +253,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 
 	pte = pte_offset_map(pmd, address);
 	/* Make a quick check before getting the lock */
-	if (!pte_present(*pte)) {
+	if (!sync && !pte_present(*pte)) {
 		pte_unmap(pte);
 		return NULL;
 	}
@@ -281,7 +285,7 @@ static int page_referenced_one(struct page *page,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
@@ -450,7 +454,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 1);
 	if (!pte)
 		goto out;
 
@@ -659,23 +663,30 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 	}
 
 	/*
-	 * It would be tidy to reset the PageAnon mapping here,
-	 * but that might overwrite a racing page_add_anon_rmap
-	 * which increments mapcount after us but sets mapping
-	 * before us: so leave the reset to free_hot_cold_page,
-	 * and remember that it's only reliable while mapped.
-	 * Leaving it set also helps swapoff to reinstate ptes
-	 * faster for those pages still in swapcache.
+	 * Now that the last pte has gone, s390 must transfer dirty
+	 * flag from storage key to struct page. We can usually skip
+	 * this if the page is anon, so about to be freed; but perhaps
+	 * not if it's in swapcache - there might be another pte slot
+	 * containing the swap entry, but page not yet written to swap.
 	 */
 	if ((!PageAnon(page) || PageSwapCache(page)) &&
 	    page_test_dirty(page)) {
 		page_clear_dirty(page);
 		set_page_dirty(page);
 	}
-	mem_cgroup_uncharge_page(page);
 
+	mem_cgroup_uncharge_page(page);
 	__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+	/*
+	 * It would be tidy to reset the PageAnon mapping here,
+	 * but that might overwrite a racing page_add_anon_rmap
+	 * which increments mapcount after us but sets mapping
+	 * before us: so leave the reset to free_hot_cold_page,
+	 * and remember that it's only reliable while mapped.
+	 * Leaving it set also helps swapoff to reinstate ptes
+	 * faster for those pages still in swapcache.
+	 */
 	}
 }
 
@@ -697,7 +708,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2312,7 +2312,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 
 	s->refcount = 1;
 #ifdef CONFIG_NUMA
-	s->remote_node_defrag_ratio = 100;
+	s->remote_node_defrag_ratio = 1000;
 #endif
 	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
 		goto error;
@@ -4058,7 +4058,7 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 	if (err)
 		return err;
 
-	if (ratio < 100)
+	if (ratio <= 100)
 		s->remote_node_defrag_ratio = ratio * 10;
 
 	return length;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 167cf2dc8a03..797c3831cbec 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -60,7 +60,7 @@ void show_swap_cache_info(void)
 	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total);
-	printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
+	printk("Free swap = %ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 250505091d37..6650c1d878b4 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -380,7 +380,7 @@ static int do_launder_page(struct address_space *mapping, struct page *page)
  * Any pages which are found to be mapped into pagetables are unmapped prior to
  * invalidation.
  *
- * Returns -EIO if any pages could not be invalidated.
+ * Returns -EBUSY if any pages could not be invalidated.
  */
 int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
@@ -440,7 +440,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			ret2 = do_launder_page(mapping, page);
 			if (ret2 == 0) {
 				if (!invalidate_complete_page2(mapping, page))
-					ret2 = -EIO;
+					ret2 = -EBUSY;
 			}
 			if (ret2 < 0)
 				ret = ret2;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b0d08e667ece..d7826af2fb07 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -516,9 +516,26 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
 			continue;
 
 		page = pfn_to_page(pfn);
+#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES
+		/*
+		 * Ordinarily, memory holes in flatmem still have a valid
+		 * memmap for the PFN range. However, an architecture for
+		 * embedded systems (e.g. ARM) can free up the memmap backing
+		 * holes to save memory on the assumption the memmap is
+		 * never used. The page_zone linkages are then broken even
+		 * though pfn_valid() returns true. Skip the page if the
+		 * linkages are broken. Even if this test passed, the impact
+		 * is that the counters for the movable type are off but
+		 * fragmentation monitoring is likely meaningless on small
+		 * systems.
+		 */
+		if (page_zone(page) != zone)
+			continue;
+#endif
 		mtype = get_pageblock_migratetype(page);
 
-		count[mtype]++;
+		if (mtype < MIGRATE_TYPES)
+			count[mtype]++;
 	}
 
 	/* Print counts */