| author | Ingo Molnar <mingo@elte.hu> | 2008-09-05 12:56:57 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-09-05 12:56:57 -0400 |
| commit | 616ad8c44281c0c6711a72b560e01ec335ff27e0 | |
| tree | 0a20453ffedb09db6fb41a0c2208ccc2c7751d3a /mm | |
| parent | 99809963c99e1ed868d9ebeb4a5e7ee1cbe0309f | |
| parent | b380b0d4f7dffcc235c0facefa537d4655619101 | |
Merge branch 'linus' into x86/defconfig
Diffstat (limited to 'mm')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mm/Kconfig | 3 |
| -rw-r--r-- | mm/bootmem.c | 37 |
| -rw-r--r-- | mm/filemap.c | 11 |
| -rw-r--r-- | mm/filemap_xip.c | 65 |
| -rw-r--r-- | mm/hugetlb.c | 62 |
| -rw-r--r-- | mm/memcontrol.c | 2 |
| -rw-r--r-- | mm/mempolicy.c | 1 |
| -rw-r--r-- | mm/mm_init.c | 2 |
| -rw-r--r-- | mm/mmap.c | 24 |
| -rw-r--r-- | mm/oom_kill.c | 6 |
| -rw-r--r-- | mm/page_alloc.c | 11 |
| -rw-r--r-- | mm/page_isolation.c | 1 |
| -rw-r--r-- | mm/quicklist.c | 9 |
| -rw-r--r-- | mm/rmap.c | 39 |
| -rw-r--r-- | mm/slub.c | 4 |
| -rw-r--r-- | mm/sparse.c | 1 |
| -rw-r--r-- | mm/swap_state.c | 2 |
| -rw-r--r-- | mm/truncate.c | 4 |
| -rw-r--r-- | mm/util.c | 15 |
| -rw-r--r-- | mm/vmstat.c | 19 |
20 files changed, 243 insertions, 75 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 446c6588c753..0bd9c2dbb2a0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
| @@ -77,9 +77,6 @@ config FLAT_NODE_MEM_MAP | |||
| 77 | def_bool y | 77 | def_bool y |
| 78 | depends on !SPARSEMEM | 78 | depends on !SPARSEMEM |
| 79 | 79 | ||
| 80 | config HAVE_GET_USER_PAGES_FAST | ||
| 81 | bool | ||
| 82 | |||
| 83 | # | 80 | # |
| 84 | # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's | 81 | # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's |
| 85 | # to represent different areas of memory. This variable allows | 82 | # to represent different areas of memory. This variable allows |
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 4af15d0340ad..ad8eec6e44a8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
| @@ -405,6 +405,29 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, | |||
| 405 | } | 405 | } |
| 406 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ | 406 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ |
| 407 | 407 | ||
| 408 | static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx, | ||
| 409 | unsigned long step) | ||
| 410 | { | ||
| 411 | unsigned long base = bdata->node_min_pfn; | ||
| 412 | |||
| 413 | /* | ||
| 414 | * Align the index with respect to the node start so that the | ||
| 415 | * combination of both satisfies the requested alignment. | ||
| 416 | */ | ||
| 417 | |||
| 418 | return ALIGN(base + idx, step) - base; | ||
| 419 | } | ||
| 420 | |||
| 421 | static unsigned long align_off(struct bootmem_data *bdata, unsigned long off, | ||
| 422 | unsigned long align) | ||
| 423 | { | ||
| 424 | unsigned long base = PFN_PHYS(bdata->node_min_pfn); | ||
| 425 | |||
| 426 | /* Same as align_idx for byte offsets */ | ||
| 427 | |||
| 428 | return ALIGN(base + off, align) - base; | ||
| 429 | } | ||
| 430 | |||
| 408 | static void * __init alloc_bootmem_core(struct bootmem_data *bdata, | 431 | static void * __init alloc_bootmem_core(struct bootmem_data *bdata, |
| 409 | unsigned long size, unsigned long align, | 432 | unsigned long size, unsigned long align, |
| 410 | unsigned long goal, unsigned long limit) | 433 | unsigned long goal, unsigned long limit) |
| @@ -441,7 +464,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, | |||
| 441 | else | 464 | else |
| 442 | start = ALIGN(min, step); | 465 | start = ALIGN(min, step); |
| 443 | 466 | ||
| 444 | sidx = start - bdata->node_min_pfn;; | 467 | sidx = start - bdata->node_min_pfn; |
| 445 | midx = max - bdata->node_min_pfn; | 468 | midx = max - bdata->node_min_pfn; |
| 446 | 469 | ||
| 447 | if (bdata->hint_idx > sidx) { | 470 | if (bdata->hint_idx > sidx) { |
| @@ -450,7 +473,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, | |||
| 450 | * catch the fallback below. | 473 | * catch the fallback below. |
| 451 | */ | 474 | */ |
| 452 | fallback = sidx + 1; | 475 | fallback = sidx + 1; |
| 453 | sidx = ALIGN(bdata->hint_idx, step); | 476 | sidx = align_idx(bdata, bdata->hint_idx, step); |
| 454 | } | 477 | } |
| 455 | 478 | ||
| 456 | while (1) { | 479 | while (1) { |
| @@ -459,7 +482,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, | |||
| 459 | unsigned long eidx, i, start_off, end_off; | 482 | unsigned long eidx, i, start_off, end_off; |
| 460 | find_block: | 483 | find_block: |
| 461 | sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); | 484 | sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); |
| 462 | sidx = ALIGN(sidx, step); | 485 | sidx = align_idx(bdata, sidx, step); |
| 463 | eidx = sidx + PFN_UP(size); | 486 | eidx = sidx + PFN_UP(size); |
| 464 | 487 | ||
| 465 | if (sidx >= midx || eidx > midx) | 488 | if (sidx >= midx || eidx > midx) |
| @@ -467,15 +490,15 @@ find_block: | |||
| 467 | 490 | ||
| 468 | for (i = sidx; i < eidx; i++) | 491 | for (i = sidx; i < eidx; i++) |
| 469 | if (test_bit(i, bdata->node_bootmem_map)) { | 492 | if (test_bit(i, bdata->node_bootmem_map)) { |
| 470 | sidx = ALIGN(i, step); | 493 | sidx = align_idx(bdata, i, step); |
| 471 | if (sidx == i) | 494 | if (sidx == i) |
| 472 | sidx += step; | 495 | sidx += step; |
| 473 | goto find_block; | 496 | goto find_block; |
| 474 | } | 497 | } |
| 475 | 498 | ||
| 476 | if (bdata->last_end_off && | 499 | if (bdata->last_end_off & (PAGE_SIZE - 1) && |
| 477 | PFN_DOWN(bdata->last_end_off) + 1 == sidx) | 500 | PFN_DOWN(bdata->last_end_off) + 1 == sidx) |
| 478 | start_off = ALIGN(bdata->last_end_off, align); | 501 | start_off = align_off(bdata, bdata->last_end_off, align); |
| 479 | else | 502 | else |
| 480 | start_off = PFN_PHYS(sidx); | 503 | start_off = PFN_PHYS(sidx); |
| 481 | 504 | ||
| @@ -499,7 +522,7 @@ find_block: | |||
| 499 | } | 522 | } |
| 500 | 523 | ||
| 501 | if (fallback) { | 524 | if (fallback) { |
| 502 | sidx = ALIGN(fallback - 1, step); | 525 | sidx = align_idx(bdata, fallback - 1, step); |
| 503 | fallback = 0; | 526 | fallback = 0; |
| 504 | goto find_block; | 527 | goto find_block; |
| 505 | } | 528 | } |
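The align_idx()/align_off() helpers added above are needed because sidx and start_off are relative to bdata->node_min_pfn, so aligning the bare offset only produces an aligned PFN when the node itself happens to start on an aligned boundary. A standalone sketch of that arithmetic, with the ALIGN() macro reproduced here and made-up sample numbers:

```c
#include <stdio.h>

/* Round x up to the next multiple of a (a must be a power of two),
 * mirroring the kernel's ALIGN() macro. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* Align a node-relative index so that the *absolute* PFN is aligned. */
static unsigned long align_idx(unsigned long node_min_pfn,
                               unsigned long idx, unsigned long step)
{
        return ALIGN(node_min_pfn + idx, step) - node_min_pfn;
}

int main(void)
{
        unsigned long node_min_pfn = 5;   /* node starts on an odd PFN */
        unsigned long idx = 2, step = 4;

        /* Naive alignment of the relative index: absolute PFN 5 + 4 = 9,
         * which is not a multiple of 4. */
        printf("naive:   pfn %lu\n", node_min_pfn + ALIGN(idx, step));
        /* align_idx(): absolute PFN 5 + 3 = 8, properly aligned. */
        printf("aligned: pfn %lu\n",
               node_min_pfn + align_idx(node_min_pfn, idx, step));
        return 0;
}
```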
diff --git a/mm/filemap.c b/mm/filemap.c
index 54e968650855..876bc595d0f8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
| @@ -2129,13 +2129,20 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 2129 | * After a write we want buffered reads to be sure to go to disk to get | 2129 | * After a write we want buffered reads to be sure to go to disk to get |
| 2130 | * the new data. We invalidate clean cached page from the region we're | 2130 | * the new data. We invalidate clean cached page from the region we're |
| 2131 | * about to write. We do this *before* the write so that we can return | 2131 | * about to write. We do this *before* the write so that we can return |
| 2132 | * -EIO without clobbering -EIOCBQUEUED from ->direct_IO(). | 2132 | * without clobbering -EIOCBQUEUED from ->direct_IO(). |
| 2133 | */ | 2133 | */ |
| 2134 | if (mapping->nrpages) { | 2134 | if (mapping->nrpages) { |
| 2135 | written = invalidate_inode_pages2_range(mapping, | 2135 | written = invalidate_inode_pages2_range(mapping, |
| 2136 | pos >> PAGE_CACHE_SHIFT, end); | 2136 | pos >> PAGE_CACHE_SHIFT, end); |
| 2137 | if (written) | 2137 | /* |
| 2138 | * If a page can not be invalidated, return 0 to fall back | ||
| 2139 | * to buffered write. | ||
| 2140 | */ | ||
| 2141 | if (written) { | ||
| 2142 | if (written == -EBUSY) | ||
| 2143 | return 0; | ||
| 2138 | goto out; | 2144 | goto out; |
| 2145 | } | ||
| 2139 | } | 2146 | } |
| 2140 | 2147 | ||
| 2141 | written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs); | 2148 | written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs); |
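Together with the mm/truncate.c hunk further down, which makes invalidate_inode_pages2_range() report -EBUSY instead of -EIO when a page merely could not be dropped, this change lets the direct-write path return 0 so the caller falls back to buffered I/O. A toy illustration of that return-value convention; nothing here is the real write path, and all names are hypothetical:

```c
#include <errno.h>
#include <stdio.h>

/* Stand-in for invalidate_inode_pages2_range(): -EBUSY means "a clean page
 * is still in use", any other negative value is a real error. */
static int invalidate_cached_pages(void)
{
        return -EBUSY;
}

static long toy_direct_write(void)
{
        int ret = invalidate_cached_pages();

        if (ret == -EBUSY)
                return 0;       /* caller falls back to the buffered path */
        if (ret < 0)
                return ret;     /* genuine failure, propagate it */

        /* ... would issue the direct I/O and return bytes written ... */
        return 4096;
}

int main(void)
{
        printf("toy_direct_write() = %ld\n", toy_direct_write());
        return 0;
}
```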
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 380ab402d711..b5167dfb2f2d 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
| @@ -15,6 +15,8 @@ | |||
| 15 | #include <linux/rmap.h> | 15 | #include <linux/rmap.h> |
| 16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
| 17 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
| 18 | #include <linux/seqlock.h> | ||
| 19 | #include <linux/mutex.h> | ||
| 18 | #include <asm/tlbflush.h> | 20 | #include <asm/tlbflush.h> |
| 19 | #include <asm/io.h> | 21 | #include <asm/io.h> |
| 20 | 22 | ||
| @@ -22,22 +24,18 @@ | |||
| 22 | * We do use our own empty page to avoid interference with other users | 24 | * We do use our own empty page to avoid interference with other users |
| 23 | * of ZERO_PAGE(), such as /dev/zero | 25 | * of ZERO_PAGE(), such as /dev/zero |
| 24 | */ | 26 | */ |
| 27 | static DEFINE_MUTEX(xip_sparse_mutex); | ||
| 28 | static seqcount_t xip_sparse_seq = SEQCNT_ZERO; | ||
| 25 | static struct page *__xip_sparse_page; | 29 | static struct page *__xip_sparse_page; |
| 26 | 30 | ||
| 31 | /* called under xip_sparse_mutex */ | ||
| 27 | static struct page *xip_sparse_page(void) | 32 | static struct page *xip_sparse_page(void) |
| 28 | { | 33 | { |
| 29 | if (!__xip_sparse_page) { | 34 | if (!__xip_sparse_page) { |
| 30 | struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); | 35 | struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); |
| 31 | 36 | ||
| 32 | if (page) { | 37 | if (page) |
| 33 | static DEFINE_SPINLOCK(xip_alloc_lock); | 38 | __xip_sparse_page = page; |
| 34 | spin_lock(&xip_alloc_lock); | ||
| 35 | if (!__xip_sparse_page) | ||
| 36 | __xip_sparse_page = page; | ||
| 37 | else | ||
| 38 | __free_page(page); | ||
| 39 | spin_unlock(&xip_alloc_lock); | ||
| 40 | } | ||
| 41 | } | 39 | } |
| 42 | return __xip_sparse_page; | 40 | return __xip_sparse_page; |
| 43 | } | 41 | } |
| @@ -174,18 +172,23 @@ __xip_unmap (struct address_space * mapping, | |||
| 174 | pte_t pteval; | 172 | pte_t pteval; |
| 175 | spinlock_t *ptl; | 173 | spinlock_t *ptl; |
| 176 | struct page *page; | 174 | struct page *page; |
| 175 | unsigned count; | ||
| 176 | int locked = 0; | ||
| 177 | |||
| 178 | count = read_seqcount_begin(&xip_sparse_seq); | ||
| 177 | 179 | ||
| 178 | page = __xip_sparse_page; | 180 | page = __xip_sparse_page; |
| 179 | if (!page) | 181 | if (!page) |
| 180 | return; | 182 | return; |
| 181 | 183 | ||
| 184 | retry: | ||
| 182 | spin_lock(&mapping->i_mmap_lock); | 185 | spin_lock(&mapping->i_mmap_lock); |
| 183 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 186 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
| 184 | mm = vma->vm_mm; | 187 | mm = vma->vm_mm; |
| 185 | address = vma->vm_start + | 188 | address = vma->vm_start + |
| 186 | ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); | 189 | ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); |
| 187 | BUG_ON(address < vma->vm_start || address >= vma->vm_end); | 190 | BUG_ON(address < vma->vm_start || address >= vma->vm_end); |
| 188 | pte = page_check_address(page, mm, address, &ptl); | 191 | pte = page_check_address(page, mm, address, &ptl, 1); |
| 189 | if (pte) { | 192 | if (pte) { |
| 190 | /* Nuke the page table entry. */ | 193 | /* Nuke the page table entry. */ |
| 191 | flush_cache_page(vma, address, pte_pfn(*pte)); | 194 | flush_cache_page(vma, address, pte_pfn(*pte)); |
| @@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping, | |||
| 198 | } | 201 | } |
| 199 | } | 202 | } |
| 200 | spin_unlock(&mapping->i_mmap_lock); | 203 | spin_unlock(&mapping->i_mmap_lock); |
| 204 | |||
| 205 | if (locked) { | ||
| 206 | mutex_unlock(&xip_sparse_mutex); | ||
| 207 | } else if (read_seqcount_retry(&xip_sparse_seq, count)) { | ||
| 208 | mutex_lock(&xip_sparse_mutex); | ||
| 209 | locked = 1; | ||
| 210 | goto retry; | ||
| 211 | } | ||
| 201 | } | 212 | } |
| 202 | 213 | ||
| 203 | /* | 214 | /* |
| @@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 218 | int error; | 229 | int error; |
| 219 | 230 | ||
| 220 | /* XXX: are VM_FAULT_ codes OK? */ | 231 | /* XXX: are VM_FAULT_ codes OK? */ |
| 221 | 232 | again: | |
| 222 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 233 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
| 223 | if (vmf->pgoff >= size) | 234 | if (vmf->pgoff >= size) |
| 224 | return VM_FAULT_SIGBUS; | 235 | return VM_FAULT_SIGBUS; |
| @@ -237,8 +248,10 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 237 | int err; | 248 | int err; |
| 238 | 249 | ||
| 239 | /* maybe shared writable, allocate new block */ | 250 | /* maybe shared writable, allocate new block */ |
| 251 | mutex_lock(&xip_sparse_mutex); | ||
| 240 | error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1, | 252 | error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1, |
| 241 | &xip_mem, &xip_pfn); | 253 | &xip_mem, &xip_pfn); |
| 254 | mutex_unlock(&xip_sparse_mutex); | ||
| 242 | if (error) | 255 | if (error) |
| 243 | return VM_FAULT_SIGBUS; | 256 | return VM_FAULT_SIGBUS; |
| 244 | /* unmap sparse mappings at pgoff from all other vmas */ | 257 | /* unmap sparse mappings at pgoff from all other vmas */ |
| @@ -252,14 +265,34 @@ found: | |||
| 252 | BUG_ON(err); | 265 | BUG_ON(err); |
| 253 | return VM_FAULT_NOPAGE; | 266 | return VM_FAULT_NOPAGE; |
| 254 | } else { | 267 | } else { |
| 268 | int err, ret = VM_FAULT_OOM; | ||
| 269 | |||
| 270 | mutex_lock(&xip_sparse_mutex); | ||
| 271 | write_seqcount_begin(&xip_sparse_seq); | ||
| 272 | error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0, | ||
| 273 | &xip_mem, &xip_pfn); | ||
| 274 | if (unlikely(!error)) { | ||
| 275 | write_seqcount_end(&xip_sparse_seq); | ||
| 276 | mutex_unlock(&xip_sparse_mutex); | ||
| 277 | goto again; | ||
| 278 | } | ||
| 279 | if (error != -ENODATA) | ||
| 280 | goto out; | ||
| 255 | /* not shared and writable, use xip_sparse_page() */ | 281 | /* not shared and writable, use xip_sparse_page() */ |
| 256 | page = xip_sparse_page(); | 282 | page = xip_sparse_page(); |
| 257 | if (!page) | 283 | if (!page) |
| 258 | return VM_FAULT_OOM; | 284 | goto out; |
| 285 | err = vm_insert_page(vma, (unsigned long)vmf->virtual_address, | ||
| 286 | page); | ||
| 287 | if (err == -ENOMEM) | ||
| 288 | goto out; | ||
| 259 | 289 | ||
| 260 | page_cache_get(page); | 290 | ret = VM_FAULT_NOPAGE; |
| 261 | vmf->page = page; | 291 | out: |
| 262 | return 0; | 292 | write_seqcount_end(&xip_sparse_seq); |
| 293 | mutex_unlock(&xip_sparse_mutex); | ||
| 294 | |||
| 295 | return ret; | ||
| 263 | } | 296 | } |
| 264 | } | 297 | } |
| 265 | 298 | ||
| @@ -308,8 +341,10 @@ __xip_file_write(struct file *filp, const char __user *buf, | |||
| 308 | &xip_mem, &xip_pfn); | 341 | &xip_mem, &xip_pfn); |
| 309 | if (status == -ENODATA) { | 342 | if (status == -ENODATA) { |
| 310 | /* we allocate a new page unmap it */ | 343 | /* we allocate a new page unmap it */ |
| 344 | mutex_lock(&xip_sparse_mutex); | ||
| 311 | status = a_ops->get_xip_mem(mapping, index, 1, | 345 | status = a_ops->get_xip_mem(mapping, index, 1, |
| 312 | &xip_mem, &xip_pfn); | 346 | &xip_mem, &xip_pfn); |
| 347 | mutex_unlock(&xip_sparse_mutex); | ||
| 313 | if (!status) | 348 | if (!status) |
| 314 | /* unmap page at pgoff from all other vmas */ | 349 | /* unmap page at pgoff from all other vmas */ |
| 315 | __xip_unmap(mapping, index); | 350 | __xip_unmap(mapping, index); |
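The filemap_xip.c changes replace the per-allocation spinlock with a seqcount/mutex pair: readers run lock-free and only redo their work under the mutex if a writer raced with them, mirroring __xip_unmap() above. A minimal kernel-style sketch of the same pattern, with hypothetical names rather than the actual XIP code:

```c
#include <linux/mutex.h>
#include <linux/seqlock.h>

static DEFINE_MUTEX(state_mutex);
static seqcount_t state_seq = SEQCNT_ZERO;
static void *shared_state;

/* Writer: take the mutex, bump the seqcount around the update. */
static void update_state(void *new)
{
	mutex_lock(&state_mutex);
	write_seqcount_begin(&state_seq);
	shared_state = new;
	write_seqcount_end(&state_seq);
	mutex_unlock(&state_mutex);
}

/* Reader: optimistic, lock-free pass first; if a writer raced with us,
 * do the walk once more while holding the mutex so it cannot race again. */
static void walk_state(void)
{
	unsigned seq;
	int locked = 0;

	seq = read_seqcount_begin(&state_seq);
retry:
	/* ... inspect shared_state ... */

	if (locked) {
		mutex_unlock(&state_mutex);
	} else if (read_seqcount_retry(&state_seq, seq)) {
		mutex_lock(&state_mutex);
		locked = 1;
		goto retry;
	}
}
```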
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 757ca983fd99..67a71191136e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
| @@ -565,7 +565,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) | |||
| 565 | huge_page_order(h)); | 565 | huge_page_order(h)); |
| 566 | if (page) { | 566 | if (page) { |
| 567 | if (arch_prepare_hugepage(page)) { | 567 | if (arch_prepare_hugepage(page)) { |
| 568 | __free_pages(page, HUGETLB_PAGE_ORDER); | 568 | __free_pages(page, huge_page_order(h)); |
| 569 | return NULL; | 569 | return NULL; |
| 570 | } | 570 | } |
| 571 | prep_new_huge_page(h, page, nid); | 571 | prep_new_huge_page(h, page, nid); |
| @@ -665,6 +665,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, | |||
| 665 | __GFP_REPEAT|__GFP_NOWARN, | 665 | __GFP_REPEAT|__GFP_NOWARN, |
| 666 | huge_page_order(h)); | 666 | huge_page_order(h)); |
| 667 | 667 | ||
| 668 | if (page && arch_prepare_hugepage(page)) { | ||
| 669 | __free_pages(page, huge_page_order(h)); | ||
| 670 | return NULL; | ||
| 671 | } | ||
| 672 | |||
| 668 | spin_lock(&hugetlb_lock); | 673 | spin_lock(&hugetlb_lock); |
| 669 | if (page) { | 674 | if (page) { |
| 670 | /* | 675 | /* |
| @@ -1937,6 +1942,18 @@ retry: | |||
| 1937 | lock_page(page); | 1942 | lock_page(page); |
| 1938 | } | 1943 | } |
| 1939 | 1944 | ||
| 1945 | /* | ||
| 1946 | * If we are going to COW a private mapping later, we examine the | ||
| 1947 | * pending reservations for this page now. This will ensure that | ||
| 1948 | * any allocations necessary to record that reservation occur outside | ||
| 1949 | * the spinlock. | ||
| 1950 | */ | ||
| 1951 | if (write_access && !(vma->vm_flags & VM_SHARED)) | ||
| 1952 | if (vma_needs_reservation(h, vma, address) < 0) { | ||
| 1953 | ret = VM_FAULT_OOM; | ||
| 1954 | goto backout_unlocked; | ||
| 1955 | } | ||
| 1956 | |||
| 1940 | spin_lock(&mm->page_table_lock); | 1957 | spin_lock(&mm->page_table_lock); |
| 1941 | size = i_size_read(mapping->host) >> huge_page_shift(h); | 1958 | size = i_size_read(mapping->host) >> huge_page_shift(h); |
| 1942 | if (idx >= size) | 1959 | if (idx >= size) |
| @@ -1962,6 +1979,7 @@ out: | |||
| 1962 | 1979 | ||
| 1963 | backout: | 1980 | backout: |
| 1964 | spin_unlock(&mm->page_table_lock); | 1981 | spin_unlock(&mm->page_table_lock); |
| 1982 | backout_unlocked: | ||
| 1965 | unlock_page(page); | 1983 | unlock_page(page); |
| 1966 | put_page(page); | 1984 | put_page(page); |
| 1967 | goto out; | 1985 | goto out; |
| @@ -1973,6 +1991,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1973 | pte_t *ptep; | 1991 | pte_t *ptep; |
| 1974 | pte_t entry; | 1992 | pte_t entry; |
| 1975 | int ret; | 1993 | int ret; |
| 1994 | struct page *pagecache_page = NULL; | ||
| 1976 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); | 1995 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); |
| 1977 | struct hstate *h = hstate_vma(vma); | 1996 | struct hstate *h = hstate_vma(vma); |
| 1978 | 1997 | ||
| @@ -1989,25 +2008,44 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1989 | entry = huge_ptep_get(ptep); | 2008 | entry = huge_ptep_get(ptep); |
| 1990 | if (huge_pte_none(entry)) { | 2009 | if (huge_pte_none(entry)) { |
| 1991 | ret = hugetlb_no_page(mm, vma, address, ptep, write_access); | 2010 | ret = hugetlb_no_page(mm, vma, address, ptep, write_access); |
| 1992 | mutex_unlock(&hugetlb_instantiation_mutex); | 2011 | goto out_unlock; |
| 1993 | return ret; | ||
| 1994 | } | 2012 | } |
| 1995 | 2013 | ||
| 1996 | ret = 0; | 2014 | ret = 0; |
| 1997 | 2015 | ||
| 2016 | /* | ||
| 2017 | * If we are going to COW the mapping later, we examine the pending | ||
| 2018 | * reservations for this page now. This will ensure that any | ||
| 2019 | * allocations necessary to record that reservation occur outside the | ||
| 2020 | * spinlock. For private mappings, we also lookup the pagecache | ||
| 2021 | * page now as it is used to determine if a reservation has been | ||
| 2022 | * consumed. | ||
| 2023 | */ | ||
| 2024 | if (write_access && !pte_write(entry)) { | ||
| 2025 | if (vma_needs_reservation(h, vma, address) < 0) { | ||
| 2026 | ret = VM_FAULT_OOM; | ||
| 2027 | goto out_unlock; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | if (!(vma->vm_flags & VM_SHARED)) | ||
| 2031 | pagecache_page = hugetlbfs_pagecache_page(h, | ||
| 2032 | vma, address); | ||
| 2033 | } | ||
| 2034 | |||
| 1998 | spin_lock(&mm->page_table_lock); | 2035 | spin_lock(&mm->page_table_lock); |
| 1999 | /* Check for a racing update before calling hugetlb_cow */ | 2036 | /* Check for a racing update before calling hugetlb_cow */ |
| 2000 | if (likely(pte_same(entry, huge_ptep_get(ptep)))) | 2037 | if (likely(pte_same(entry, huge_ptep_get(ptep)))) |
| 2001 | if (write_access && !pte_write(entry)) { | 2038 | if (write_access && !pte_write(entry)) |
| 2002 | struct page *page; | 2039 | ret = hugetlb_cow(mm, vma, address, ptep, entry, |
| 2003 | page = hugetlbfs_pagecache_page(h, vma, address); | 2040 | pagecache_page); |
| 2004 | ret = hugetlb_cow(mm, vma, address, ptep, entry, page); | ||
| 2005 | if (page) { | ||
| 2006 | unlock_page(page); | ||
| 2007 | put_page(page); | ||
| 2008 | } | ||
| 2009 | } | ||
| 2010 | spin_unlock(&mm->page_table_lock); | 2041 | spin_unlock(&mm->page_table_lock); |
| 2042 | |||
| 2043 | if (pagecache_page) { | ||
| 2044 | unlock_page(pagecache_page); | ||
| 2045 | put_page(pagecache_page); | ||
| 2046 | } | ||
| 2047 | |||
| 2048 | out_unlock: | ||
| 2011 | mutex_unlock(&hugetlb_instantiation_mutex); | 2049 | mutex_unlock(&hugetlb_instantiation_mutex); |
| 2012 | 2050 | ||
| 2013 | return ret; | 2051 | return ret; |
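The hugetlb.c hunks call vma_needs_reservation(), which may allocate and therefore sleep, before taking mm->page_table_lock. A hedged, generic sketch of that "allocate first, then take the spinlock" pattern, using invented names unrelated to hugetlb:

```c
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_entry {
	struct list_head list;
	int key;
};

static DEFINE_SPINLOCK(table_lock);

/* Anything that may sleep (here the allocation) happens before the
 * spinlock is taken; the section under the lock only links in the
 * pre-allocated object. */
static int add_entry(struct list_head *table, int key)
{
	struct my_entry *e = kmalloc(sizeof(*e), GFP_KERNEL);	/* may sleep */

	if (!e)
		return -ENOMEM;
	e->key = key;

	spin_lock(&table_lock);		/* atomic context from here on */
	list_add(&e->list, table);
	spin_unlock(&table_lock);
	return 0;
}
```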
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7056c3bdb478..0f1f7a7374ba 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
| @@ -796,6 +796,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | |||
| 796 | 796 | ||
| 797 | if (mem_cgroup_subsys.disabled) | 797 | if (mem_cgroup_subsys.disabled) |
| 798 | return 0; | 798 | return 0; |
| 799 | if (!mm) | ||
| 800 | return 0; | ||
| 799 | 801 | ||
| 800 | rcu_read_lock(); | 802 | rcu_read_lock(); |
| 801 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 803 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e550bec20582..83369058ec13 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
| @@ -803,7 +803,6 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, | |||
| 803 | int do_migrate_pages(struct mm_struct *mm, | 803 | int do_migrate_pages(struct mm_struct *mm, |
| 804 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) | 804 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) |
| 805 | { | 805 | { |
| 806 | LIST_HEAD(pagelist); | ||
| 807 | int busy = 0; | 806 | int busy = 0; |
| 808 | int err = 0; | 807 | int err = 0; |
| 809 | nodemask_t tmp; | 808 | nodemask_t tmp; |
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 936ef2efd892..4e0e26591dfa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
| @@ -12,7 +12,7 @@ | |||
| 12 | #include "internal.h" | 12 | #include "internal.h" |
| 13 | 13 | ||
| 14 | #ifdef CONFIG_DEBUG_MEMORY_INIT | 14 | #ifdef CONFIG_DEBUG_MEMORY_INIT |
| 15 | int __meminitdata mminit_loglevel; | 15 | int mminit_loglevel; |
| 16 | 16 | ||
| 17 | #ifndef SECTIONS_SHIFT | 17 | #ifndef SECTIONS_SHIFT |
| 18 | #define SECTIONS_SHIFT 0 | 18 | #define SECTIONS_SHIFT 0 |
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
| @@ -1030,6 +1030,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, | |||
| 1030 | } else { | 1030 | } else { |
| 1031 | switch (flags & MAP_TYPE) { | 1031 | switch (flags & MAP_TYPE) { |
| 1032 | case MAP_SHARED: | 1032 | case MAP_SHARED: |
| 1033 | /* | ||
| 1034 | * Ignore pgoff. | ||
| 1035 | */ | ||
| 1036 | pgoff = 0; | ||
| 1033 | vm_flags |= VM_SHARED | VM_MAYSHARE; | 1037 | vm_flags |= VM_SHARED | VM_MAYSHARE; |
| 1034 | break; | 1038 | break; |
| 1035 | case MAP_PRIVATE: | 1039 | case MAP_PRIVATE: |
| @@ -2273,14 +2277,14 @@ int install_special_mapping(struct mm_struct *mm, | |||
| 2273 | 2277 | ||
| 2274 | static DEFINE_MUTEX(mm_all_locks_mutex); | 2278 | static DEFINE_MUTEX(mm_all_locks_mutex); |
| 2275 | 2279 | ||
| 2276 | static void vm_lock_anon_vma(struct anon_vma *anon_vma) | 2280 | static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) |
| 2277 | { | 2281 | { |
| 2278 | if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) { | 2282 | if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) { |
| 2279 | /* | 2283 | /* |
| 2280 | * The LSB of head.next can't change from under us | 2284 | * The LSB of head.next can't change from under us |
| 2281 | * because we hold the mm_all_locks_mutex. | 2285 | * because we hold the mm_all_locks_mutex. |
| 2282 | */ | 2286 | */ |
| 2283 | spin_lock(&anon_vma->lock); | 2287 | spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem); |
| 2284 | /* | 2288 | /* |
| 2285 | * We can safely modify head.next after taking the | 2289 | * We can safely modify head.next after taking the |
| 2286 | * anon_vma->lock. If some other vma in this mm shares | 2290 | * anon_vma->lock. If some other vma in this mm shares |
| @@ -2296,7 +2300,7 @@ static void vm_lock_anon_vma(struct anon_vma *anon_vma) | |||
| 2296 | } | 2300 | } |
| 2297 | } | 2301 | } |
| 2298 | 2302 | ||
| 2299 | static void vm_lock_mapping(struct address_space *mapping) | 2303 | static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) |
| 2300 | { | 2304 | { |
| 2301 | if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { | 2305 | if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { |
| 2302 | /* | 2306 | /* |
| @@ -2310,7 +2314,7 @@ static void vm_lock_mapping(struct address_space *mapping) | |||
| 2310 | */ | 2314 | */ |
| 2311 | if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) | 2315 | if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) |
| 2312 | BUG(); | 2316 | BUG(); |
| 2313 | spin_lock(&mapping->i_mmap_lock); | 2317 | spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem); |
| 2314 | } | 2318 | } |
| 2315 | } | 2319 | } |
| 2316 | 2320 | ||
| @@ -2358,11 +2362,17 @@ int mm_take_all_locks(struct mm_struct *mm) | |||
| 2358 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 2362 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
| 2359 | if (signal_pending(current)) | 2363 | if (signal_pending(current)) |
| 2360 | goto out_unlock; | 2364 | goto out_unlock; |
| 2361 | if (vma->anon_vma) | ||
| 2362 | vm_lock_anon_vma(vma->anon_vma); | ||
| 2363 | if (vma->vm_file && vma->vm_file->f_mapping) | 2365 | if (vma->vm_file && vma->vm_file->f_mapping) |
| 2364 | vm_lock_mapping(vma->vm_file->f_mapping); | 2366 | vm_lock_mapping(mm, vma->vm_file->f_mapping); |
| 2367 | } | ||
| 2368 | |||
| 2369 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | ||
| 2370 | if (signal_pending(current)) | ||
| 2371 | goto out_unlock; | ||
| 2372 | if (vma->anon_vma) | ||
| 2373 | vm_lock_anon_vma(mm, vma->anon_vma); | ||
| 2365 | } | 2374 | } |
| 2375 | |||
| 2366 | ret = 0; | 2376 | ret = 0; |
| 2367 | 2377 | ||
| 2368 | out_unlock: | 2378 | out_unlock: |
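The mmap.c hunks use spin_lock_nest_lock() and split mm_take_all_locks() into two passes, so lockdep knows that every i_mmap/anon_vma lock taken here nests under mmap_sem instead of flagging it as recursive locking. A sketch of the annotation with hypothetical lock names:

```c
#include <linux/rwsem.h>
#include <linux/spinlock.h>

static DECLARE_RWSEM(outer_sem);

/* Take an arbitrary number of locks of one class while holding an outer
 * lock; spin_lock_nest_lock() tells lockdep the outer lock serializes
 * them, so it does not warn about "possible recursive locking". */
static void lock_all(spinlock_t **locks, int nr)
{
	int i;

	down_write(&outer_sem);
	for (i = 0; i < nr; i++)
		spin_lock_nest_lock(locks[i], &outer_sem);

	/* ... operate on the fully locked structure ... */

	for (i = nr - 1; i >= 0; i--)
		spin_unlock(locks[i]);
	up_write(&outer_sem);
}
```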
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8a5467ee6265..64e5b4bcd964 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
| 27 | #include <linux/notifier.h> | 27 | #include <linux/notifier.h> |
| 28 | #include <linux/memcontrol.h> | 28 | #include <linux/memcontrol.h> |
| 29 | #include <linux/security.h> | ||
| 29 | 30 | ||
| 30 | int sysctl_panic_on_oom; | 31 | int sysctl_panic_on_oom; |
| 31 | int sysctl_oom_kill_allocating_task; | 32 | int sysctl_oom_kill_allocating_task; |
| @@ -128,7 +129,8 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
| 128 | * Superuser processes are usually more important, so we make it | 129 | * Superuser processes are usually more important, so we make it |
| 129 | * less likely that we kill those. | 130 | * less likely that we kill those. |
| 130 | */ | 131 | */ |
| 131 | if (__capable(p, CAP_SYS_ADMIN) || __capable(p, CAP_SYS_RESOURCE)) | 132 | if (has_capability(p, CAP_SYS_ADMIN) || |
| 133 | has_capability(p, CAP_SYS_RESOURCE)) | ||
| 132 | points /= 4; | 134 | points /= 4; |
| 133 | 135 | ||
| 134 | /* | 136 | /* |
| @@ -137,7 +139,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
| 137 | * tend to only have this flag set on applications they think | 139 | * tend to only have this flag set on applications they think |
| 138 | * of as important. | 140 | * of as important. |
| 139 | */ | 141 | */ |
| 140 | if (__capable(p, CAP_SYS_RAWIO)) | 142 | if (has_capability(p, CAP_SYS_RAWIO)) |
| 141 | points /= 4; | 143 | points /= 4; |
| 142 | 144 | ||
| 143 | /* | 145 | /* |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 401d104d2bb6..e293c58bea58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
| @@ -694,6 +694,9 @@ static int move_freepages(struct zone *zone, | |||
| 694 | #endif | 694 | #endif |
| 695 | 695 | ||
| 696 | for (page = start_page; page <= end_page;) { | 696 | for (page = start_page; page <= end_page;) { |
| 697 | /* Make sure we are not inadvertently changing nodes */ | ||
| 698 | VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone)); | ||
| 699 | |||
| 697 | if (!pfn_valid_within(page_to_pfn(page))) { | 700 | if (!pfn_valid_within(page_to_pfn(page))) { |
| 698 | page++; | 701 | page++; |
| 699 | continue; | 702 | continue; |
| @@ -2516,6 +2519,10 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
| 2516 | continue; | 2519 | continue; |
| 2517 | page = pfn_to_page(pfn); | 2520 | page = pfn_to_page(pfn); |
| 2518 | 2521 | ||
| 2522 | /* Watch out for overlapping nodes */ | ||
| 2523 | if (page_to_nid(page) != zone_to_nid(zone)) | ||
| 2524 | continue; | ||
| 2525 | |||
| 2519 | /* Blocks with reserved pages will never free, skip them. */ | 2526 | /* Blocks with reserved pages will never free, skip them. */ |
| 2520 | if (PageReserved(page)) | 2527 | if (PageReserved(page)) |
| 2521 | continue; | 2528 | continue; |
| @@ -4064,7 +4071,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) | |||
| 4064 | } | 4071 | } |
| 4065 | 4072 | ||
| 4066 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 4073 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
| 4067 | struct pglist_data contig_page_data = { .bdata = &bootmem_node_data[0] }; | 4074 | struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; |
| 4068 | EXPORT_SYMBOL(contig_page_data); | 4075 | EXPORT_SYMBOL(contig_page_data); |
| 4069 | #endif | 4076 | #endif |
| 4070 | 4077 | ||
| @@ -4437,7 +4444,7 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
| 4437 | do { | 4444 | do { |
| 4438 | size = bucketsize << log2qty; | 4445 | size = bucketsize << log2qty; |
| 4439 | if (flags & HASH_EARLY) | 4446 | if (flags & HASH_EARLY) |
| 4440 | table = alloc_bootmem(size); | 4447 | table = alloc_bootmem_nopanic(size); |
| 4441 | else if (hashdist) | 4448 | else if (hashdist) |
| 4442 | table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL); | 4449 | table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL); |
| 4443 | else { | 4450 | else { |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 3444b58033c8..c69f84fe038d 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
| @@ -2,7 +2,6 @@ | |||
| 2 | * linux/mm/page_isolation.c | 2 | * linux/mm/page_isolation.c |
| 3 | */ | 3 | */ |
| 4 | 4 | ||
| 5 | #include <stddef.h> | ||
| 6 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
| 7 | #include <linux/page-isolation.h> | 6 | #include <linux/page-isolation.h> |
| 8 | #include <linux/pageblock-flags.h> | 7 | #include <linux/pageblock-flags.h> |
diff --git a/mm/quicklist.c b/mm/quicklist.c
index 3f703f7cb398..8dbb6805ef35 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
| @@ -26,7 +26,10 @@ DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; | |||
| 26 | static unsigned long max_pages(unsigned long min_pages) | 26 | static unsigned long max_pages(unsigned long min_pages) |
| 27 | { | 27 | { |
| 28 | unsigned long node_free_pages, max; | 28 | unsigned long node_free_pages, max; |
| 29 | struct zone *zones = NODE_DATA(numa_node_id())->node_zones; | 29 | int node = numa_node_id(); |
| 30 | struct zone *zones = NODE_DATA(node)->node_zones; | ||
| 31 | int num_cpus_on_node; | ||
| 32 | node_to_cpumask_ptr(cpumask_on_node, node); | ||
| 30 | 33 | ||
| 31 | node_free_pages = | 34 | node_free_pages = |
| 32 | #ifdef CONFIG_ZONE_DMA | 35 | #ifdef CONFIG_ZONE_DMA |
| @@ -38,6 +41,10 @@ static unsigned long max_pages(unsigned long min_pages) | |||
| 38 | zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES); | 41 | zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES); |
| 39 | 42 | ||
| 40 | max = node_free_pages / FRACTION_OF_NODE_MEM; | 43 | max = node_free_pages / FRACTION_OF_NODE_MEM; |
| 44 | |||
| 45 | num_cpus_on_node = cpus_weight_nr(*cpumask_on_node); | ||
| 46 | max /= num_cpus_on_node; | ||
| 47 | |||
| 41 | return max(max, min_pages); | 48 | return max(max, min_pages); |
| 42 | } | 49 | } |
| 43 | 50 | ||
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
| @@ -224,10 +224,14 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) | |||
| 224 | /* | 224 | /* |
| 225 | * Check that @page is mapped at @address into @mm. | 225 | * Check that @page is mapped at @address into @mm. |
| 226 | * | 226 | * |
| 227 | * If @sync is false, page_check_address may perform a racy check to avoid | ||
| 228 | * the page table lock when the pte is not present (helpful when reclaiming | ||
| 229 | * highly shared pages). | ||
| 230 | * | ||
| 227 | * On success returns with pte mapped and locked. | 231 | * On success returns with pte mapped and locked. |
| 228 | */ | 232 | */ |
| 229 | pte_t *page_check_address(struct page *page, struct mm_struct *mm, | 233 | pte_t *page_check_address(struct page *page, struct mm_struct *mm, |
| 230 | unsigned long address, spinlock_t **ptlp) | 234 | unsigned long address, spinlock_t **ptlp, int sync) |
| 231 | { | 235 | { |
| 232 | pgd_t *pgd; | 236 | pgd_t *pgd; |
| 233 | pud_t *pud; | 237 | pud_t *pud; |
| @@ -249,7 +253,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, | |||
| 249 | 253 | ||
| 250 | pte = pte_offset_map(pmd, address); | 254 | pte = pte_offset_map(pmd, address); |
| 251 | /* Make a quick check before getting the lock */ | 255 | /* Make a quick check before getting the lock */ |
| 252 | if (!pte_present(*pte)) { | 256 | if (!sync && !pte_present(*pte)) { |
| 253 | pte_unmap(pte); | 257 | pte_unmap(pte); |
| 254 | return NULL; | 258 | return NULL; |
| 255 | } | 259 | } |
| @@ -281,7 +285,7 @@ static int page_referenced_one(struct page *page, | |||
| 281 | if (address == -EFAULT) | 285 | if (address == -EFAULT) |
| 282 | goto out; | 286 | goto out; |
| 283 | 287 | ||
| 284 | pte = page_check_address(page, mm, address, &ptl); | 288 | pte = page_check_address(page, mm, address, &ptl, 0); |
| 285 | if (!pte) | 289 | if (!pte) |
| 286 | goto out; | 290 | goto out; |
| 287 | 291 | ||
| @@ -450,7 +454,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) | |||
| 450 | if (address == -EFAULT) | 454 | if (address == -EFAULT) |
| 451 | goto out; | 455 | goto out; |
| 452 | 456 | ||
| 453 | pte = page_check_address(page, mm, address, &ptl); | 457 | pte = page_check_address(page, mm, address, &ptl, 1); |
| 454 | if (!pte) | 458 | if (!pte) |
| 455 | goto out; | 459 | goto out; |
| 456 | 460 | ||
| @@ -659,23 +663,30 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) | |||
| 659 | } | 663 | } |
| 660 | 664 | ||
| 661 | /* | 665 | /* |
| 662 | * It would be tidy to reset the PageAnon mapping here, | 666 | * Now that the last pte has gone, s390 must transfer dirty |
| 663 | * but that might overwrite a racing page_add_anon_rmap | 667 | * flag from storage key to struct page. We can usually skip |
| 664 | * which increments mapcount after us but sets mapping | 668 | * this if the page is anon, so about to be freed; but perhaps |
| 665 | * before us: so leave the reset to free_hot_cold_page, | 669 | * not if it's in swapcache - there might be another pte slot |
| 666 | * and remember that it's only reliable while mapped. | 670 | * containing the swap entry, but page not yet written to swap. |
| 667 | * Leaving it set also helps swapoff to reinstate ptes | ||
| 668 | * faster for those pages still in swapcache. | ||
| 669 | */ | 671 | */ |
| 670 | if ((!PageAnon(page) || PageSwapCache(page)) && | 672 | if ((!PageAnon(page) || PageSwapCache(page)) && |
| 671 | page_test_dirty(page)) { | 673 | page_test_dirty(page)) { |
| 672 | page_clear_dirty(page); | 674 | page_clear_dirty(page); |
| 673 | set_page_dirty(page); | 675 | set_page_dirty(page); |
| 674 | } | 676 | } |
| 675 | mem_cgroup_uncharge_page(page); | ||
| 676 | 677 | ||
| 678 | mem_cgroup_uncharge_page(page); | ||
| 677 | __dec_zone_page_state(page, | 679 | __dec_zone_page_state(page, |
| 678 | PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); | 680 | PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); |
| 681 | /* | ||
| 682 | * It would be tidy to reset the PageAnon mapping here, | ||
| 683 | * but that might overwrite a racing page_add_anon_rmap | ||
| 684 | * which increments mapcount after us but sets mapping | ||
| 685 | * before us: so leave the reset to free_hot_cold_page, | ||
| 686 | * and remember that it's only reliable while mapped. | ||
| 687 | * Leaving it set also helps swapoff to reinstate ptes | ||
| 688 | * faster for those pages still in swapcache. | ||
| 689 | */ | ||
| 679 | } | 690 | } |
| 680 | } | 691 | } |
| 681 | 692 | ||
| @@ -697,7 +708,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
| 697 | if (address == -EFAULT) | 708 | if (address == -EFAULT) |
| 698 | goto out; | 709 | goto out; |
| 699 | 710 | ||
| 700 | pte = page_check_address(page, mm, address, &ptl); | 711 | pte = page_check_address(page, mm, address, &ptl, 0); |
| 701 | if (!pte) | 712 | if (!pte) |
| 702 | goto out; | 713 | goto out; |
| 703 | 714 | ||
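page_check_address() now takes a sync flag: callers that tolerate a racy miss (reclaim, referenced checks) pass 0 and skip the pte lock when the pte looks non-present, while callers that must not miss a mapping (page_mkclean_one(), the XIP unmap above) pass 1. A loose, self-contained analogue of that flag, not the actual rmap code:

```c
#include <linux/mutex.h>
#include <linux/stddef.h>

static DEFINE_MUTEX(obj_lock);

struct obj {
	int present;
	/* ... */
};

/* When sync is 0 the caller tolerates a racy miss, so we may bail out on a
 * cheap unlocked check; when sync is 1 we always take the lock before
 * deciding. On success the object is returned with obj_lock held, much as
 * page_check_address() returns with the pte lock held. */
static struct obj *lookup(struct obj *o, int sync)
{
	if (!sync && !o->present)	/* quick, racy check */
		return NULL;

	mutex_lock(&obj_lock);
	if (!o->present) {
		mutex_unlock(&obj_lock);
		return NULL;
	}
	return o;			/* caller must drop obj_lock */
}
```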
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
| @@ -2312,7 +2312,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
| 2312 | 2312 | ||
| 2313 | s->refcount = 1; | 2313 | s->refcount = 1; |
| 2314 | #ifdef CONFIG_NUMA | 2314 | #ifdef CONFIG_NUMA |
| 2315 | s->remote_node_defrag_ratio = 100; | 2315 | s->remote_node_defrag_ratio = 1000; |
| 2316 | #endif | 2316 | #endif |
| 2317 | if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) | 2317 | if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) |
| 2318 | goto error; | 2318 | goto error; |
| @@ -4058,7 +4058,7 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, | |||
| 4058 | if (err) | 4058 | if (err) |
| 4059 | return err; | 4059 | return err; |
| 4060 | 4060 | ||
| 4061 | if (ratio < 100) | 4061 | if (ratio <= 100) |
| 4062 | s->remote_node_defrag_ratio = ratio * 10; | 4062 | s->remote_node_defrag_ratio = ratio * 10; |
| 4063 | 4063 | ||
| 4064 | return length; | 4064 | return length; |
diff --git a/mm/sparse.c b/mm/sparse.c
index 5d9dbbb9d39e..39db301b920d 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <asm/dma.h> | 12 | #include <asm/dma.h> |
| 13 | #include <asm/pgalloc.h> | 13 | #include <asm/pgalloc.h> |
| 14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
| 15 | #include "internal.h" | ||
| 16 | 15 | ||
| 17 | /* | 16 | /* |
| 18 | * Permanent SPARSEMEM data: | 17 | * Permanent SPARSEMEM data: |
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 167cf2dc8a03..797c3831cbec 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
| @@ -60,7 +60,7 @@ void show_swap_cache_info(void) | |||
| 60 | printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", | 60 | printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", |
| 61 | swap_cache_info.add_total, swap_cache_info.del_total, | 61 | swap_cache_info.add_total, swap_cache_info.del_total, |
| 62 | swap_cache_info.find_success, swap_cache_info.find_total); | 62 | swap_cache_info.find_success, swap_cache_info.find_total); |
| 63 | printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); | 63 | printk("Free swap = %ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10)); |
| 64 | printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); | 64 | printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); |
| 65 | } | 65 | } |
| 66 | 66 | ||
diff --git a/mm/truncate.c b/mm/truncate.c
index 250505091d37..6650c1d878b4 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
| @@ -380,7 +380,7 @@ static int do_launder_page(struct address_space *mapping, struct page *page) | |||
| 380 | * Any pages which are found to be mapped into pagetables are unmapped prior to | 380 | * Any pages which are found to be mapped into pagetables are unmapped prior to |
| 381 | * invalidation. | 381 | * invalidation. |
| 382 | * | 382 | * |
| 383 | * Returns -EIO if any pages could not be invalidated. | 383 | * Returns -EBUSY if any pages could not be invalidated. |
| 384 | */ | 384 | */ |
| 385 | int invalidate_inode_pages2_range(struct address_space *mapping, | 385 | int invalidate_inode_pages2_range(struct address_space *mapping, |
| 386 | pgoff_t start, pgoff_t end) | 386 | pgoff_t start, pgoff_t end) |
| @@ -440,7 +440,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
| 440 | ret2 = do_launder_page(mapping, page); | 440 | ret2 = do_launder_page(mapping, page); |
| 441 | if (ret2 == 0) { | 441 | if (ret2 == 0) { |
| 442 | if (!invalidate_complete_page2(mapping, page)) | 442 | if (!invalidate_complete_page2(mapping, page)) |
| 443 | ret2 = -EIO; | 443 | ret2 = -EBUSY; |
| 444 | } | 444 | } |
| 445 | if (ret2 < 0) | 445 | if (ret2 < 0) |
| 446 | ret = ret2; | 446 | ret = ret2; |
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
| @@ -171,3 +171,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
| 171 | mm->unmap_area = arch_unmap_area; | 171 | mm->unmap_area = arch_unmap_area; |
| 172 | } | 172 | } |
| 173 | #endif | 173 | #endif |
| 174 | |||
| 175 | int __attribute__((weak)) get_user_pages_fast(unsigned long start, | ||
| 176 | int nr_pages, int write, struct page **pages) | ||
| 177 | { | ||
| 178 | struct mm_struct *mm = current->mm; | ||
| 179 | int ret; | ||
| 180 | |||
| 181 | down_read(&mm->mmap_sem); | ||
| 182 | ret = get_user_pages(current, mm, start, nr_pages, | ||
| 183 | write, 0, pages, NULL); | ||
| 184 | up_read(&mm->mmap_sem); | ||
| 185 | |||
| 186 | return ret; | ||
| 187 | } | ||
| 188 | EXPORT_SYMBOL_GPL(get_user_pages_fast); | ||
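The weak get_user_pages_fast() added to mm/util.c gives every architecture a working (if slow) fallback, so callers may use it unconditionally; architectures with a lockless page-table walker simply override it. A sketch of a typical caller pinning one user page (illustrative names, trimmed error handling):

```c
#include <linux/errno.h>
#include <linux/mm.h>

/* Pin the single user page containing 'uaddr' for writing and release it
 * again; a real caller would keep it pinned while DMA is in flight. */
static int pin_user_buffer(unsigned long uaddr)
{
	struct page *page;
	int ret;

	ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, 1, &page);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	/* ... map the page / set up DMA here ... */

	put_page(page);
	return 0;
}
```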
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b0d08e667ece..d7826af2fb07 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
| @@ -516,9 +516,26 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, | |||
| 516 | continue; | 516 | continue; |
| 517 | 517 | ||
| 518 | page = pfn_to_page(pfn); | 518 | page = pfn_to_page(pfn); |
| 519 | #ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES | ||
| 520 | /* | ||
| 521 | * Ordinarily, memory holes in flatmem still have a valid | ||
| 522 | * memmap for the PFN range. However, an architecture for | ||
| 523 | * embedded systems (e.g. ARM) can free up the memmap backing | ||
| 524 | * holes to save memory on the assumption the memmap is | ||
| 525 | * never used. The page_zone linkages are then broken even | ||
| 526 | * though pfn_valid() returns true. Skip the page if the | ||
| 527 | * linkages are broken. Even if this test passed, the impact | ||
| 528 | * is that the counters for the movable type are off but | ||
| 529 | * fragmentation monitoring is likely meaningless on small | ||
| 530 | * systems. | ||
| 531 | */ | ||
| 532 | if (page_zone(page) != zone) | ||
| 533 | continue; | ||
| 534 | #endif | ||
| 519 | mtype = get_pageblock_migratetype(page); | 535 | mtype = get_pageblock_migratetype(page); |
| 520 | 536 | ||
| 521 | count[mtype]++; | 537 | if (mtype < MIGRATE_TYPES) |
| 538 | count[mtype]++; | ||
| 522 | } | 539 | } |
| 523 | 540 | ||
| 524 | /* Print counts */ | 541 | /* Print counts */ |
