diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 54 | ||||
-rw-r--r-- | mm/internal.h | 29 | ||||
-rw-r--r-- | mm/memory.c | 15 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 3 | ||||
-rw-r--r-- | mm/mempolicy.c | 18 | ||||
-rw-r--r-- | mm/migrate.c | 7 | ||||
-rw-r--r-- | mm/mlock.c | 18 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 3 | ||||
-rw-r--r-- | mm/page_alloc.c | 32 | ||||
-rw-r--r-- | mm/page_cgroup.c | 4 | ||||
-rw-r--r-- | mm/page_isolation.c | 5 | ||||
-rw-r--r-- | mm/sparse-vmemmap.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 30 | ||||
-rw-r--r-- | mm/vmscan.c | 44 |
15 files changed, 178 insertions, 88 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 421aee99b84a..6058b53dcb89 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma) | |||
354 | return 0; | 354 | return 0; |
355 | } | 355 | } |
356 | 356 | ||
357 | static void clear_gigantic_page(struct page *page, | ||
358 | unsigned long addr, unsigned long sz) | ||
359 | { | ||
360 | int i; | ||
361 | struct page *p = page; | ||
362 | |||
363 | might_sleep(); | ||
364 | for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) { | ||
365 | cond_resched(); | ||
366 | clear_user_highpage(p, addr + i * PAGE_SIZE); | ||
367 | } | ||
368 | } | ||
357 | static void clear_huge_page(struct page *page, | 369 | static void clear_huge_page(struct page *page, |
358 | unsigned long addr, unsigned long sz) | 370 | unsigned long addr, unsigned long sz) |
359 | { | 371 | { |
360 | int i; | 372 | int i; |
361 | 373 | ||
374 | if (unlikely(sz > MAX_ORDER_NR_PAGES)) | ||
375 | return clear_gigantic_page(page, addr, sz); | ||
376 | |||
362 | might_sleep(); | 377 | might_sleep(); |
363 | for (i = 0; i < sz/PAGE_SIZE; i++) { | 378 | for (i = 0; i < sz/PAGE_SIZE; i++) { |
364 | cond_resched(); | 379 | cond_resched(); |
@@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page, | |||
366 | } | 381 | } |
367 | } | 382 | } |
368 | 383 | ||
384 | static void copy_gigantic_page(struct page *dst, struct page *src, | ||
385 | unsigned long addr, struct vm_area_struct *vma) | ||
386 | { | ||
387 | int i; | ||
388 | struct hstate *h = hstate_vma(vma); | ||
389 | struct page *dst_base = dst; | ||
390 | struct page *src_base = src; | ||
391 | might_sleep(); | ||
392 | for (i = 0; i < pages_per_huge_page(h); ) { | ||
393 | cond_resched(); | ||
394 | copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); | ||
395 | |||
396 | i++; | ||
397 | dst = mem_map_next(dst, dst_base, i); | ||
398 | src = mem_map_next(src, src_base, i); | ||
399 | } | ||
400 | } | ||
369 | static void copy_huge_page(struct page *dst, struct page *src, | 401 | static void copy_huge_page(struct page *dst, struct page *src, |
370 | unsigned long addr, struct vm_area_struct *vma) | 402 | unsigned long addr, struct vm_area_struct *vma) |
371 | { | 403 | { |
372 | int i; | 404 | int i; |
373 | struct hstate *h = hstate_vma(vma); | 405 | struct hstate *h = hstate_vma(vma); |
374 | 406 | ||
407 | if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) | ||
408 | return copy_gigantic_page(dst, src, addr, vma); | ||
409 | |||
375 | might_sleep(); | 410 | might_sleep(); |
376 | for (i = 0; i < pages_per_huge_page(h); i++) { | 411 | for (i = 0; i < pages_per_huge_page(h); i++) { |
377 | cond_resched(); | 412 | cond_resched(); |
@@ -456,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page) | |||
456 | { | 491 | { |
457 | int i; | 492 | int i; |
458 | 493 | ||
494 | VM_BUG_ON(h->order >= MAX_ORDER); | ||
495 | |||
459 | h->nr_huge_pages--; | 496 | h->nr_huge_pages--; |
460 | h->nr_huge_pages_node[page_to_nid(page)]--; | 497 | h->nr_huge_pages_node[page_to_nid(page)]--; |
461 | for (i = 0; i < pages_per_huge_page(h); i++) { | 498 | for (i = 0; i < pages_per_huge_page(h); i++) { |
@@ -970,6 +1007,14 @@ found: | |||
970 | return 1; | 1007 | return 1; |
971 | } | 1008 | } |
972 | 1009 | ||
1010 | static void prep_compound_huge_page(struct page *page, int order) | ||
1011 | { | ||
1012 | if (unlikely(order > (MAX_ORDER - 1))) | ||
1013 | prep_compound_gigantic_page(page, order); | ||
1014 | else | ||
1015 | prep_compound_page(page, order); | ||
1016 | } | ||
1017 | |||
973 | /* Put bootmem huge pages into the standard lists after mem_map is up */ | 1018 | /* Put bootmem huge pages into the standard lists after mem_map is up */ |
974 | static void __init gather_bootmem_prealloc(void) | 1019 | static void __init gather_bootmem_prealloc(void) |
975 | { | 1020 | { |
@@ -980,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void) | |||
980 | struct hstate *h = m->hstate; | 1025 | struct hstate *h = m->hstate; |
981 | __ClearPageReserved(page); | 1026 | __ClearPageReserved(page); |
982 | WARN_ON(page_count(page) != 1); | 1027 | WARN_ON(page_count(page) != 1); |
983 | prep_compound_page(page, h->order); | 1028 | prep_compound_huge_page(page, h->order); |
984 | prep_new_huge_page(h, page, page_to_nid(page)); | 1029 | prep_new_huge_page(h, page, page_to_nid(page)); |
985 | } | 1030 | } |
986 | } | 1031 | } |
@@ -1751,6 +1796,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
1751 | static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, | 1796 | static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, |
1752 | struct page *page, unsigned long address) | 1797 | struct page *page, unsigned long address) |
1753 | { | 1798 | { |
1799 | struct hstate *h = hstate_vma(vma); | ||
1754 | struct vm_area_struct *iter_vma; | 1800 | struct vm_area_struct *iter_vma; |
1755 | struct address_space *mapping; | 1801 | struct address_space *mapping; |
1756 | struct prio_tree_iter iter; | 1802 | struct prio_tree_iter iter; |
@@ -1760,7 +1806,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1760 | * vm_pgoff is in PAGE_SIZE units, hence the different calculation | 1806 | * vm_pgoff is in PAGE_SIZE units, hence the different calculation |
1761 | * from page cache lookup which is in HPAGE_SIZE units. | 1807 | * from page cache lookup which is in HPAGE_SIZE units. |
1762 | */ | 1808 | */ |
1763 | address = address & huge_page_mask(hstate_vma(vma)); | 1809 | address = address & huge_page_mask(h); |
1764 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) | 1810 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) |
1765 | + (vma->vm_pgoff >> PAGE_SHIFT); | 1811 | + (vma->vm_pgoff >> PAGE_SHIFT); |
1766 | mapping = (struct address_space *)page_private(page); | 1812 | mapping = (struct address_space *)page_private(page); |
@@ -1779,7 +1825,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1779 | */ | 1825 | */ |
1780 | if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) | 1826 | if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) |
1781 | unmap_hugepage_range(iter_vma, | 1827 | unmap_hugepage_range(iter_vma, |
1782 | address, address + HPAGE_SIZE, | 1828 | address, address + huge_page_size(h), |
1783 | page); | 1829 | page); |
1784 | } | 1830 | } |
1785 | 1831 | ||
@@ -2130,7 +2176,7 @@ same_page: | |||
2130 | if (zeropage_ok) | 2176 | if (zeropage_ok) |
2131 | pages[i] = ZERO_PAGE(0); | 2177 | pages[i] = ZERO_PAGE(0); |
2132 | else | 2178 | else |
2133 | pages[i] = page + pfn_offset; | 2179 | pages[i] = mem_map_offset(page, pfn_offset); |
2134 | get_page(pages[i]); | 2180 | get_page(pages[i]); |
2135 | } | 2181 | } |
2136 | 2182 | ||
diff --git a/mm/internal.h b/mm/internal.h index e4e728bdf324..13333bc2eb68 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, | |||
17 | unsigned long floor, unsigned long ceiling); | 17 | unsigned long floor, unsigned long ceiling); |
18 | 18 | ||
19 | extern void prep_compound_page(struct page *page, unsigned long order); | 19 | extern void prep_compound_page(struct page *page, unsigned long order); |
20 | extern void prep_compound_gigantic_page(struct page *page, unsigned long order); | ||
20 | 21 | ||
21 | static inline void set_page_count(struct page *page, int v) | 22 | static inline void set_page_count(struct page *page, int v) |
22 | { | 23 | { |
@@ -176,6 +177,34 @@ static inline void free_page_mlock(struct page *page) { } | |||
176 | #endif /* CONFIG_UNEVICTABLE_LRU */ | 177 | #endif /* CONFIG_UNEVICTABLE_LRU */ |
177 | 178 | ||
178 | /* | 179 | /* |
180 | * Return the mem_map entry representing the 'offset' subpage within | ||
181 | * the maximally aligned gigantic page 'base'. Handle any discontiguity | ||
182 | * in the mem_map at MAX_ORDER_NR_PAGES boundaries. | ||
183 | */ | ||
184 | static inline struct page *mem_map_offset(struct page *base, int offset) | ||
185 | { | ||
186 | if (unlikely(offset >= MAX_ORDER_NR_PAGES)) | ||
187 | return pfn_to_page(page_to_pfn(base) + offset); | ||
188 | return base + offset; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Iterator over all subpages within the maximally aligned gigantic | ||
193 | * page 'base'. Handle any discontiguity in the mem_map. | ||
194 | */ | ||
195 | static inline struct page *mem_map_next(struct page *iter, | ||
196 | struct page *base, int offset) | ||
197 | { | ||
198 | if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) { | ||
199 | unsigned long pfn = page_to_pfn(base) + offset; | ||
200 | if (!pfn_valid(pfn)) | ||
201 | return NULL; | ||
202 | return pfn_to_page(pfn); | ||
203 | } | ||
204 | return iter + 1; | ||
205 | } | ||
206 | |||
207 | /* | ||
179 | * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, | 208 | * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, |
180 | * so all functions starting at paging_init should be marked __init | 209 | * so all functions starting at paging_init should be marked __init |
181 | * in those cases. SPARSEMEM, however, allows for memory hotplug, | 210 | * in those cases. SPARSEMEM, however, allows for memory hotplug, |
diff --git a/mm/memory.c b/mm/memory.c index 164951c47305..fc031d68327e 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -3049,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip) | |||
3049 | } | 3049 | } |
3050 | up_read(&current->mm->mmap_sem); | 3050 | up_read(&current->mm->mmap_sem); |
3051 | } | 3051 | } |
3052 | |||
3053 | #ifdef CONFIG_PROVE_LOCKING | ||
3054 | void might_fault(void) | ||
3055 | { | ||
3056 | might_sleep(); | ||
3057 | /* | ||
3058 | * it would be nicer only to annotate paths which are not under | ||
3059 | * pagefault_disable, however that requires a larger audit and | ||
3060 | * providing helpers like get_user_atomic. | ||
3061 | */ | ||
3062 | if (!in_atomic() && current->mm) | ||
3063 | might_lock_read(&current->mm->mmap_sem); | ||
3064 | } | ||
3065 | EXPORT_SYMBOL(might_fault); | ||
3066 | #endif | ||
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6837a1014372..b5b2b15085a8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/ioport.h> | 24 | #include <linux/ioport.h> |
25 | #include <linux/cpuset.h> | ||
26 | #include <linux/delay.h> | 25 | #include <linux/delay.h> |
27 | #include <linux/migrate.h> | 26 | #include <linux/migrate.h> |
28 | #include <linux/page-isolation.h> | 27 | #include <linux/page-isolation.h> |
@@ -498,8 +497,6 @@ int add_memory(int nid, u64 start, u64 size) | |||
498 | /* we online node here. we can't roll back from here. */ | 497 | /* we online node here. we can't roll back from here. */ |
499 | node_set_online(nid); | 498 | node_set_online(nid); |
500 | 499 | ||
501 | cpuset_track_online_nodes(); | ||
502 | |||
503 | if (new_pgdat) { | 500 | if (new_pgdat) { |
504 | ret = register_one_node(nid); | 501 | ret = register_one_node(nid); |
505 | /* | 502 | /* |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 36f42573a335..e9493b1c1117 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
489 | int err; | 489 | int err; |
490 | struct vm_area_struct *first, *vma, *prev; | 490 | struct vm_area_struct *first, *vma, *prev; |
491 | 491 | ||
492 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | ||
493 | |||
494 | err = migrate_prep(); | ||
495 | if (err) | ||
496 | return ERR_PTR(err); | ||
497 | } | ||
498 | 492 | ||
499 | first = find_vma(mm, start); | 493 | first = find_vma(mm, start); |
500 | if (!first) | 494 | if (!first) |
@@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm, | |||
809 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) | 803 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) |
810 | { | 804 | { |
811 | int busy = 0; | 805 | int busy = 0; |
812 | int err = 0; | 806 | int err; |
813 | nodemask_t tmp; | 807 | nodemask_t tmp; |
814 | 808 | ||
809 | err = migrate_prep(); | ||
810 | if (err) | ||
811 | return err; | ||
812 | |||
815 | down_read(&mm->mmap_sem); | 813 | down_read(&mm->mmap_sem); |
816 | 814 | ||
817 | err = migrate_vmas(mm, from_nodes, to_nodes, flags); | 815 | err = migrate_vmas(mm, from_nodes, to_nodes, flags); |
@@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
974 | start, start + len, mode, mode_flags, | 972 | start, start + len, mode, mode_flags, |
975 | nmask ? nodes_addr(*nmask)[0] : -1); | 973 | nmask ? nodes_addr(*nmask)[0] : -1); |
976 | 974 | ||
975 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { | ||
976 | |||
977 | err = migrate_prep(); | ||
978 | if (err) | ||
979 | return err; | ||
980 | } | ||
977 | down_write(&mm->mmap_sem); | 981 | down_write(&mm->mmap_sem); |
978 | vma = check_range(mm, start, end, nmask, | 982 | vma = check_range(mm, start, end, nmask, |
979 | flags | MPOL_MF_INVERT, &pagelist); | 983 | flags | MPOL_MF_INVERT, &pagelist); |
diff --git a/mm/migrate.c b/mm/migrate.c index 6602941bfab0..1e0d6b237f44 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -522,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page) | |||
522 | remove_migration_ptes(page, page); | 522 | remove_migration_ptes(page, page); |
523 | 523 | ||
524 | rc = mapping->a_ops->writepage(page, &wbc); | 524 | rc = mapping->a_ops->writepage(page, &wbc); |
525 | if (rc < 0) | ||
526 | /* I/O Error writing */ | ||
527 | return -EIO; | ||
528 | 525 | ||
529 | if (rc != AOP_WRITEPAGE_ACTIVATE) | 526 | if (rc != AOP_WRITEPAGE_ACTIVATE) |
530 | /* unlocked. Relock */ | 527 | /* unlocked. Relock */ |
531 | lock_page(page); | 528 | lock_page(page); |
532 | 529 | ||
533 | return -EAGAIN; | 530 | return (rc < 0) ? -EIO : -EAGAIN; |
534 | } | 531 | } |
535 | 532 | ||
536 | /* | 533 | /* |
@@ -841,12 +838,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm, | |||
841 | struct page_to_node *pp; | 838 | struct page_to_node *pp; |
842 | LIST_HEAD(pagelist); | 839 | LIST_HEAD(pagelist); |
843 | 840 | ||
841 | migrate_prep(); | ||
844 | down_read(&mm->mmap_sem); | 842 | down_read(&mm->mmap_sem); |
845 | 843 | ||
846 | /* | 844 | /* |
847 | * Build a list of pages to migrate | 845 | * Build a list of pages to migrate |
848 | */ | 846 | */ |
849 | migrate_prep(); | ||
850 | for (pp = pm; pp->node != MAX_NUMNODES; pp++) { | 847 | for (pp = pm; pp->node != MAX_NUMNODES; pp++) { |
851 | struct vm_area_struct *vma; | 848 | struct vm_area_struct *vma; |
852 | struct page *page; | 849 | struct page *page; |
diff --git a/mm/mlock.c b/mm/mlock.c index 008ea70b7afa..1ada366570cb 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -66,14 +66,10 @@ void __clear_page_mlock(struct page *page) | |||
66 | putback_lru_page(page); | 66 | putback_lru_page(page); |
67 | } else { | 67 | } else { |
68 | /* | 68 | /* |
69 | * Page not on the LRU yet. Flush all pagevecs and retry. | 69 | * We lost the race. The page has already moved to the evictable list. |
70 | */ | 70 | */ |
71 | lru_add_drain_all(); | 71 | if (PageUnevictable(page)) |
72 | if (!isolate_lru_page(page)) | ||
73 | putback_lru_page(page); | ||
74 | else if (PageUnevictable(page)) | ||
75 | count_vm_event(UNEVICTABLE_PGSTRANDED); | 72 | count_vm_event(UNEVICTABLE_PGSTRANDED); |
76 | |||
77 | } | 73 | } |
78 | } | 74 | } |
79 | 75 | ||
@@ -166,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
166 | unsigned long addr = start; | 162 | unsigned long addr = start; |
167 | struct page *pages[16]; /* 16 gives a reasonable batch */ | 163 | struct page *pages[16]; /* 16 gives a reasonable batch */ |
168 | int nr_pages = (end - start) / PAGE_SIZE; | 164 | int nr_pages = (end - start) / PAGE_SIZE; |
169 | int ret; | 165 | int ret = 0; |
170 | int gup_flags = 0; | 166 | int gup_flags = 0; |
171 | 167 | ||
172 | VM_BUG_ON(start & ~PAGE_MASK); | 168 | VM_BUG_ON(start & ~PAGE_MASK); |
@@ -187,8 +183,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
187 | if (vma->vm_flags & VM_WRITE) | 183 | if (vma->vm_flags & VM_WRITE) |
188 | gup_flags |= GUP_FLAGS_WRITE; | 184 | gup_flags |= GUP_FLAGS_WRITE; |
189 | 185 | ||
190 | lru_add_drain_all(); /* push cached pages to LRU */ | ||
191 | |||
192 | while (nr_pages > 0) { | 186 | while (nr_pages > 0) { |
193 | int i; | 187 | int i; |
194 | 188 | ||
@@ -251,8 +245,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
251 | ret = 0; | 245 | ret = 0; |
252 | } | 246 | } |
253 | 247 | ||
254 | lru_add_drain_all(); /* to update stats */ | ||
255 | |||
256 | return ret; /* count entire vma as locked_vm */ | 248 | return ret; /* count entire vma as locked_vm */ |
257 | } | 249 | } |
258 | 250 | ||
@@ -546,6 +538,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) | |||
546 | if (!can_do_mlock()) | 538 | if (!can_do_mlock()) |
547 | return -EPERM; | 539 | return -EPERM; |
548 | 540 | ||
541 | lru_add_drain_all(); /* flush pagevec */ | ||
542 | |||
549 | down_write(&current->mm->mmap_sem); | 543 | down_write(&current->mm->mmap_sem); |
550 | len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); | 544 | len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); |
551 | start &= PAGE_MASK; | 545 | start &= PAGE_MASK; |
@@ -612,6 +606,8 @@ asmlinkage long sys_mlockall(int flags) | |||
612 | if (!can_do_mlock()) | 606 | if (!can_do_mlock()) |
613 | goto out; | 607 | goto out; |
614 | 608 | ||
609 | lru_add_drain_all(); /* flush pagevec */ | ||
610 | |||
615 | down_write(&current->mm->mmap_sem); | 611 | down_write(&current->mm->mmap_sem); |
616 | 612 | ||
617 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; | 613 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; |
@@ -1704,7 +1704,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) | |||
1704 | vma = find_vma_prev(mm, addr, &prev); | 1704 | vma = find_vma_prev(mm, addr, &prev); |
1705 | if (vma && (vma->vm_start <= addr)) | 1705 | if (vma && (vma->vm_start <= addr)) |
1706 | return vma; | 1706 | return vma; |
1707 | if (expand_stack(prev, addr)) | 1707 | if (!prev || expand_stack(prev, addr)) |
1708 | return NULL; | 1708 | return NULL; |
1709 | if (prev->vm_flags & VM_LOCKED) { | 1709 | if (prev->vm_flags & VM_LOCKED) { |
1710 | if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0) | 1710 | if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0) |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 64e5b4bcd964..a0a01902f551 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex); | |||
38 | * badness - calculate a numeric value for how bad this task has been | 38 | * badness - calculate a numeric value for how bad this task has been |
39 | * @p: task struct of which task we should calculate | 39 | * @p: task struct of which task we should calculate |
40 | * @uptime: current uptime in seconds | 40 | * @uptime: current uptime in seconds |
41 | * @mem: target memory controller | ||
42 | * | 41 | * |
43 | * The formula used is relatively simple and documented inline in the | 42 | * The formula used is relatively simple and documented inline in the |
44 | * function. The main rationale is that we want to select a good task | 43 | * function. The main rationale is that we want to select a good task |
@@ -295,6 +294,8 @@ static void dump_tasks(const struct mem_cgroup *mem) | |||
295 | continue; | 294 | continue; |
296 | if (mem && !task_in_mem_cgroup(p, mem)) | 295 | if (mem && !task_in_mem_cgroup(p, mem)) |
297 | continue; | 296 | continue; |
297 | if (!thread_group_leader(p)) | ||
298 | continue; | ||
298 | 299 | ||
299 | task_lock(p); | 300 | task_lock(p); |
300 | printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", | 301 | printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d0a240fbb8bf..d8ac01474563 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -263,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order) | |||
263 | { | 263 | { |
264 | int i; | 264 | int i; |
265 | int nr_pages = 1 << order; | 265 | int nr_pages = 1 << order; |
266 | |||
267 | set_compound_page_dtor(page, free_compound_page); | ||
268 | set_compound_order(page, order); | ||
269 | __SetPageHead(page); | ||
270 | for (i = 1; i < nr_pages; i++) { | ||
271 | struct page *p = page + i; | ||
272 | |||
273 | __SetPageTail(p); | ||
274 | p->first_page = page; | ||
275 | } | ||
276 | } | ||
277 | |||
278 | #ifdef CONFIG_HUGETLBFS | ||
279 | void prep_compound_gigantic_page(struct page *page, unsigned long order) | ||
280 | { | ||
281 | int i; | ||
282 | int nr_pages = 1 << order; | ||
266 | struct page *p = page + 1; | 283 | struct page *p = page + 1; |
267 | 284 | ||
268 | set_compound_page_dtor(page, free_compound_page); | 285 | set_compound_page_dtor(page, free_compound_page); |
269 | set_compound_order(page, order); | 286 | set_compound_order(page, order); |
270 | __SetPageHead(page); | 287 | __SetPageHead(page); |
271 | for (i = 1; i < nr_pages; i++, p++) { | 288 | for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { |
272 | if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) | ||
273 | p = pfn_to_page(page_to_pfn(page) + i); | ||
274 | __SetPageTail(p); | 289 | __SetPageTail(p); |
275 | p->first_page = page; | 290 | p->first_page = page; |
276 | } | 291 | } |
277 | } | 292 | } |
293 | #endif | ||
278 | 294 | ||
279 | static void destroy_compound_page(struct page *page, unsigned long order) | 295 | static void destroy_compound_page(struct page *page, unsigned long order) |
280 | { | 296 | { |
281 | int i; | 297 | int i; |
282 | int nr_pages = 1 << order; | 298 | int nr_pages = 1 << order; |
283 | struct page *p = page + 1; | ||
284 | 299 | ||
285 | if (unlikely(compound_order(page) != order)) | 300 | if (unlikely(compound_order(page) != order)) |
286 | bad_page(page); | 301 | bad_page(page); |
@@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order) | |||
288 | if (unlikely(!PageHead(page))) | 303 | if (unlikely(!PageHead(page))) |
289 | bad_page(page); | 304 | bad_page(page); |
290 | __ClearPageHead(page); | 305 | __ClearPageHead(page); |
291 | for (i = 1; i < nr_pages; i++, p++) { | 306 | for (i = 1; i < nr_pages; i++) { |
292 | if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) | 307 | struct page *p = page + i; |
293 | p = pfn_to_page(page_to_pfn(page) + i); | ||
294 | 308 | ||
295 | if (unlikely(!PageTail(p) | | 309 | if (unlikely(!PageTail(p) | |
296 | (p->first_page != page))) | 310 | (p->first_page != page))) |
@@ -1547,6 +1561,10 @@ nofail_alloc: | |||
1547 | 1561 | ||
1548 | /* We now go into synchronous reclaim */ | 1562 | /* We now go into synchronous reclaim */ |
1549 | cpuset_memory_pressure_bump(); | 1563 | cpuset_memory_pressure_bump(); |
1564 | /* | ||
1565 | * The task's cpuset might have expanded its set of allowable nodes | ||
1566 | */ | ||
1567 | cpuset_update_task_memory_state(); | ||
1550 | p->flags |= PF_MEMALLOC; | 1568 | p->flags |= PF_MEMALLOC; |
1551 | reclaim_state.reclaimed_slab = 0; | 1569 | reclaim_state.reclaimed_slab = 0; |
1552 | p->reclaim_state = &reclaim_state; | 1570 | p->reclaim_state = &reclaim_state; |
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index f59d797dc5a9..1223d927904d 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -165,7 +165,7 @@ int online_page_cgroup(unsigned long start_pfn, | |||
165 | unsigned long start, end, pfn; | 165 | unsigned long start, end, pfn; |
166 | int fail = 0; | 166 | int fail = 0; |
167 | 167 | ||
168 | start = start_pfn & (PAGES_PER_SECTION - 1); | 168 | start = start_pfn & ~(PAGES_PER_SECTION - 1); |
169 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); | 169 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); |
170 | 170 | ||
171 | for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { | 171 | for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { |
@@ -188,7 +188,7 @@ int offline_page_cgroup(unsigned long start_pfn, | |||
188 | { | 188 | { |
189 | unsigned long start, end, pfn; | 189 | unsigned long start, end, pfn; |
190 | 190 | ||
191 | start = start_pfn & (PAGES_PER_SECTION - 1); | 191 | start = start_pfn & ~(PAGES_PER_SECTION - 1); |
192 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); | 192 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); |
193 | 193 | ||
194 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) | 194 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index b70a7fec1ff6..5e0ffd967452 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) | |||
130 | if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) | 130 | if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) |
131 | break; | 131 | break; |
132 | } | 132 | } |
133 | if (pfn < end_pfn) | 133 | page = __first_valid_page(start_pfn, end_pfn - start_pfn); |
134 | if ((pfn < end_pfn) || !page) | ||
134 | return -EBUSY; | 135 | return -EBUSY; |
135 | /* Check all pages are free or Marked as ISOLATED */ | 136 | /* Check all pages are free or Marked as ISOLATED */ |
136 | zone = page_zone(pfn_to_page(pfn)); | 137 | zone = page_zone(page); |
137 | spin_lock_irqsave(&zone->lock, flags); | 138 | spin_lock_irqsave(&zone->lock, flags); |
138 | ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); | 139 | ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); |
139 | spin_unlock_irqrestore(&zone->lock, flags); | 140 | spin_unlock_irqrestore(&zone->lock, flags); |
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a91b5f8fcaf6..a13ea6401ae7 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c | |||
@@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, | |||
64 | unsigned long pfn = pte_pfn(*pte); | 64 | unsigned long pfn = pte_pfn(*pte); |
65 | int actual_node = early_pfn_to_nid(pfn); | 65 | int actual_node = early_pfn_to_nid(pfn); |
66 | 66 | ||
67 | if (actual_node != node) | 67 | if (node_distance(actual_node, node) > LOCAL_DISTANCE) |
68 | printk(KERN_WARNING "[%lx-%lx] potential offnode " | 68 | printk(KERN_WARNING "[%lx-%lx] potential offnode " |
69 | "page_structs\n", start, end - 1); | 69 | "page_structs\n", start, end - 1); |
70 | } | 70 | } |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f1cc03bbf6ac..30f826d484f0 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -178,7 +178,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end, | |||
178 | static inline int is_vmalloc_or_module_addr(const void *x) | 178 | static inline int is_vmalloc_or_module_addr(const void *x) |
179 | { | 179 | { |
180 | /* | 180 | /* |
181 | * x86-64 and sparc64 put modules in a special place, | 181 | * ARM, x86-64 and sparc64 put modules in a special place, |
182 | * and fall back on vmalloc() if that fails. Others | 182 | * and fall back on vmalloc() if that fails. Others |
183 | * just put it in the vmalloc space. | 183 | * just put it in the vmalloc space. |
184 | */ | 184 | */ |
@@ -324,14 +324,14 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, | |||
324 | 324 | ||
325 | BUG_ON(size & ~PAGE_MASK); | 325 | BUG_ON(size & ~PAGE_MASK); |
326 | 326 | ||
327 | addr = ALIGN(vstart, align); | ||
328 | |||
329 | va = kmalloc_node(sizeof(struct vmap_area), | 327 | va = kmalloc_node(sizeof(struct vmap_area), |
330 | gfp_mask & GFP_RECLAIM_MASK, node); | 328 | gfp_mask & GFP_RECLAIM_MASK, node); |
331 | if (unlikely(!va)) | 329 | if (unlikely(!va)) |
332 | return ERR_PTR(-ENOMEM); | 330 | return ERR_PTR(-ENOMEM); |
333 | 331 | ||
334 | retry: | 332 | retry: |
333 | addr = ALIGN(vstart, align); | ||
334 | |||
335 | spin_lock(&vmap_area_lock); | 335 | spin_lock(&vmap_area_lock); |
336 | /* XXX: could have a last_hole cache */ | 336 | /* XXX: could have a last_hole cache */ |
337 | n = vmap_area_root.rb_node; | 337 | n = vmap_area_root.rb_node; |
@@ -362,7 +362,7 @@ retry: | |||
362 | goto found; | 362 | goto found; |
363 | } | 363 | } |
364 | 364 | ||
365 | while (addr + size >= first->va_start && addr + size <= vend) { | 365 | while (addr + size > first->va_start && addr + size <= vend) { |
366 | addr = ALIGN(first->va_end + PAGE_SIZE, align); | 366 | addr = ALIGN(first->va_end + PAGE_SIZE, align); |
367 | 367 | ||
368 | n = rb_next(&first->rb_node); | 368 | n = rb_next(&first->rb_node); |
@@ -522,13 +522,24 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
522 | } | 522 | } |
523 | 523 | ||
524 | /* | 524 | /* |
525 | * Kick off a purge of the outstanding lazy areas. Don't bother if somebody | ||
526 | * is already purging. | ||
527 | */ | ||
528 | static void try_purge_vmap_area_lazy(void) | ||
529 | { | ||
530 | unsigned long start = ULONG_MAX, end = 0; | ||
531 | |||
532 | __purge_vmap_area_lazy(&start, &end, 0, 0); | ||
533 | } | ||
534 | |||
535 | /* | ||
525 | * Kick off a purge of the outstanding lazy areas. | 536 | * Kick off a purge of the outstanding lazy areas. |
526 | */ | 537 | */ |
527 | static void purge_vmap_area_lazy(void) | 538 | static void purge_vmap_area_lazy(void) |
528 | { | 539 | { |
529 | unsigned long start = ULONG_MAX, end = 0; | 540 | unsigned long start = ULONG_MAX, end = 0; |
530 | 541 | ||
531 | __purge_vmap_area_lazy(&start, &end, 0, 0); | 542 | __purge_vmap_area_lazy(&start, &end, 1, 0); |
532 | } | 543 | } |
533 | 544 | ||
534 | /* | 545 | /* |
@@ -539,7 +550,7 @@ static void free_unmap_vmap_area(struct vmap_area *va) | |||
539 | va->flags |= VM_LAZY_FREE; | 550 | va->flags |= VM_LAZY_FREE; |
540 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); | 551 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); |
541 | if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) | 552 | if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) |
542 | purge_vmap_area_lazy(); | 553 | try_purge_vmap_area_lazy(); |
543 | } | 554 | } |
544 | 555 | ||
545 | static struct vmap_area *find_vmap_area(unsigned long addr) | 556 | static struct vmap_area *find_vmap_area(unsigned long addr) |
@@ -592,6 +603,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr) | |||
592 | 603 | ||
593 | #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) | 604 | #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) |
594 | 605 | ||
606 | static bool vmap_initialized __read_mostly = false; | ||
607 | |||
595 | struct vmap_block_queue { | 608 | struct vmap_block_queue { |
596 | spinlock_t lock; | 609 | spinlock_t lock; |
597 | struct list_head free; | 610 | struct list_head free; |
@@ -828,6 +841,9 @@ void vm_unmap_aliases(void) | |||
828 | int cpu; | 841 | int cpu; |
829 | int flush = 0; | 842 | int flush = 0; |
830 | 843 | ||
844 | if (unlikely(!vmap_initialized)) | ||
845 | return; | ||
846 | |||
831 | for_each_possible_cpu(cpu) { | 847 | for_each_possible_cpu(cpu) { |
832 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | 848 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); |
833 | struct vmap_block *vb; | 849 | struct vmap_block *vb; |
@@ -942,6 +958,8 @@ void __init vmalloc_init(void) | |||
942 | INIT_LIST_HEAD(&vbq->dirty); | 958 | INIT_LIST_HEAD(&vbq->dirty); |
943 | vbq->nr_dirty = 0; | 959 | vbq->nr_dirty = 0; |
944 | } | 960 | } |
961 | |||
962 | vmap_initialized = true; | ||
945 | } | 963 | } |
946 | 964 | ||
947 | void unmap_kernel_range(unsigned long addr, unsigned long size) | 965 | void unmap_kernel_range(unsigned long addr, unsigned long size) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3b5860294bb6..7ea1440b53db 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -623,6 +623,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
623 | * Try to allocate it some swap space here. | 623 | * Try to allocate it some swap space here. |
624 | */ | 624 | */ |
625 | if (PageAnon(page) && !PageSwapCache(page)) { | 625 | if (PageAnon(page) && !PageSwapCache(page)) { |
626 | if (!(sc->gfp_mask & __GFP_IO)) | ||
627 | goto keep_locked; | ||
626 | switch (try_to_munlock(page)) { | 628 | switch (try_to_munlock(page)) { |
627 | case SWAP_FAIL: /* shouldn't happen */ | 629 | case SWAP_FAIL: /* shouldn't happen */ |
628 | case SWAP_AGAIN: | 630 | case SWAP_AGAIN: |
@@ -634,6 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
634 | } | 636 | } |
635 | if (!add_to_swap(page, GFP_ATOMIC)) | 637 | if (!add_to_swap(page, GFP_ATOMIC)) |
636 | goto activate_locked; | 638 | goto activate_locked; |
639 | may_enter_fs = 1; | ||
637 | } | 640 | } |
638 | #endif /* CONFIG_SWAP */ | 641 | #endif /* CONFIG_SWAP */ |
639 | 642 | ||
@@ -1386,9 +1389,9 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, | |||
1386 | file_prio = 200 - sc->swappiness; | 1389 | file_prio = 200 - sc->swappiness; |
1387 | 1390 | ||
1388 | /* | 1391 | /* |
1389 | * anon recent_rotated[0] | 1392 | * The amount of pressure on anon vs file pages is inversely |
1390 | * %anon = 100 * ----------- / ----------------- * IO cost | 1393 | * proportional to the fraction of recently scanned pages on |
1391 | * anon + file rotate_sum | 1394 | * each list that were recently referenced and in active use. |
1392 | */ | 1395 | */ |
1393 | ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); | 1396 | ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); |
1394 | ap /= zone->recent_rotated[0] + 1; | 1397 | ap /= zone->recent_rotated[0] + 1; |
@@ -2368,39 +2371,6 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) | |||
2368 | return 1; | 2371 | return 1; |
2369 | } | 2372 | } |
2370 | 2373 | ||
2371 | static void show_page_path(struct page *page) | ||
2372 | { | ||
2373 | char buf[256]; | ||
2374 | if (page_is_file_cache(page)) { | ||
2375 | struct address_space *mapping = page->mapping; | ||
2376 | struct dentry *dentry; | ||
2377 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
2378 | |||
2379 | spin_lock(&mapping->i_mmap_lock); | ||
2380 | dentry = d_find_alias(mapping->host); | ||
2381 | printk(KERN_INFO "rescued: %s %lu\n", | ||
2382 | dentry_path(dentry, buf, 256), pgoff); | ||
2383 | spin_unlock(&mapping->i_mmap_lock); | ||
2384 | } else { | ||
2385 | #if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU) | ||
2386 | struct anon_vma *anon_vma; | ||
2387 | struct vm_area_struct *vma; | ||
2388 | |||
2389 | anon_vma = page_lock_anon_vma(page); | ||
2390 | if (!anon_vma) | ||
2391 | return; | ||
2392 | |||
2393 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | ||
2394 | printk(KERN_INFO "rescued: anon %s\n", | ||
2395 | vma->vm_mm->owner->comm); | ||
2396 | break; | ||
2397 | } | ||
2398 | page_unlock_anon_vma(anon_vma); | ||
2399 | #endif | ||
2400 | } | ||
2401 | } | ||
2402 | |||
2403 | |||
2404 | /** | 2374 | /** |
2405 | * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list | 2375 | * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list |
2406 | * @page: page to check evictability and move to appropriate lru list | 2376 | * @page: page to check evictability and move to appropriate lru list |
@@ -2421,8 +2391,6 @@ retry: | |||
2421 | if (page_evictable(page, NULL)) { | 2391 | if (page_evictable(page, NULL)) { |
2422 | enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); | 2392 | enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); |
2423 | 2393 | ||
2424 | show_page_path(page); | ||
2425 | |||
2426 | __dec_zone_state(zone, NR_UNEVICTABLE); | 2394 | __dec_zone_state(zone, NR_UNEVICTABLE); |
2427 | list_move(&page->lru, &zone->lru[l].list); | 2395 | list_move(&page->lru, &zone->lru[l].list); |
2428 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); | 2396 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); |