about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/hugetlb.c 54
-rw-r--r-- mm/internal.h 29
-rw-r--r-- mm/memory.c 15
-rw-r--r-- mm/memory_hotplug.c 3
-rw-r--r-- mm/mempolicy.c 18
-rw-r--r-- mm/migrate.c 7
-rw-r--r-- mm/mlock.c 18
-rw-r--r-- mm/mmap.c 2
-rw-r--r-- mm/oom_kill.c 3
-rw-r--r-- mm/page_alloc.c 32
-rw-r--r-- mm/page_cgroup.c 4
-rw-r--r-- mm/page_isolation.c 5
-rw-r--r-- mm/sparse-vmemmap.c 2
-rw-r--r-- mm/vmalloc.c 30
-rw-r--r-- mm/vmscan.c 44
15 files changed, 178 insertions, 88 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 421aee99b84a..6058b53dcb89 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
354 return 0; 354 return 0;
355} 355}
356 356
357static void clear_gigantic_page(struct page *page,
358 unsigned long addr, unsigned long sz)
359{
360 int i;
361 struct page *p = page;
362
363 might_sleep();
364 for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
365 cond_resched();
366 clear_user_highpage(p, addr + i * PAGE_SIZE);
367 }
368}
357static void clear_huge_page(struct page *page, 369static void clear_huge_page(struct page *page,
358 unsigned long addr, unsigned long sz) 370 unsigned long addr, unsigned long sz)
359{ 371{
360 int i; 372 int i;
361 373
374 if (unlikely(sz > MAX_ORDER_NR_PAGES))
375 return clear_gigantic_page(page, addr, sz);
376
362 might_sleep(); 377 might_sleep();
363 for (i = 0; i < sz/PAGE_SIZE; i++) { 378 for (i = 0; i < sz/PAGE_SIZE; i++) {
364 cond_resched(); 379 cond_resched();
@@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page,
366 } 381 }
367} 382}
368 383
384static void copy_gigantic_page(struct page *dst, struct page *src,
385 unsigned long addr, struct vm_area_struct *vma)
386{
387 int i;
388 struct hstate *h = hstate_vma(vma);
389 struct page *dst_base = dst;
390 struct page *src_base = src;
391 might_sleep();
392 for (i = 0; i < pages_per_huge_page(h); ) {
393 cond_resched();
394 copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
395
396 i++;
397 dst = mem_map_next(dst, dst_base, i);
398 src = mem_map_next(src, src_base, i);
399 }
400}
369static void copy_huge_page(struct page *dst, struct page *src, 401static void copy_huge_page(struct page *dst, struct page *src,
370 unsigned long addr, struct vm_area_struct *vma) 402 unsigned long addr, struct vm_area_struct *vma)
371{ 403{
372 int i; 404 int i;
373 struct hstate *h = hstate_vma(vma); 405 struct hstate *h = hstate_vma(vma);
374 406
407 if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
408 return copy_gigantic_page(dst, src, addr, vma);
409
375 might_sleep(); 410 might_sleep();
376 for (i = 0; i < pages_per_huge_page(h); i++) { 411 for (i = 0; i < pages_per_huge_page(h); i++) {
377 cond_resched(); 412 cond_resched();
@@ -456,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page)
456{ 491{
457 int i; 492 int i;
458 493
494 VM_BUG_ON(h->order >= MAX_ORDER);
495
459 h->nr_huge_pages--; 496 h->nr_huge_pages--;
460 h->nr_huge_pages_node[page_to_nid(page)]--; 497 h->nr_huge_pages_node[page_to_nid(page)]--;
461 for (i = 0; i < pages_per_huge_page(h); i++) { 498 for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -970,6 +1007,14 @@ found:
970 return 1; 1007 return 1;
971} 1008}
972 1009
1010static void prep_compound_huge_page(struct page *page, int order)
1011{
1012 if (unlikely(order > (MAX_ORDER - 1)))
1013 prep_compound_gigantic_page(page, order);
1014 else
1015 prep_compound_page(page, order);
1016}
1017
973/* Put bootmem huge pages into the standard lists after mem_map is up */ 1018/* Put bootmem huge pages into the standard lists after mem_map is up */
974static void __init gather_bootmem_prealloc(void) 1019static void __init gather_bootmem_prealloc(void)
975{ 1020{
@@ -980,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void)
980 struct hstate *h = m->hstate; 1025 struct hstate *h = m->hstate;
981 __ClearPageReserved(page); 1026 __ClearPageReserved(page);
982 WARN_ON(page_count(page) != 1); 1027 WARN_ON(page_count(page) != 1);
983 prep_compound_page(page, h->order); 1028 prep_compound_huge_page(page, h->order);
984 prep_new_huge_page(h, page, page_to_nid(page)); 1029 prep_new_huge_page(h, page, page_to_nid(page));
985 } 1030 }
986} 1031}
@@ -1751,6 +1796,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1751static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, 1796static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
1752 struct page *page, unsigned long address) 1797 struct page *page, unsigned long address)
1753{ 1798{
1799 struct hstate *h = hstate_vma(vma);
1754 struct vm_area_struct *iter_vma; 1800 struct vm_area_struct *iter_vma;
1755 struct address_space *mapping; 1801 struct address_space *mapping;
1756 struct prio_tree_iter iter; 1802 struct prio_tree_iter iter;
@@ -1760,7 +1806,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
1760 * vm_pgoff is in PAGE_SIZE units, hence the different calculation 1806 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
1761 * from page cache lookup which is in HPAGE_SIZE units. 1807 * from page cache lookup which is in HPAGE_SIZE units.
1762 */ 1808 */
1763 address = address & huge_page_mask(hstate_vma(vma)); 1809 address = address & huge_page_mask(h);
1764 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) 1810 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
1765 + (vma->vm_pgoff >> PAGE_SHIFT); 1811 + (vma->vm_pgoff >> PAGE_SHIFT);
1766 mapping = (struct address_space *)page_private(page); 1812 mapping = (struct address_space *)page_private(page);
@@ -1779,7 +1825,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
1779 */ 1825 */
1780 if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) 1826 if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
1781 unmap_hugepage_range(iter_vma, 1827 unmap_hugepage_range(iter_vma,
1782 address, address + HPAGE_SIZE, 1828 address, address + huge_page_size(h),
1783 page); 1829 page);
1784 } 1830 }
1785 1831
@@ -2130,7 +2176,7 @@ same_page:
2130 if (zeropage_ok) 2176 if (zeropage_ok)
2131 pages[i] = ZERO_PAGE(0); 2177 pages[i] = ZERO_PAGE(0);
2132 else 2178 else
2133 pages[i] = page + pfn_offset; 2179 pages[i] = mem_map_offset(page, pfn_offset);
2134 get_page(pages[i]); 2180 get_page(pages[i]);
2135 } 2181 }
2136 2182
diff --git a/mm/internal.h b/mm/internal.h
index e4e728bdf324..13333bc2eb68 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
17 unsigned long floor, unsigned long ceiling); 17 unsigned long floor, unsigned long ceiling);
18 18
19extern void prep_compound_page(struct page *page, unsigned long order); 19extern void prep_compound_page(struct page *page, unsigned long order);
20extern void prep_compound_gigantic_page(struct page *page, unsigned long order);
20 21
21static inline void set_page_count(struct page *page, int v) 22static inline void set_page_count(struct page *page, int v)
22{ 23{
@@ -176,6 +177,34 @@ static inline void free_page_mlock(struct page *page) { }
176#endif /* CONFIG_UNEVICTABLE_LRU */ 177#endif /* CONFIG_UNEVICTABLE_LRU */
177 178
178/* 179/*
180 * Return the mem_map entry representing the 'offset' subpage within
181 * the maximally aligned gigantic page 'base'. Handle any discontiguity
182 * in the mem_map at MAX_ORDER_NR_PAGES boundaries.
183 */
184static inline struct page *mem_map_offset(struct page *base, int offset)
185{
186 if (unlikely(offset >= MAX_ORDER_NR_PAGES))
187 return pfn_to_page(page_to_pfn(base) + offset);
188 return base + offset;
189}
190
191/*
192 * Iterator over all subpages within the maximally aligned gigantic
193 * page 'base'. Handle any discontiguity in the mem_map.
194 */
195static inline struct page *mem_map_next(struct page *iter,
196 struct page *base, int offset)
197{
198 if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
199 unsigned long pfn = page_to_pfn(base) + offset;
200 if (!pfn_valid(pfn))
201 return NULL;
202 return pfn_to_page(pfn);
203 }
204 return iter + 1;
205}
206
207/*
179 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, 208 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
180 * so all functions starting at paging_init should be marked __init 209 * so all functions starting at paging_init should be marked __init
181 * in those cases. SPARSEMEM, however, allows for memory hotplug, 210 * in those cases. SPARSEMEM, however, allows for memory hotplug,
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..fc031d68327e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3049,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
3049 } 3049 }
3050 up_read(&current->mm->mmap_sem); 3050 up_read(&current->mm->mmap_sem);
3051} 3051}
3052
3053#ifdef CONFIG_PROVE_LOCKING
3054void might_fault(void)
3055{
3056 might_sleep();
3057 /*
3058 * It would be nicer to annotate only the paths which are not under
3059 * pagefault_disable; however, that requires a larger audit and
3060 * providing helpers like get_user_atomic.
3061 */
3062 if (!in_atomic() && current->mm)
3063 might_lock_read(&current->mm->mmap_sem);
3064}
3065EXPORT_SYMBOL(might_fault);
3066#endif
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6837a1014372..b5b2b15085a8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -22,7 +22,6 @@
22#include <linux/highmem.h> 22#include <linux/highmem.h>
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/ioport.h> 24#include <linux/ioport.h>
25#include <linux/cpuset.h>
26#include <linux/delay.h> 25#include <linux/delay.h>
27#include <linux/migrate.h> 26#include <linux/migrate.h>
28#include <linux/page-isolation.h> 27#include <linux/page-isolation.h>
@@ -498,8 +497,6 @@ int add_memory(int nid, u64 start, u64 size)
498 /* we online node here. we can't roll back from here. */ 497 /* we online node here. we can't roll back from here. */
499 node_set_online(nid); 498 node_set_online(nid);
500 499
501 cpuset_track_online_nodes();
502
503 if (new_pgdat) { 500 if (new_pgdat) {
504 ret = register_one_node(nid); 501 ret = register_one_node(nid);
505 /* 502 /*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 36f42573a335..e9493b1c1117 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
489 int err; 489 int err;
490 struct vm_area_struct *first, *vma, *prev; 490 struct vm_area_struct *first, *vma, *prev;
491 491
492 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
493
494 err = migrate_prep();
495 if (err)
496 return ERR_PTR(err);
497 }
498 492
499 first = find_vma(mm, start); 493 first = find_vma(mm, start);
500 if (!first) 494 if (!first)
@@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm,
809 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) 803 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
810{ 804{
811 int busy = 0; 805 int busy = 0;
812 int err = 0; 806 int err;
813 nodemask_t tmp; 807 nodemask_t tmp;
814 808
809 err = migrate_prep();
810 if (err)
811 return err;
812
815 down_read(&mm->mmap_sem); 813 down_read(&mm->mmap_sem);
816 814
817 err = migrate_vmas(mm, from_nodes, to_nodes, flags); 815 err = migrate_vmas(mm, from_nodes, to_nodes, flags);
@@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len,
974 start, start + len, mode, mode_flags, 972 start, start + len, mode, mode_flags,
975 nmask ? nodes_addr(*nmask)[0] : -1); 973 nmask ? nodes_addr(*nmask)[0] : -1);
976 974
975 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
976
977 err = migrate_prep();
978 if (err)
979 return err;
980 }
977 down_write(&mm->mmap_sem); 981 down_write(&mm->mmap_sem);
978 vma = check_range(mm, start, end, nmask, 982 vma = check_range(mm, start, end, nmask,
979 flags | MPOL_MF_INVERT, &pagelist); 983 flags | MPOL_MF_INVERT, &pagelist);
diff --git a/mm/migrate.c b/mm/migrate.c
index 6602941bfab0..1e0d6b237f44 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -522,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page)
522 remove_migration_ptes(page, page); 522 remove_migration_ptes(page, page);
523 523
524 rc = mapping->a_ops->writepage(page, &wbc); 524 rc = mapping->a_ops->writepage(page, &wbc);
525 if (rc < 0)
526 /* I/O Error writing */
527 return -EIO;
528 525
529 if (rc != AOP_WRITEPAGE_ACTIVATE) 526 if (rc != AOP_WRITEPAGE_ACTIVATE)
530 /* unlocked. Relock */ 527 /* unlocked. Relock */
531 lock_page(page); 528 lock_page(page);
532 529
533 return -EAGAIN; 530 return (rc < 0) ? -EIO : -EAGAIN;
534} 531}
535 532
536/* 533/*
@@ -841,12 +838,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
841 struct page_to_node *pp; 838 struct page_to_node *pp;
842 LIST_HEAD(pagelist); 839 LIST_HEAD(pagelist);
843 840
841 migrate_prep();
844 down_read(&mm->mmap_sem); 842 down_read(&mm->mmap_sem);
845 843
846 /* 844 /*
847 * Build a list of pages to migrate 845 * Build a list of pages to migrate
848 */ 846 */
849 migrate_prep();
850 for (pp = pm; pp->node != MAX_NUMNODES; pp++) { 847 for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
851 struct vm_area_struct *vma; 848 struct vm_area_struct *vma;
852 struct page *page; 849 struct page *page;
diff --git a/mm/mlock.c b/mm/mlock.c
index 008ea70b7afa..1ada366570cb 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -66,14 +66,10 @@ void __clear_page_mlock(struct page *page)
66 putback_lru_page(page); 66 putback_lru_page(page);
67 } else { 67 } else {
68 /* 68 /*
69 * Page not on the LRU yet. Flush all pagevecs and retry. 69 * We lost the race: the page has already moved to the evictable list.
70 */ 70 */
71 lru_add_drain_all(); 71 if (PageUnevictable(page))
72 if (!isolate_lru_page(page))
73 putback_lru_page(page);
74 else if (PageUnevictable(page))
75 count_vm_event(UNEVICTABLE_PGSTRANDED); 72 count_vm_event(UNEVICTABLE_PGSTRANDED);
76
77 } 73 }
78} 74}
79 75
@@ -166,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
166 unsigned long addr = start; 162 unsigned long addr = start;
167 struct page *pages[16]; /* 16 gives a reasonable batch */ 163 struct page *pages[16]; /* 16 gives a reasonable batch */
168 int nr_pages = (end - start) / PAGE_SIZE; 164 int nr_pages = (end - start) / PAGE_SIZE;
169 int ret; 165 int ret = 0;
170 int gup_flags = 0; 166 int gup_flags = 0;
171 167
172 VM_BUG_ON(start & ~PAGE_MASK); 168 VM_BUG_ON(start & ~PAGE_MASK);
@@ -187,8 +183,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
187 if (vma->vm_flags & VM_WRITE) 183 if (vma->vm_flags & VM_WRITE)
188 gup_flags |= GUP_FLAGS_WRITE; 184 gup_flags |= GUP_FLAGS_WRITE;
189 185
190 lru_add_drain_all(); /* push cached pages to LRU */
191
192 while (nr_pages > 0) { 186 while (nr_pages > 0) {
193 int i; 187 int i;
194 188
@@ -251,8 +245,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
251 ret = 0; 245 ret = 0;
252 } 246 }
253 247
254 lru_add_drain_all(); /* to update stats */
255
256 return ret; /* count entire vma as locked_vm */ 248 return ret; /* count entire vma as locked_vm */
257} 249}
258 250
@@ -546,6 +538,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
546 if (!can_do_mlock()) 538 if (!can_do_mlock())
547 return -EPERM; 539 return -EPERM;
548 540
541 lru_add_drain_all(); /* flush pagevec */
542
549 down_write(&current->mm->mmap_sem); 543 down_write(&current->mm->mmap_sem);
550 len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); 544 len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
551 start &= PAGE_MASK; 545 start &= PAGE_MASK;
@@ -612,6 +606,8 @@ asmlinkage long sys_mlockall(int flags)
612 if (!can_do_mlock()) 606 if (!can_do_mlock())
613 goto out; 607 goto out;
614 608
609 lru_add_drain_all(); /* flush pagevec */
610
615 down_write(&current->mm->mmap_sem); 611 down_write(&current->mm->mmap_sem);
616 612
617 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; 613 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
diff --git a/mm/mmap.c b/mm/mmap.c
index de14ac21e5b5..d4855a682ab6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1704,7 +1704,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
1704 vma = find_vma_prev(mm, addr, &prev); 1704 vma = find_vma_prev(mm, addr, &prev);
1705 if (vma && (vma->vm_start <= addr)) 1705 if (vma && (vma->vm_start <= addr))
1706 return vma; 1706 return vma;
1707 if (expand_stack(prev, addr)) 1707 if (!prev || expand_stack(prev, addr))
1708 return NULL; 1708 return NULL;
1709 if (prev->vm_flags & VM_LOCKED) { 1709 if (prev->vm_flags & VM_LOCKED) {
1710 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0) 1710 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64e5b4bcd964..a0a01902f551 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex);
38 * badness - calculate a numeric value for how bad this task has been 38 * badness - calculate a numeric value for how bad this task has been
39 * @p: task struct of which task we should calculate 39 * @p: task struct of which task we should calculate
40 * @uptime: current uptime in seconds 40 * @uptime: current uptime in seconds
41 * @mem: target memory controller
42 * 41 *
43 * The formula used is relatively simple and documented inline in the 42 * The formula used is relatively simple and documented inline in the
44 * function. The main rationale is that we want to select a good task 43 * function. The main rationale is that we want to select a good task
@@ -295,6 +294,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
295 continue; 294 continue;
296 if (mem && !task_in_mem_cgroup(p, mem)) 295 if (mem && !task_in_mem_cgroup(p, mem))
297 continue; 296 continue;
297 if (!thread_group_leader(p))
298 continue;
298 299
299 task_lock(p); 300 task_lock(p);
300 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", 301 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d0a240fbb8bf..d8ac01474563 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -263,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order)
263{ 263{
264 int i; 264 int i;
265 int nr_pages = 1 << order; 265 int nr_pages = 1 << order;
266
267 set_compound_page_dtor(page, free_compound_page);
268 set_compound_order(page, order);
269 __SetPageHead(page);
270 for (i = 1; i < nr_pages; i++) {
271 struct page *p = page + i;
272
273 __SetPageTail(p);
274 p->first_page = page;
275 }
276}
277
278#ifdef CONFIG_HUGETLBFS
279void prep_compound_gigantic_page(struct page *page, unsigned long order)
280{
281 int i;
282 int nr_pages = 1 << order;
266 struct page *p = page + 1; 283 struct page *p = page + 1;
267 284
268 set_compound_page_dtor(page, free_compound_page); 285 set_compound_page_dtor(page, free_compound_page);
269 set_compound_order(page, order); 286 set_compound_order(page, order);
270 __SetPageHead(page); 287 __SetPageHead(page);
271 for (i = 1; i < nr_pages; i++, p++) { 288 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
272 if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
273 p = pfn_to_page(page_to_pfn(page) + i);
274 __SetPageTail(p); 289 __SetPageTail(p);
275 p->first_page = page; 290 p->first_page = page;
276 } 291 }
277} 292}
293#endif
278 294
279static void destroy_compound_page(struct page *page, unsigned long order) 295static void destroy_compound_page(struct page *page, unsigned long order)
280{ 296{
281 int i; 297 int i;
282 int nr_pages = 1 << order; 298 int nr_pages = 1 << order;
283 struct page *p = page + 1;
284 299
285 if (unlikely(compound_order(page) != order)) 300 if (unlikely(compound_order(page) != order))
286 bad_page(page); 301 bad_page(page);
@@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order)
288 if (unlikely(!PageHead(page))) 303 if (unlikely(!PageHead(page)))
289 bad_page(page); 304 bad_page(page);
290 __ClearPageHead(page); 305 __ClearPageHead(page);
291 for (i = 1; i < nr_pages; i++, p++) { 306 for (i = 1; i < nr_pages; i++) {
292 if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) 307 struct page *p = page + i;
293 p = pfn_to_page(page_to_pfn(page) + i);
294 308
295 if (unlikely(!PageTail(p) | 309 if (unlikely(!PageTail(p) |
296 (p->first_page != page))) 310 (p->first_page != page)))
@@ -1547,6 +1561,10 @@ nofail_alloc:
1547 1561
1548 /* We now go into synchronous reclaim */ 1562 /* We now go into synchronous reclaim */
1549 cpuset_memory_pressure_bump(); 1563 cpuset_memory_pressure_bump();
1564 /*
1565 * The task's cpuset might have expanded its set of allowable nodes
1566 */
1567 cpuset_update_task_memory_state();
1550 p->flags |= PF_MEMALLOC; 1568 p->flags |= PF_MEMALLOC;
1551 reclaim_state.reclaimed_slab = 0; 1569 reclaim_state.reclaimed_slab = 0;
1552 p->reclaim_state = &reclaim_state; 1570 p->reclaim_state = &reclaim_state;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index f59d797dc5a9..1223d927904d 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -165,7 +165,7 @@ int online_page_cgroup(unsigned long start_pfn,
165 unsigned long start, end, pfn; 165 unsigned long start, end, pfn;
166 int fail = 0; 166 int fail = 0;
167 167
168 start = start_pfn & (PAGES_PER_SECTION - 1); 168 start = start_pfn & ~(PAGES_PER_SECTION - 1);
169 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); 169 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
170 170
171 for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { 171 for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
@@ -188,7 +188,7 @@ int offline_page_cgroup(unsigned long start_pfn,
188{ 188{
189 unsigned long start, end, pfn; 189 unsigned long start, end, pfn;
190 190
191 start = start_pfn & (PAGES_PER_SECTION - 1); 191 start = start_pfn & ~(PAGES_PER_SECTION - 1);
192 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); 192 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
193 193
194 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 194 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b70a7fec1ff6..5e0ffd967452 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
130 if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 130 if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
131 break; 131 break;
132 } 132 }
133 if (pfn < end_pfn) 133 page = __first_valid_page(start_pfn, end_pfn - start_pfn);
134 if ((pfn < end_pfn) || !page)
134 return -EBUSY; 135 return -EBUSY;
135 /* Check all pages are free or Marked as ISOLATED */ 136 /* Check all pages are free or Marked as ISOLATED */
136 zone = page_zone(pfn_to_page(pfn)); 137 zone = page_zone(page);
137 spin_lock_irqsave(&zone->lock, flags); 138 spin_lock_irqsave(&zone->lock, flags);
138 ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); 139 ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn);
139 spin_unlock_irqrestore(&zone->lock, flags); 140 spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a91b5f8fcaf6..a13ea6401ae7 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
64 unsigned long pfn = pte_pfn(*pte); 64 unsigned long pfn = pte_pfn(*pte);
65 int actual_node = early_pfn_to_nid(pfn); 65 int actual_node = early_pfn_to_nid(pfn);
66 66
67 if (actual_node != node) 67 if (node_distance(actual_node, node) > LOCAL_DISTANCE)
68 printk(KERN_WARNING "[%lx-%lx] potential offnode " 68 printk(KERN_WARNING "[%lx-%lx] potential offnode "
69 "page_structs\n", start, end - 1); 69 "page_structs\n", start, end - 1);
70} 70}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f1cc03bbf6ac..30f826d484f0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -178,7 +178,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
178static inline int is_vmalloc_or_module_addr(const void *x) 178static inline int is_vmalloc_or_module_addr(const void *x)
179{ 179{
180 /* 180 /*
181 * x86-64 and sparc64 put modules in a special place, 181 * ARM, x86-64 and sparc64 put modules in a special place,
182 * and fall back on vmalloc() if that fails. Others 182 * and fall back on vmalloc() if that fails. Others
183 * just put it in the vmalloc space. 183 * just put it in the vmalloc space.
184 */ 184 */
@@ -324,14 +324,14 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
324 324
325 BUG_ON(size & ~PAGE_MASK); 325 BUG_ON(size & ~PAGE_MASK);
326 326
327 addr = ALIGN(vstart, align);
328
329 va = kmalloc_node(sizeof(struct vmap_area), 327 va = kmalloc_node(sizeof(struct vmap_area),
330 gfp_mask & GFP_RECLAIM_MASK, node); 328 gfp_mask & GFP_RECLAIM_MASK, node);
331 if (unlikely(!va)) 329 if (unlikely(!va))
332 return ERR_PTR(-ENOMEM); 330 return ERR_PTR(-ENOMEM);
333 331
334retry: 332retry:
333 addr = ALIGN(vstart, align);
334
335 spin_lock(&vmap_area_lock); 335 spin_lock(&vmap_area_lock);
336 /* XXX: could have a last_hole cache */ 336 /* XXX: could have a last_hole cache */
337 n = vmap_area_root.rb_node; 337 n = vmap_area_root.rb_node;
@@ -362,7 +362,7 @@ retry:
362 goto found; 362 goto found;
363 } 363 }
364 364
365 while (addr + size >= first->va_start && addr + size <= vend) { 365 while (addr + size > first->va_start && addr + size <= vend) {
366 addr = ALIGN(first->va_end + PAGE_SIZE, align); 366 addr = ALIGN(first->va_end + PAGE_SIZE, align);
367 367
368 n = rb_next(&first->rb_node); 368 n = rb_next(&first->rb_node);
@@ -522,13 +522,24 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
522} 522}
523 523
524/* 524/*
525 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
526 * is already purging.
527 */
528static void try_purge_vmap_area_lazy(void)
529{
530 unsigned long start = ULONG_MAX, end = 0;
531
532 __purge_vmap_area_lazy(&start, &end, 0, 0);
533}
534
535/*
525 * Kick off a purge of the outstanding lazy areas. 536 * Kick off a purge of the outstanding lazy areas.
526 */ 537 */
527static void purge_vmap_area_lazy(void) 538static void purge_vmap_area_lazy(void)
528{ 539{
529 unsigned long start = ULONG_MAX, end = 0; 540 unsigned long start = ULONG_MAX, end = 0;
530 541
531 __purge_vmap_area_lazy(&start, &end, 0, 0); 542 __purge_vmap_area_lazy(&start, &end, 1, 0);
532} 543}
533 544
534/* 545/*
@@ -539,7 +550,7 @@ static void free_unmap_vmap_area(struct vmap_area *va)
539 va->flags |= VM_LAZY_FREE; 550 va->flags |= VM_LAZY_FREE;
540 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); 551 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
541 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) 552 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
542 purge_vmap_area_lazy(); 553 try_purge_vmap_area_lazy();
543} 554}
544 555
545static struct vmap_area *find_vmap_area(unsigned long addr) 556static struct vmap_area *find_vmap_area(unsigned long addr)
@@ -592,6 +603,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr)
592 603
593#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) 604#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
594 605
606static bool vmap_initialized __read_mostly = false;
607
595struct vmap_block_queue { 608struct vmap_block_queue {
596 spinlock_t lock; 609 spinlock_t lock;
597 struct list_head free; 610 struct list_head free;
@@ -828,6 +841,9 @@ void vm_unmap_aliases(void)
828 int cpu; 841 int cpu;
829 int flush = 0; 842 int flush = 0;
830 843
844 if (unlikely(!vmap_initialized))
845 return;
846
831 for_each_possible_cpu(cpu) { 847 for_each_possible_cpu(cpu) {
832 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); 848 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
833 struct vmap_block *vb; 849 struct vmap_block *vb;
@@ -942,6 +958,8 @@ void __init vmalloc_init(void)
942 INIT_LIST_HEAD(&vbq->dirty); 958 INIT_LIST_HEAD(&vbq->dirty);
943 vbq->nr_dirty = 0; 959 vbq->nr_dirty = 0;
944 } 960 }
961
962 vmap_initialized = true;
945} 963}
946 964
947void unmap_kernel_range(unsigned long addr, unsigned long size) 965void unmap_kernel_range(unsigned long addr, unsigned long size)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3b5860294bb6..7ea1440b53db 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -623,6 +623,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
623 * Try to allocate it some swap space here. 623 * Try to allocate it some swap space here.
624 */ 624 */
625 if (PageAnon(page) && !PageSwapCache(page)) { 625 if (PageAnon(page) && !PageSwapCache(page)) {
626 if (!(sc->gfp_mask & __GFP_IO))
627 goto keep_locked;
626 switch (try_to_munlock(page)) { 628 switch (try_to_munlock(page)) {
627 case SWAP_FAIL: /* shouldn't happen */ 629 case SWAP_FAIL: /* shouldn't happen */
628 case SWAP_AGAIN: 630 case SWAP_AGAIN:
@@ -634,6 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
634 } 636 }
635 if (!add_to_swap(page, GFP_ATOMIC)) 637 if (!add_to_swap(page, GFP_ATOMIC))
636 goto activate_locked; 638 goto activate_locked;
639 may_enter_fs = 1;
637 } 640 }
638#endif /* CONFIG_SWAP */ 641#endif /* CONFIG_SWAP */
639 642
@@ -1386,9 +1389,9 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1386 file_prio = 200 - sc->swappiness; 1389 file_prio = 200 - sc->swappiness;
1387 1390
1388 /* 1391 /*
1389 * anon recent_rotated[0] 1392 * The amount of pressure on anon vs file pages is inversely
1390 * %anon = 100 * ----------- / ----------------- * IO cost 1393 * proportional to the fraction of recently scanned pages on
1391 * anon + file rotate_sum 1394 * each list that were recently referenced and in active use.
1392 */ 1395 */
1393 ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); 1396 ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
1394 ap /= zone->recent_rotated[0] + 1; 1397 ap /= zone->recent_rotated[0] + 1;
@@ -2368,39 +2371,6 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
2368 return 1; 2371 return 1;
2369} 2372}
2370 2373
2371static void show_page_path(struct page *page)
2372{
2373 char buf[256];
2374 if (page_is_file_cache(page)) {
2375 struct address_space *mapping = page->mapping;
2376 struct dentry *dentry;
2377 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
2378
2379 spin_lock(&mapping->i_mmap_lock);
2380 dentry = d_find_alias(mapping->host);
2381 printk(KERN_INFO "rescued: %s %lu\n",
2382 dentry_path(dentry, buf, 256), pgoff);
2383 spin_unlock(&mapping->i_mmap_lock);
2384 } else {
2385#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
2386 struct anon_vma *anon_vma;
2387 struct vm_area_struct *vma;
2388
2389 anon_vma = page_lock_anon_vma(page);
2390 if (!anon_vma)
2391 return;
2392
2393 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
2394 printk(KERN_INFO "rescued: anon %s\n",
2395 vma->vm_mm->owner->comm);
2396 break;
2397 }
2398 page_unlock_anon_vma(anon_vma);
2399#endif
2400 }
2401}
2402
2403
2404/** 2374/**
2405 * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list 2375 * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
2406 * @page: page to check evictability and move to appropriate lru list 2376 * @page: page to check evictability and move to appropriate lru list
@@ -2421,8 +2391,6 @@ retry:
2421 if (page_evictable(page, NULL)) { 2391 if (page_evictable(page, NULL)) {
2422 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); 2392 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
2423 2393
2424 show_page_path(page);
2425
2426 __dec_zone_state(zone, NR_UNEVICTABLE); 2394 __dec_zone_state(zone, NR_UNEVICTABLE);
2427 list_move(&page->lru, &zone->lru[l].list); 2395 list_move(&page->lru, &zone->lru[l].list);
2428 __inc_zone_state(zone, NR_INACTIVE_ANON + l); 2396 __inc_zone_state(zone, NR_INACTIVE_ANON + l);