aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c2
-rw-r--r--mm/fremap.c7
-rw-r--r--mm/hugetlb.c24
-rw-r--r--mm/ksm.c6
-rw-r--r--mm/memcontrol.c10
-rw-r--r--mm/memory-failure.c12
-rw-r--r--mm/mmap.c1
-rw-r--r--mm/oom_kill.c49
-rw-r--r--mm/page_alloc.c4
-rw-r--r--mm/percpu.c2
-rw-r--r--mm/rmap.c23
-rw-r--r--mm/vmscan.c43
12 files changed, 118 insertions, 65 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c2bf86f470ed..65d420499a61 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -30,6 +30,7 @@ EXPORT_SYMBOL_GPL(default_backing_dev_info);
30 30
31struct backing_dev_info noop_backing_dev_info = { 31struct backing_dev_info noop_backing_dev_info = {
32 .name = "noop", 32 .name = "noop",
33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
33}; 34};
34EXPORT_SYMBOL_GPL(noop_backing_dev_info); 35EXPORT_SYMBOL_GPL(noop_backing_dev_info);
35 36
@@ -243,6 +244,7 @@ static int __init default_bdi_init(void)
243 err = bdi_init(&default_backing_dev_info); 244 err = bdi_init(&default_backing_dev_info);
244 if (!err) 245 if (!err)
245 bdi_register(&default_backing_dev_info, NULL, "default"); 246 bdi_register(&default_backing_dev_info, NULL, "default");
247 err = bdi_init(&noop_backing_dev_info);
246 248
247 return err; 249 return err;
248} 250}
diff --git a/mm/fremap.c b/mm/fremap.c
index 46f5dacf90a2..ec520c7b28df 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -125,7 +125,6 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
125{ 125{
126 struct mm_struct *mm = current->mm; 126 struct mm_struct *mm = current->mm;
127 struct address_space *mapping; 127 struct address_space *mapping;
128 unsigned long end = start + size;
129 struct vm_area_struct *vma; 128 struct vm_area_struct *vma;
130 int err = -EINVAL; 129 int err = -EINVAL;
131 int has_write_lock = 0; 130 int has_write_lock = 0;
@@ -142,6 +141,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
142 if (start + size <= start) 141 if (start + size <= start)
143 return err; 142 return err;
144 143
144 /* Does pgoff wrap? */
145 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
146 return err;
147
145 /* Can we represent this offset inside this architecture's pte's? */ 148 /* Can we represent this offset inside this architecture's pte's? */
146#if PTE_FILE_MAX_BITS < BITS_PER_LONG 149#if PTE_FILE_MAX_BITS < BITS_PER_LONG
147 if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS)) 150 if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
@@ -168,7 +171,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
168 if (!(vma->vm_flags & VM_CAN_NONLINEAR)) 171 if (!(vma->vm_flags & VM_CAN_NONLINEAR))
169 goto out; 172 goto out;
170 173
171 if (end <= start || start < vma->vm_start || end > vma->vm_end) 174 if (start < vma->vm_start || start + size > vma->vm_end)
172 goto out; 175 goto out;
173 176
174 /* Must set VM_NONLINEAR before any pages are populated. */ 177 /* Must set VM_NONLINEAR before any pages are populated. */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cc5be788a39f..c03273807182 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2324,11 +2324,8 @@ retry_avoidcopy:
2324 * and just make the page writable */ 2324 * and just make the page writable */
2325 avoidcopy = (page_mapcount(old_page) == 1); 2325 avoidcopy = (page_mapcount(old_page) == 1);
2326 if (avoidcopy) { 2326 if (avoidcopy) {
2327 if (!trylock_page(old_page)) { 2327 if (PageAnon(old_page))
2328 if (PageAnon(old_page)) 2328 page_move_anon_rmap(old_page, vma, address);
2329 page_move_anon_rmap(old_page, vma, address);
2330 } else
2331 unlock_page(old_page);
2332 set_huge_ptep_writable(vma, address, ptep); 2329 set_huge_ptep_writable(vma, address, ptep);
2333 return 0; 2330 return 0;
2334 } 2331 }
@@ -2404,7 +2401,7 @@ retry_avoidcopy:
2404 set_huge_pte_at(mm, address, ptep, 2401 set_huge_pte_at(mm, address, ptep,
2405 make_huge_pte(vma, new_page, 1)); 2402 make_huge_pte(vma, new_page, 1));
2406 page_remove_rmap(old_page); 2403 page_remove_rmap(old_page);
2407 hugepage_add_anon_rmap(new_page, vma, address); 2404 hugepage_add_new_anon_rmap(new_page, vma, address);
2408 /* Make the old page be freed below */ 2405 /* Make the old page be freed below */
2409 new_page = old_page; 2406 new_page = old_page;
2410 mmu_notifier_invalidate_range_end(mm, 2407 mmu_notifier_invalidate_range_end(mm,
@@ -2631,10 +2628,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2631 vma, address); 2628 vma, address);
2632 } 2629 }
2633 2630
2634 if (!pagecache_page) { 2631 /*
2635 page = pte_page(entry); 2632 * hugetlb_cow() requires page locks of pte_page(entry) and
2633 * pagecache_page, so here we need take the former one
2634 * when page != pagecache_page or !pagecache_page.
2635 * Note that locking order is always pagecache_page -> page,
2636 * so no worry about deadlock.
2637 */
2638 page = pte_page(entry);
2639 if (page != pagecache_page)
2636 lock_page(page); 2640 lock_page(page);
2637 }
2638 2641
2639 spin_lock(&mm->page_table_lock); 2642 spin_lock(&mm->page_table_lock);
2640 /* Check for a racing update before calling hugetlb_cow */ 2643 /* Check for a racing update before calling hugetlb_cow */
@@ -2661,9 +2664,8 @@ out_page_table_lock:
2661 if (pagecache_page) { 2664 if (pagecache_page) {
2662 unlock_page(pagecache_page); 2665 unlock_page(pagecache_page);
2663 put_page(pagecache_page); 2666 put_page(pagecache_page);
2664 } else {
2665 unlock_page(page);
2666 } 2667 }
2668 unlock_page(page);
2667 2669
2668out_mutex: 2670out_mutex:
2669 mutex_unlock(&hugetlb_instantiation_mutex); 2671 mutex_unlock(&hugetlb_instantiation_mutex);
diff --git a/mm/ksm.c b/mm/ksm.c
index b1873cf03ed9..65ab5c7067d9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -712,7 +712,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
712 if (!ptep) 712 if (!ptep)
713 goto out; 713 goto out;
714 714
715 if (pte_write(*ptep)) { 715 if (pte_write(*ptep) || pte_dirty(*ptep)) {
716 pte_t entry; 716 pte_t entry;
717 717
718 swapped = PageSwapCache(page); 718 swapped = PageSwapCache(page);
@@ -735,7 +735,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
735 set_pte_at(mm, addr, ptep, entry); 735 set_pte_at(mm, addr, ptep, entry);
736 goto out_unlock; 736 goto out_unlock;
737 } 737 }
738 entry = pte_wrprotect(entry); 738 if (pte_dirty(entry))
739 set_page_dirty(page);
740 entry = pte_mkclean(pte_wrprotect(entry));
739 set_pte_at_notify(mm, addr, ptep, entry); 741 set_pte_at_notify(mm, addr, ptep, entry);
740 } 742 }
741 *orig_pte = *ptep; 743 *orig_pte = *ptep;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3eed583895a6..9be3cf8a5da4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3587,9 +3587,13 @@ unlock:
3587 3587
3588static void mem_cgroup_threshold(struct mem_cgroup *memcg) 3588static void mem_cgroup_threshold(struct mem_cgroup *memcg)
3589{ 3589{
3590 __mem_cgroup_threshold(memcg, false); 3590 while (memcg) {
3591 if (do_swap_account) 3591 __mem_cgroup_threshold(memcg, false);
3592 __mem_cgroup_threshold(memcg, true); 3592 if (do_swap_account)
3593 __mem_cgroup_threshold(memcg, true);
3594
3595 memcg = parent_mem_cgroup(memcg);
3596 }
3593} 3597}
3594 3598
3595static int compare_thresholds(const void *a, const void *b) 3599static int compare_thresholds(const void *a, const void *b)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9c26eeca1342..757f6b0accfe 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(hwpoison_filter);
183 * signal. 183 * signal.
184 */ 184 */
185static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, 185static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
186 unsigned long pfn) 186 unsigned long pfn, struct page *page)
187{ 187{
188 struct siginfo si; 188 struct siginfo si;
189 int ret; 189 int ret;
@@ -198,7 +198,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
198#ifdef __ARCH_SI_TRAPNO 198#ifdef __ARCH_SI_TRAPNO
199 si.si_trapno = trapno; 199 si.si_trapno = trapno;
200#endif 200#endif
201 si.si_addr_lsb = PAGE_SHIFT; 201 si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
202 /* 202 /*
203 * Don't use force here, it's convenient if the signal 203 * Don't use force here, it's convenient if the signal
204 * can be temporarily blocked. 204 * can be temporarily blocked.
@@ -235,7 +235,7 @@ void shake_page(struct page *p, int access)
235 int nr; 235 int nr;
236 do { 236 do {
237 nr = shrink_slab(1000, GFP_KERNEL, 1000); 237 nr = shrink_slab(1000, GFP_KERNEL, 1000);
238 if (page_count(p) == 0) 238 if (page_count(p) == 1)
239 break; 239 break;
240 } while (nr > 10); 240 } while (nr > 10);
241 } 241 }
@@ -327,7 +327,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
327 * wrong earlier. 327 * wrong earlier.
328 */ 328 */
329static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, 329static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
330 int fail, unsigned long pfn) 330 int fail, struct page *page, unsigned long pfn)
331{ 331{
332 struct to_kill *tk, *next; 332 struct to_kill *tk, *next;
333 333
@@ -352,7 +352,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
352 * process anyways. 352 * process anyways.
353 */ 353 */
354 else if (kill_proc_ao(tk->tsk, tk->addr, trapno, 354 else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
355 pfn) < 0) 355 pfn, page) < 0)
356 printk(KERN_ERR 356 printk(KERN_ERR
357 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", 357 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
358 pfn, tk->tsk->comm, tk->tsk->pid); 358 pfn, tk->tsk->comm, tk->tsk->pid);
@@ -928,7 +928,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
928 * any accesses to the poisoned memory. 928 * any accesses to the poisoned memory.
929 */ 929 */
930 kill_procs_ao(&tokill, !!PageDirty(hpage), trapno, 930 kill_procs_ao(&tokill, !!PageDirty(hpage), trapno,
931 ret != SWAP_SUCCESS, pfn); 931 ret != SWAP_SUCCESS, p, pfn);
932 932
933 return ret; 933 return ret;
934} 934}
diff --git a/mm/mmap.c b/mm/mmap.c
index 6128dc8e5ede..00161a48a451 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2009,6 +2009,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
2009 removed_exe_file_vma(mm); 2009 removed_exe_file_vma(mm);
2010 fput(new->vm_file); 2010 fput(new->vm_file);
2011 } 2011 }
2012 unlink_anon_vmas(new);
2012 out_free_mpol: 2013 out_free_mpol:
2013 mpol_put(pol); 2014 mpol_put(pol);
2014 out_free_vma: 2015 out_free_vma:
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index fc81cb22869e..4029583a1024 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -121,8 +121,8 @@ struct task_struct *find_lock_task_mm(struct task_struct *p)
121} 121}
122 122
123/* return true if the task is not adequate as candidate victim task. */ 123/* return true if the task is not adequate as candidate victim task. */
124static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *mem, 124static bool oom_unkillable_task(struct task_struct *p,
125 const nodemask_t *nodemask) 125 const struct mem_cgroup *mem, const nodemask_t *nodemask)
126{ 126{
127 if (is_global_init(p)) 127 if (is_global_init(p))
128 return true; 128 return true;
@@ -208,8 +208,13 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
208 */ 208 */
209 points += p->signal->oom_score_adj; 209 points += p->signal->oom_score_adj;
210 210
211 if (points < 0) 211 /*
212 return 0; 212 * Never return 0 for an eligible task that may be killed since it's
213 * possible that no single user task uses more than 0.1% of memory and
214 * no single admin tasks uses more than 3.0%.
215 */
216 if (points <= 0)
217 return 1;
213 return (points < 1000) ? points : 1000; 218 return (points < 1000) ? points : 1000;
214} 219}
215 220
@@ -339,26 +344,24 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
339/** 344/**
340 * dump_tasks - dump current memory state of all system tasks 345 * dump_tasks - dump current memory state of all system tasks
341 * @mem: current's memory controller, if constrained 346 * @mem: current's memory controller, if constrained
347 * @nodemask: nodemask passed to page allocator for mempolicy ooms
342 * 348 *
343 * Dumps the current memory state of all system tasks, excluding kernel threads. 349 * Dumps the current memory state of all eligible tasks. Tasks not in the same
350 * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
351 * are not shown.
344 * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj 352 * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj
345 * value, oom_score_adj value, and name. 353 * value, oom_score_adj value, and name.
346 * 354 *
347 * If the actual is non-NULL, only tasks that are a member of the mem_cgroup are
348 * shown.
349 *
350 * Call with tasklist_lock read-locked. 355 * Call with tasklist_lock read-locked.
351 */ 356 */
352static void dump_tasks(const struct mem_cgroup *mem) 357static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
353{ 358{
354 struct task_struct *p; 359 struct task_struct *p;
355 struct task_struct *task; 360 struct task_struct *task;
356 361
357 pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n"); 362 pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n");
358 for_each_process(p) { 363 for_each_process(p) {
359 if (p->flags & PF_KTHREAD) 364 if (oom_unkillable_task(p, mem, nodemask))
360 continue;
361 if (mem && !task_in_mem_cgroup(p, mem))
362 continue; 365 continue;
363 366
364 task = find_lock_task_mm(p); 367 task = find_lock_task_mm(p);
@@ -381,7 +384,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
381} 384}
382 385
383static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, 386static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
384 struct mem_cgroup *mem) 387 struct mem_cgroup *mem, const nodemask_t *nodemask)
385{ 388{
386 task_lock(current); 389 task_lock(current);
387 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " 390 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
@@ -394,7 +397,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
394 mem_cgroup_print_oom_info(mem, p); 397 mem_cgroup_print_oom_info(mem, p);
395 show_mem(); 398 show_mem();
396 if (sysctl_oom_dump_tasks) 399 if (sysctl_oom_dump_tasks)
397 dump_tasks(mem); 400 dump_tasks(mem, nodemask);
398} 401}
399 402
400#define K(x) ((x) << (PAGE_SHIFT-10)) 403#define K(x) ((x) << (PAGE_SHIFT-10))
@@ -436,7 +439,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
436 unsigned int victim_points = 0; 439 unsigned int victim_points = 0;
437 440
438 if (printk_ratelimit()) 441 if (printk_ratelimit())
439 dump_header(p, gfp_mask, order, mem); 442 dump_header(p, gfp_mask, order, mem, nodemask);
440 443
441 /* 444 /*
442 * If the task is already exiting, don't alarm the sysadmin or kill 445 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -482,7 +485,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
482 * Determines whether the kernel must panic because of the panic_on_oom sysctl. 485 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
483 */ 486 */
484static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, 487static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
485 int order) 488 int order, const nodemask_t *nodemask)
486{ 489{
487 if (likely(!sysctl_panic_on_oom)) 490 if (likely(!sysctl_panic_on_oom))
488 return; 491 return;
@@ -496,7 +499,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
496 return; 499 return;
497 } 500 }
498 read_lock(&tasklist_lock); 501 read_lock(&tasklist_lock);
499 dump_header(NULL, gfp_mask, order, NULL); 502 dump_header(NULL, gfp_mask, order, NULL, nodemask);
500 read_unlock(&tasklist_lock); 503 read_unlock(&tasklist_lock);
501 panic("Out of memory: %s panic_on_oom is enabled\n", 504 panic("Out of memory: %s panic_on_oom is enabled\n",
502 sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); 505 sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
@@ -509,7 +512,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
509 unsigned int points = 0; 512 unsigned int points = 0;
510 struct task_struct *p; 513 struct task_struct *p;
511 514
512 check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0); 515 check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL);
513 limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; 516 limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;
514 read_lock(&tasklist_lock); 517 read_lock(&tasklist_lock);
515retry: 518retry:
@@ -641,6 +644,7 @@ static void clear_system_oom(void)
641void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, 644void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
642 int order, nodemask_t *nodemask) 645 int order, nodemask_t *nodemask)
643{ 646{
647 const nodemask_t *mpol_mask;
644 struct task_struct *p; 648 struct task_struct *p;
645 unsigned long totalpages; 649 unsigned long totalpages;
646 unsigned long freed = 0; 650 unsigned long freed = 0;
@@ -670,7 +674,8 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
670 */ 674 */
671 constraint = constrained_alloc(zonelist, gfp_mask, nodemask, 675 constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
672 &totalpages); 676 &totalpages);
673 check_panic_on_oom(constraint, gfp_mask, order); 677 mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
678 check_panic_on_oom(constraint, gfp_mask, order, mpol_mask);
674 679
675 read_lock(&tasklist_lock); 680 read_lock(&tasklist_lock);
676 if (sysctl_oom_kill_allocating_task && 681 if (sysctl_oom_kill_allocating_task &&
@@ -688,15 +693,13 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
688 } 693 }
689 694
690retry: 695retry:
691 p = select_bad_process(&points, totalpages, NULL, 696 p = select_bad_process(&points, totalpages, NULL, mpol_mask);
692 constraint == CONSTRAINT_MEMORY_POLICY ? nodemask :
693 NULL);
694 if (PTR_ERR(p) == -1UL) 697 if (PTR_ERR(p) == -1UL)
695 goto out; 698 goto out;
696 699
697 /* Found nothing?!?! Either we hang forever, or we panic. */ 700 /* Found nothing?!?! Either we hang forever, or we panic. */
698 if (!p) { 701 if (!p) {
699 dump_header(NULL, gfp_mask, order, NULL); 702 dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
700 read_unlock(&tasklist_lock); 703 read_unlock(&tasklist_lock);
701 panic("Out of memory and no killable processes...\n"); 704 panic("Out of memory and no killable processes...\n");
702 } 705 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a8cfa9cc6e86..f12ad1836abe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5182,9 +5182,9 @@ void *__init alloc_large_system_hash(const char *tablename,
5182 if (!table) 5182 if (!table)
5183 panic("Failed to allocate %s hash table\n", tablename); 5183 panic("Failed to allocate %s hash table\n", tablename);
5184 5184
5185 printk(KERN_INFO "%s hash table entries: %d (order: %d, %lu bytes)\n", 5185 printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n",
5186 tablename, 5186 tablename,
5187 (1U << log2qty), 5187 (1UL << log2qty),
5188 ilog2(size) - PAGE_SHIFT, 5188 ilog2(size) - PAGE_SHIFT,
5189 size); 5189 size);
5190 5190
diff --git a/mm/percpu.c b/mm/percpu.c
index 58c572b18b07..c76ef3891e0d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1401,9 +1401,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1401 1401
1402 if (pcpu_first_unit_cpu == NR_CPUS) 1402 if (pcpu_first_unit_cpu == NR_CPUS)
1403 pcpu_first_unit_cpu = cpu; 1403 pcpu_first_unit_cpu = cpu;
1404 pcpu_last_unit_cpu = cpu;
1404 } 1405 }
1405 } 1406 }
1406 pcpu_last_unit_cpu = cpu;
1407 pcpu_nr_units = unit; 1407 pcpu_nr_units = unit;
1408 1408
1409 for_each_possible_cpu(cpu) 1409 for_each_possible_cpu(cpu)
diff --git a/mm/rmap.c b/mm/rmap.c
index f6f0d2dda2ea..92e6757f196e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -381,7 +381,13 @@ vma_address(struct page *page, struct vm_area_struct *vma)
381unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) 381unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
382{ 382{
383 if (PageAnon(page)) { 383 if (PageAnon(page)) {
384 if (vma->anon_vma->root != page_anon_vma(page)->root) 384 struct anon_vma *page__anon_vma = page_anon_vma(page);
385 /*
386 * Note: swapoff's unuse_vma() is more efficient with this
387 * check, and needs it to match anon_vma when KSM is active.
388 */
389 if (!vma->anon_vma || !page__anon_vma ||
390 vma->anon_vma->root != page__anon_vma->root)
385 return -EFAULT; 391 return -EFAULT;
386 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { 392 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
387 if (!vma->vm_file || 393 if (!vma->vm_file ||
@@ -1564,13 +1570,14 @@ static void __hugepage_set_anon_rmap(struct page *page,
1564 struct vm_area_struct *vma, unsigned long address, int exclusive) 1570 struct vm_area_struct *vma, unsigned long address, int exclusive)
1565{ 1571{
1566 struct anon_vma *anon_vma = vma->anon_vma; 1572 struct anon_vma *anon_vma = vma->anon_vma;
1573
1567 BUG_ON(!anon_vma); 1574 BUG_ON(!anon_vma);
1568 if (!exclusive) { 1575
1569 struct anon_vma_chain *avc; 1576 if (PageAnon(page))
1570 avc = list_entry(vma->anon_vma_chain.prev, 1577 return;
1571 struct anon_vma_chain, same_vma); 1578 if (!exclusive)
1572 anon_vma = avc->anon_vma; 1579 anon_vma = anon_vma->root;
1573 } 1580
1574 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; 1581 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1575 page->mapping = (struct address_space *) anon_vma; 1582 page->mapping = (struct address_space *) anon_vma;
1576 page->index = linear_page_index(vma, address); 1583 page->index = linear_page_index(vma, address);
@@ -1581,6 +1588,8 @@ void hugepage_add_anon_rmap(struct page *page,
1581{ 1588{
1582 struct anon_vma *anon_vma = vma->anon_vma; 1589 struct anon_vma *anon_vma = vma->anon_vma;
1583 int first; 1590 int first;
1591
1592 BUG_ON(!PageLocked(page));
1584 BUG_ON(!anon_vma); 1593 BUG_ON(!anon_vma);
1585 BUG_ON(address < vma->vm_start || address >= vma->vm_end); 1594 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1586 first = atomic_inc_and_test(&page->_mapcount); 1595 first = atomic_inc_and_test(&page->_mapcount);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c391c320dbaf..c5dfabf25f11 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1804,12 +1804,11 @@ static void shrink_zone(int priority, struct zone *zone,
1804 * If a zone is deemed to be full of pinned pages then just give it a light 1804 * If a zone is deemed to be full of pinned pages then just give it a light
1805 * scan then give up on it. 1805 * scan then give up on it.
1806 */ 1806 */
1807static bool shrink_zones(int priority, struct zonelist *zonelist, 1807static void shrink_zones(int priority, struct zonelist *zonelist,
1808 struct scan_control *sc) 1808 struct scan_control *sc)
1809{ 1809{
1810 struct zoneref *z; 1810 struct zoneref *z;
1811 struct zone *zone; 1811 struct zone *zone;
1812 bool all_unreclaimable = true;
1813 1812
1814 for_each_zone_zonelist_nodemask(zone, z, zonelist, 1813 for_each_zone_zonelist_nodemask(zone, z, zonelist,
1815 gfp_zone(sc->gfp_mask), sc->nodemask) { 1814 gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -1827,8 +1826,38 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
1827 } 1826 }
1828 1827
1829 shrink_zone(priority, zone, sc); 1828 shrink_zone(priority, zone, sc);
1830 all_unreclaimable = false;
1831 } 1829 }
1830}
1831
1832static bool zone_reclaimable(struct zone *zone)
1833{
1834 return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
1835}
1836
1837/*
1838 * As hibernation is going on, kswapd is freezed so that it can't mark
1839 * the zone into all_unreclaimable. It can't handle OOM during hibernation.
1840 * So let's check zone's unreclaimable in direct reclaim as well as kswapd.
1841 */
1842static bool all_unreclaimable(struct zonelist *zonelist,
1843 struct scan_control *sc)
1844{
1845 struct zoneref *z;
1846 struct zone *zone;
1847 bool all_unreclaimable = true;
1848
1849 for_each_zone_zonelist_nodemask(zone, z, zonelist,
1850 gfp_zone(sc->gfp_mask), sc->nodemask) {
1851 if (!populated_zone(zone))
1852 continue;
1853 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1854 continue;
1855 if (zone_reclaimable(zone)) {
1856 all_unreclaimable = false;
1857 break;
1858 }
1859 }
1860
1832 return all_unreclaimable; 1861 return all_unreclaimable;
1833} 1862}
1834 1863
@@ -1852,7 +1881,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1852 struct scan_control *sc) 1881 struct scan_control *sc)
1853{ 1882{
1854 int priority; 1883 int priority;
1855 bool all_unreclaimable;
1856 unsigned long total_scanned = 0; 1884 unsigned long total_scanned = 0;
1857 struct reclaim_state *reclaim_state = current->reclaim_state; 1885 struct reclaim_state *reclaim_state = current->reclaim_state;
1858 struct zoneref *z; 1886 struct zoneref *z;
@@ -1869,7 +1897,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1869 sc->nr_scanned = 0; 1897 sc->nr_scanned = 0;
1870 if (!priority) 1898 if (!priority)
1871 disable_swap_token(); 1899 disable_swap_token();
1872 all_unreclaimable = shrink_zones(priority, zonelist, sc); 1900 shrink_zones(priority, zonelist, sc);
1873 /* 1901 /*
1874 * Don't shrink slabs when reclaiming memory from 1902 * Don't shrink slabs when reclaiming memory from
1875 * over limit cgroups 1903 * over limit cgroups
@@ -1931,7 +1959,7 @@ out:
1931 return sc->nr_reclaimed; 1959 return sc->nr_reclaimed;
1932 1960
1933 /* top priority shrink_zones still had more to do? don't OOM, then */ 1961 /* top priority shrink_zones still had more to do? don't OOM, then */
1934 if (scanning_global_lru(sc) && !all_unreclaimable) 1962 if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
1935 return 1; 1963 return 1;
1936 1964
1937 return 0; 1965 return 0;
@@ -2197,8 +2225,7 @@ loop_again:
2197 total_scanned += sc.nr_scanned; 2225 total_scanned += sc.nr_scanned;
2198 if (zone->all_unreclaimable) 2226 if (zone->all_unreclaimable)
2199 continue; 2227 continue;
2200 if (nr_slab == 0 && 2228 if (nr_slab == 0 && !zone_reclaimable(zone))
2201 zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6))
2202 zone->all_unreclaimable = 1; 2229 zone->all_unreclaimable = 1;
2203 /* 2230 /*
2204 * If we've done a decent amount of scanning and 2231 * If we've done a decent amount of scanning and