path: root/mm
author    James Morris <jmorris@namei.org>  2011-04-19 07:32:41 -0400
committer James Morris <jmorris@namei.org>  2011-04-19 07:32:41 -0400
commit    d4ab4e6a23f805abb8fc3cc34525eec3788aeca1
tree      eefd82c155bc27469a85667d759cd90facf4a6e3 /mm
parent    c0fa797ae6cd02ff87c0bfe0d509368a3b45640e
parent    96fd2d57b8252e16dfacf8941f7a74a6119197f5
Merge branch 'master'; commit 'v2.6.39-rc3' into next
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c        2
-rw-r--r--  mm/huge_memory.c       49
-rw-r--r--  mm/hugetlb.c           10
-rw-r--r--  mm/hwpoison-inject.c    2
-rw-r--r--  mm/internal.h           2
-rw-r--r--  mm/kmemleak.c           6
-rw-r--r--  mm/ksm.c                2
-rw-r--r--  mm/memcontrol.c         8
-rw-r--r--  mm/memory-failure.c     6
-rw-r--r--  mm/memory.c            28
-rw-r--r--  mm/memory_hotplug.c     4
-rw-r--r--  mm/migrate.c            2
-rw-r--r--  mm/mlock.c             13
-rw-r--r--  mm/mmap.c              15
-rw-r--r--  mm/mremap.c            11
-rw-r--r--  mm/nobootmem.c          2
-rw-r--r--  mm/oom_kill.c          28
-rw-r--r--  mm/page_alloc.c         6
-rw-r--r--  mm/page_cgroup.c        2
-rw-r--r--  mm/percpu.c            10
-rw-r--r--  mm/shmem.c              6
-rw-r--r--  mm/slab.c               4
-rw-r--r--  mm/slub.c               8
-rw-r--r--  mm/sparse.c             2
-rw-r--r--  mm/util.c               2
-rw-r--r--  mm/vmscan.c            28
-rw-r--r--  mm/vmstat.c            18
27 files changed, 146 insertions, 130 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0d9a036ada66..befc87531e4f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -787,7 +787,7 @@ EXPORT_SYMBOL(congestion_wait);
  * jiffies for either a BDI to exit congestion of the given @sync queue
  * or a write to complete.
  *
- * In the absense of zone congestion, cond_resched() is called to yield
+ * In the absence of zone congestion, cond_resched() is called to yield
  * the processor if necessary but otherwise does not sleep.
  *
  * The return value is 0 if the sleep is for the full timeout. Otherwise,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0a619e0e2e0b..470dcda10add 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -244,24 +244,28 @@ static ssize_t single_flag_show(struct kobject *kobj,
                 struct kobj_attribute *attr, char *buf,
                 enum transparent_hugepage_flag flag)
 {
-        if (test_bit(flag, &transparent_hugepage_flags))
-                return sprintf(buf, "[yes] no\n");
-        else
-                return sprintf(buf, "yes [no]\n");
+        return sprintf(buf, "%d\n",
+                       !!test_bit(flag, &transparent_hugepage_flags));
 }
+
 static ssize_t single_flag_store(struct kobject *kobj,
                  struct kobj_attribute *attr,
                  const char *buf, size_t count,
                  enum transparent_hugepage_flag flag)
 {
-        if (!memcmp("yes", buf,
-                    min(sizeof("yes")-1, count))) {
+        unsigned long value;
+        int ret;
+
+        ret = kstrtoul(buf, 10, &value);
+        if (ret < 0)
+                return ret;
+        if (value > 1)
+                return -EINVAL;
+
+        if (value)
                 set_bit(flag, &transparent_hugepage_flags);
-        } else if (!memcmp("no", buf,
-                   min(sizeof("no")-1, count))) {
+        else
                 clear_bit(flag, &transparent_hugepage_flags);
-        } else
-                return -EINVAL;
 
         return count;
 }
@@ -680,8 +684,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 return VM_FAULT_OOM;
         page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
                                   vma, haddr, numa_node_id(), 0);
-        if (unlikely(!page))
+        if (unlikely(!page)) {
+                count_vm_event(THP_FAULT_FALLBACK);
                 goto out;
+        }
+        count_vm_event(THP_FAULT_ALLOC);
         if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
                 put_page(page);
                 goto out;
@@ -909,11 +916,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 new_page = NULL;
 
         if (unlikely(!new_page)) {
+                count_vm_event(THP_FAULT_FALLBACK);
                 ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
                                                    pmd, orig_pmd, page, haddr);
                 put_page(page);
                 goto out;
         }
+        count_vm_event(THP_FAULT_ALLOC);
 
         if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
                 put_page(new_page);
@@ -1390,6 +1399,7 @@ int split_huge_page(struct page *page)
 
         BUG_ON(!PageSwapBacked(page));
         __split_huge_page(page, anon_vma);
+        count_vm_event(THP_SPLIT);
 
         BUG_ON(PageCompound(page));
 out_unlock:
@@ -1784,9 +1794,11 @@ static void collapse_huge_page(struct mm_struct *mm,
                                       node, __GFP_OTHER_NODE);
         if (unlikely(!new_page)) {
                 up_read(&mm->mmap_sem);
+                count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                 *hpage = ERR_PTR(-ENOMEM);
                 return;
         }
+        count_vm_event(THP_COLLAPSE_ALLOC);
         if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
                 up_read(&mm->mmap_sem);
                 put_page(new_page);
@@ -2151,8 +2163,11 @@ static void khugepaged_do_scan(struct page **hpage)
 #ifndef CONFIG_NUMA
                 if (!*hpage) {
                         *hpage = alloc_hugepage(khugepaged_defrag());
-                        if (unlikely(!*hpage))
+                        if (unlikely(!*hpage)) {
+                                count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                                 break;
+                        }
+                        count_vm_event(THP_COLLAPSE_ALLOC);
                 }
 #else
                 if (IS_ERR(*hpage))
@@ -2192,8 +2207,11 @@ static struct page *khugepaged_alloc_hugepage(void)
 
         do {
                 hpage = alloc_hugepage(khugepaged_defrag());
-                if (!hpage)
+                if (!hpage) {
+                        count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                         khugepaged_alloc_sleep();
+                } else
+                        count_vm_event(THP_COLLAPSE_ALLOC);
         } while (unlikely(!hpage) &&
                  likely(khugepaged_enabled()));
         return hpage;
@@ -2210,8 +2228,11 @@ static void khugepaged_loop(void)
         while (likely(khugepaged_enabled())) {
 #ifndef CONFIG_NUMA
                 hpage = khugepaged_alloc_hugepage();
-                if (unlikely(!hpage))
+                if (unlikely(!hpage)) {
+                        count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                         break;
+                }
+                count_vm_event(THP_COLLAPSE_ALLOC);
 #else
                 if (IS_ERR(hpage)) {
                         khugepaged_alloc_sleep();
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 06de5aa4d644..8ee3bd8ec5b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -146,7 +146,7 @@ static long region_chg(struct list_head *head, long f, long t)
                 if (rg->from > t)
                         return chg;
 
-                /* We overlap with this area, if it extends futher than
+                /* We overlap with this area, if it extends further than
                  * us then we must extend ourselves. Account for its
                  * existing reservation. */
                 if (rg->to > t) {
@@ -842,7 +842,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 }
 
 /*
- * Increase the hugetlb pool such that it can accomodate a reservation
+ * Increase the hugetlb pool such that it can accommodate a reservation
  * of size 'delta'.
  */
 static int gather_surplus_pages(struct hstate *h, int delta)
@@ -890,7 +890,7 @@ retry:
 
         /*
          * The surplus_list now contains _at_least_ the number of extra pages
-         * needed to accomodate the reservation. Add the appropriate number
+         * needed to accommodate the reservation. Add the appropriate number
          * of pages to the hugetlb pool and free the extras back to the buddy
          * allocator. Commit the entire reservation here to prevent another
          * process from stealing the pages as they are added to the pool but
@@ -2043,7 +2043,7 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
          * This new VMA should share its siblings reservation map if present.
          * The VMA will only ever have a valid reservation map pointer where
          * it is being copied for another still existing VMA. As that VMA
-         * has a reference to the reservation map it cannot dissappear until
+         * has a reference to the reservation map it cannot disappear until
          * after this open call completes. It is therefore safe to take a
          * new reference here without additional locking.
          */
@@ -2490,7 +2490,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         /*
          * Currently, we are forced to kill the process in the event the
          * original mapper has unmapped pages from the child due to a failed
-         * COW. Warn that such a situation has occured as it may not be obvious
+         * COW. Warn that such a situation has occurred as it may not be obvious
          */
         if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
                 printk(KERN_WARNING
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 0948f1072d6b..c7fc7fd00e32 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -1,4 +1,4 @@
-/* Inject a hwpoison memory failure on a arbitary pfn */
+/* Inject a hwpoison memory failure on a arbitrary pfn */
 #include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
diff --git a/mm/internal.h b/mm/internal.h
index 3438dd43a062..9d0ced8e505e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -162,7 +162,7 @@ static inline struct page *mem_map_offset(struct page *base, int offset)
 }
 
 /*
- * Iterator over all subpages withing the maximally aligned gigantic
+ * Iterator over all subpages within the maximally aligned gigantic
  * page 'base'. Handle any discontiguity in the mem_map.
  */
 static inline struct page *mem_map_next(struct page *iter,
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 84225f3b7190..c1d5867543e4 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -265,7 +265,7 @@ static void kmemleak_disable(void);
 } while (0)
 
 /*
- * Macro invoked when a serious kmemleak condition occured and cannot be
+ * Macro invoked when a serious kmemleak condition occurred and cannot be
  * recovered from. Kmemleak will be disabled and further allocation/freeing
  * tracing no longer available.
  */
@@ -1006,7 +1006,7 @@ static bool update_checksum(struct kmemleak_object *object)
 
 /*
  * Memory scanning is a long process and it needs to be interruptable. This
- * function checks whether such interrupt condition occured.
+ * function checks whether such interrupt condition occurred.
  */
 static int scan_should_stop(void)
 {
@@ -1733,7 +1733,7 @@ static int __init kmemleak_late_init(void)
 
         if (atomic_read(&kmemleak_error)) {
                 /*
-                 * Some error occured and kmemleak was disabled. There is a
+                 * Some error occurred and kmemleak was disabled. There is a
                  * small chance that kmemleak_disable() was called immediately
                  * after setting kmemleak_initialized and we may end up with
                  * two clean-up threads but serialized by scan_mutex.
diff --git a/mm/ksm.c b/mm/ksm.c
index 1bbe785aa559..942dfc73a2ff 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -720,7 +720,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
         swapped = PageSwapCache(page);
         flush_cache_page(vma, addr, page_to_pfn(page));
         /*
-         * Ok this is tricky, when get_user_pages_fast() run it doesnt
+         * Ok this is tricky, when get_user_pages_fast() run it doesn't
          * take any lock, therefore the check that we are going to make
          * with the pagecount against the mapcount is racey and
          * O_DIRECT can happen right after the check.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1f0b460fe58c..010f9166fa6e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1466,7 +1466,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                                 break;
                         }
                         /*
-                         * We want to do more targetted reclaim.
+                         * We want to do more targeted reclaim.
                          * excess >> 2 is not to excessive so as to
                          * reclaim too much, nor too less that we keep
                          * coming back to reclaim from this cgroup
@@ -2265,7 +2265,7 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
  * - compound_lock is held when nr_pages > 1
  *
  * This function doesn't do "charge" nor css_get to new cgroup. It should be
- * done by a caller(__mem_cgroup_try_charge would be usefull). If @uncharge is
+ * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is
  * true, this function does "uncharge" from old cgroup, but it doesn't if
  * @uncharge is false, so a caller should do "uncharge".
  */
@@ -2318,7 +2318,7 @@ static int mem_cgroup_move_account(struct page *page,
          * We charges against "to" which may not have any tasks. Then, "to"
          * can be under rmdir(). But in current implementation, caller of
          * this function is just force_empty() and move charge, so it's
-         * garanteed that "to" is never removed. So, we don't check rmdir
+         * guaranteed that "to" is never removed. So, we don't check rmdir
          * status here.
          */
         move_unlock_page_cgroup(pc, &flags);
@@ -2648,7 +2648,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
         batch->memcg = mem;
         /*
          * do_batch > 0 when unmapping pages or inode invalidate/truncate.
-         * In those cases, all pages freed continously can be expected to be in
+         * In those cases, all pages freed continuously can be expected to be in
          * the same cgroup and we have chance to coalesce uncharges.
          * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
          * because we want to do uncharge as soon as possible.
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 37feb9fec228..2b9a5eef39e0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -208,7 +208,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
          * Don't use force here, it's convenient if the signal
          * can be temporarily blocked.
          * This could cause a loop when the user sets SIGBUS
-         * to SIG_IGN, but hopefully noone will do that?
+         * to SIG_IGN, but hopefully no one will do that?
          */
         ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
         if (ret < 0)
@@ -634,7 +634,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
          * when the page is reread or dropped. If an
          * application assumes it will always get error on
          * fsync, but does other operations on the fd before
-         * and the page is dropped inbetween then the error
+         * and the page is dropped between then the error
          * will not be properly reported.
          *
          * This can already happen even without hwpoisoned
@@ -728,7 +728,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
  * The table matches them in order and calls the right handler.
  *
  * This is quite tricky because we can access page at any time
- * in its live cycle, so all accesses have to be extremly careful.
+ * in its live cycle, so all accesses have to be extremely careful.
  *
  * This is not complete. More states could be added.
  * For any missing state don't attempt recovery.
diff --git a/mm/memory.c b/mm/memory.c
index 9da8cab1b1b0..ce22a250926f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1410,6 +1410,13 @@ no_page_table:
         return page;
 }
 
+static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
+{
+        return (vma->vm_flags & VM_GROWSDOWN) &&
+                (vma->vm_start == addr) &&
+                !vma_stack_continue(vma->vm_prev, addr);
+}
+
 /**
  * __get_user_pages() - pin user pages in memory
  * @tsk:	task_struct of target task
@@ -1488,7 +1495,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 vma = find_extend_vma(mm, start);
                 if (!vma && in_gate_area(mm, start)) {
                         unsigned long pg = start & PAGE_MASK;
-                        struct vm_area_struct *gate_vma = get_gate_vma(mm);
                         pgd_t *pgd;
                         pud_t *pud;
                         pmd_t *pmd;
@@ -1513,10 +1519,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 pte_unmap(pte);
                                 return i ? : -EFAULT;
                         }
+                        vma = get_gate_vma(mm);
                         if (pages) {
                                 struct page *page;
 
-                                page = vm_normal_page(gate_vma, start, *pte);
+                                page = vm_normal_page(vma, start, *pte);
                                 if (!page) {
                                         if (!(gup_flags & FOLL_DUMP) &&
                                             is_zero_pfn(pte_pfn(*pte)))
@@ -1530,12 +1537,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                         get_page(page);
                                 }
                                 pte_unmap(pte);
-                                if (vmas)
-                                        vmas[i] = gate_vma;
-                                i++;
-                                start += PAGE_SIZE;
-                                nr_pages--;
-                                continue;
+                                goto next_page;
                         }
 
                 if (!vma ||
@@ -1549,6 +1551,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         continue;
                 }
 
+                /*
+                 * If we don't actually want the page itself,
+                 * and it's the stack guard page, just skip it.
+                 */
+                if (!pages && stack_guard_page(vma, start))
+                        goto next_page;
+
                 do {
                         struct page *page;
                         unsigned int foll_flags = gup_flags;
@@ -1631,6 +1640,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         flush_anon_page(vma, page, start);
                         flush_dcache_page(page);
                 }
+next_page:
                 if (vmas)
                         vmas[i] = vma;
                 i++;
@@ -3678,7 +3688,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
          */
 #ifdef CONFIG_HAVE_IOREMAP_PROT
         vma = find_vma(mm, addr);
-        if (!vma)
+        if (!vma || vma->vm_start > addr)
                 break;
         if (vma->vm_ops && vma->vm_ops->access)
                 ret = vma->vm_ops->access(vma, addr, buf,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 321fc7455df7..9ca1d604f7cd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -375,7 +375,7 @@ void online_page(struct page *page)
 #endif
 
 #ifdef CONFIG_FLATMEM
-        max_mapnr = max(page_to_pfn(page), max_mapnr);
+        max_mapnr = max(pfn, max_mapnr);
 #endif
 
         ClearPageReserved(page);
@@ -724,7 +724,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                                pfn);
                         dump_page(page);
 #endif
-                        /* Becasue we don't have big zone->lock. we should
+                        /* Because we don't have big zone->lock. we should
                            check this again here. */
                         if (page_count(page)) {
                                 not_managed++;
diff --git a/mm/migrate.c b/mm/migrate.c
index b0406d739ea7..34132f8e9109 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -375,7 +375,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
                  * redo the accounting that clear_page_dirty_for_io undid,
                  * but we can't use set_page_dirty because that function
                  * is actually a signal that all of the page has become dirty.
-                 * Wheras only part of our page may be dirty.
+                 * Whereas only part of our page may be dirty.
                  */
                 __set_page_dirty_nobuffers(newpage);
         }
diff --git a/mm/mlock.c b/mm/mlock.c
index 2689a08c79af..6b55e3efe0df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,13 +135,6 @@ void munlock_vma_page(struct page *page)
         }
 }
 
-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
-{
-        return (vma->vm_flags & VM_GROWSDOWN) &&
-                (vma->vm_start == addr) &&
-                !vma_stack_continue(vma->vm_prev, addr);
-}
-
 /**
  * __mlock_vma_pages_range() - mlock a range of pages in the vma.
  * @vma:   target vma
@@ -188,12 +181,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
         if (vma->vm_flags & VM_LOCKED)
                 gup_flags |= FOLL_MLOCK;
 
-        /* We don't try to access the guard page of a stack vma */
-        if (stack_guard_page(vma, start)) {
-                addr += PAGE_SIZE;
-                nr_pages--;
-        }
-
         return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
                                 NULL, NULL, nonblocking);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 2ec8eb5a9cdd..e27e0cf0de03 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -259,7 +259,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
          * randomize_va_space to 2, which will still cause mm->start_brk
          * to be arbitrarily shifted
          */
-        if (mm->start_brk > PAGE_ALIGN(mm->end_data))
+        if (current->brk_randomized)
                 min_brk = mm->start_brk;
         else
                 min_brk = mm->end_data;
@@ -1814,11 +1814,14 @@ static int expand_downwards(struct vm_area_struct *vma,
                 size = vma->vm_end - address;
                 grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
-                error = acct_stack_growth(vma, size, grow);
-                if (!error) {
-                        vma->vm_start = address;
-                        vma->vm_pgoff -= grow;
-                        perf_event_mmap(vma);
+                error = -ENOMEM;
+                if (grow <= vma->vm_pgoff) {
+                        error = acct_stack_growth(vma, size, grow);
+                        if (!error) {
+                                vma->vm_start = address;
+                                vma->vm_pgoff -= grow;
+                                perf_event_mmap(vma);
+                        }
                 }
         }
         vma_unlock_anon_vma(vma);
diff --git a/mm/mremap.c b/mm/mremap.c
index 1de98d492ddc..a7c1f9f9b941 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -277,9 +277,16 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
         if (old_len > vma->vm_end - addr)
                 goto Efault;
 
-        if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
-                if (new_len > old_len)
+        /* Need to be careful about a growing mapping */
+        if (new_len > old_len) {
+                unsigned long pgoff;
+
+                if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
                         goto Efault;
+                pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+                pgoff += vma->vm_pgoff;
+                if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
+                        goto Einval;
         }
 
         if (vma->vm_flags & VM_LOCKED) {
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index e99f6cd1da1f..9109049f0bbc 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -150,7 +150,7 @@ unsigned long __init free_all_bootmem(void)
 {
         /*
          * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
-         *  because in some case like Node0 doesnt have RAM installed
+         *  because in some case like Node0 doesn't have RAM installed
          *  low ram will be on Node1
          * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
          *  will be used instead of only Node0 related
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6a819d1b2c7d..83fb72c108b7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -84,24 +84,6 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
 #endif /* CONFIG_NUMA */
 
 /*
- * If this is a system OOM (not a memcg OOM) and the task selected to be
- * killed is not already running at high (RT) priorities, speed up the
- * recovery by boosting the dying task to the lowest FIFO priority.
- * That helps with the recovery and avoids interfering with RT tasks.
- */
-static void boost_dying_task_prio(struct task_struct *p,
-                                  struct mem_cgroup *mem)
-{
-        struct sched_param param = { .sched_priority = 1 };
-
-        if (mem)
-                return;
-
-        if (!rt_task(p))
-                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-}
-
-/*
  * The process p may have detached its own ->mm while exiting or through
  * use_mm(), but one or more of its subthreads may still have a valid
  * pointer. Return p, or any of its subthreads with a valid ->mm, with
@@ -452,13 +434,6 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
         set_tsk_thread_flag(p, TIF_MEMDIE);
         force_sig(SIGKILL, p);
 
-        /*
-         * We give our sacrificial lamb high priority and access to
-         * all the memory it needs. That way it should be able to
-         * exit() and clear out its resources quickly...
-         */
-        boost_dying_task_prio(p, mem);
-
         return 0;
 }
 #undef K
@@ -482,7 +457,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
          */
         if (p->flags & PF_EXITING) {
                 set_tsk_thread_flag(p, TIF_MEMDIE);
-                boost_dying_task_prio(p, mem);
                 return 0;
         }
 
@@ -556,7 +530,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
          */
         if (fatal_signal_pending(current)) {
                 set_thread_flag(TIF_MEMDIE);
-                boost_dying_task_prio(current, NULL);
                 return;
         }
 
@@ -712,7 +685,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
          */
         if (fatal_signal_pending(current)) {
                 set_thread_flag(TIF_MEMDIE);
-                boost_dying_task_prio(current, NULL);
                 return;
         }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d6e7ba7373be..9f8a97b9a350 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -942,7 +942,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                          * If breaking a large block of pages, move all free
                          * pages to the preferred allocation list. If falling
                          * back for a reclaimable kernel allocation, be more
-                         * agressive about taking ownership of free pages
+                         * aggressive about taking ownership of free pages
                          */
                         if (unlikely(current_order >= (pageblock_order >> 1)) ||
                                         start_migratetype == MIGRATE_RECLAIMABLE ||
@@ -3176,7 +3176,7 @@ static __init_refok int __build_all_zonelists(void *data)
  * Called with zonelists_mutex held always
  * unless system_state == SYSTEM_BOOTING.
  */
-void build_all_zonelists(void *data)
+void __ref build_all_zonelists(void *data)
 {
         set_zonelist_order();
 
@@ -3926,7 +3926,7 @@ static void __init find_usable_zone_for_movable(void)
 
 /*
  * The zone ranges provided by the architecture do not include ZONE_MOVABLE
- * because it is sized independant of architecture. Unlike the other zones,
+ * because it is sized independent of architecture. Unlike the other zones,
  * the starting point for ZONE_MOVABLE is not fixed. It may be different
  * in each node depending on the size of each node and how evenly kernelcore
  * is distributed. This helper function adjusts the zone ranges
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index a12cc3fa9859..99055010cece 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -377,7 +377,7 @@ not_enough_page:
  * @new: new id
  *
  * Returns old id at success, 0 at failure.
- * (There is no mem_cgroup useing 0 as its id)
+ * (There is no mem_cgroup using 0 as its id)
  */
 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
                                    unsigned short old, unsigned short new)
diff --git a/mm/percpu.c b/mm/percpu.c
index 55d4d113fbd3..a160db39b810 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -342,7 +342,7 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
  * @chunk: chunk of interest
  *
  * Determine whether area map of @chunk needs to be extended to
- * accomodate a new allocation.
+ * accommodate a new allocation.
  *
  * CONTEXT:
  * pcpu_lock.
@@ -431,7 +431,7 @@ out_unlock:
  * depending on @head, is reduced by @tail bytes and @tail byte block
  * is inserted after the target block.
  *
- * @chunk->map must have enough free slots to accomodate the split.
+ * @chunk->map must have enough free slots to accommodate the split.
  *
  * CONTEXT:
  * pcpu_lock.
@@ -1435,7 +1435,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
         /*
          * Determine min_unit_size, alloc_size and max_upa such that
          * alloc_size is multiple of atom_size and is the smallest
-         * which can accomodate 4k aligned segments which are equal to
+         * which can accommodate 4k aligned segments which are equal to
          * or larger than min_unit_size.
          */
         min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
@@ -1550,7 +1550,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  * @alloc_fn: function to allocate percpu page
- * @free_fn: funtion to free percpu page
+ * @free_fn: function to free percpu page
  *
  * This is a helper to ease setting up embedded first percpu chunk and
  * can be called where pcpu_setup_first_chunk() is expected.
@@ -1678,7 +1678,7 @@ out_free:
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
  * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: funtion to free percpu page, always called with PAGE_SIZE
+ * @free_fn: function to free percpu page, always called with PAGE_SIZE
  * @populate_pte_fn: function to populate pte
  *
  * This is a helper to ease setting up page-remapped first percpu
diff --git a/mm/shmem.c b/mm/shmem.c
index 58da7c150ba6..8fa27e4e582a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -421,7 +421,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
          * a waste to allocate index if we cannot allocate data.
          */
         if (sbinfo->max_blocks) {
-                if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0)
+                if (percpu_counter_compare(&sbinfo->used_blocks,
+                                        sbinfo->max_blocks - 1) >= 0)
                         return ERR_PTR(-ENOSPC);
                 percpu_counter_inc(&sbinfo->used_blocks);
                 spin_lock(&inode->i_lock);
@@ -1397,7 +1398,8 @@ repeat:
                         shmem_swp_unmap(entry);
                         sbinfo = SHMEM_SB(inode->i_sb);
                         if (sbinfo->max_blocks) {
-                                if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) ||
+                                if (percpu_counter_compare(&sbinfo->used_blocks,
+                                                        sbinfo->max_blocks) >= 0 ||
                                     shmem_acct_block(info->flags)) {
                                         spin_unlock(&info->lock);
                                         error = -ENOSPC;
diff --git a/mm/slab.c b/mm/slab.c
index 568803f121a8..46a9c163a92f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -878,7 +878,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         nc = kmalloc_node(memsize, gfp, node);
         /*
          * The array_cache structures contain pointers to free object.
-         * However, when such objects are allocated or transfered to another
+         * However, when such objects are allocated or transferred to another
          * cache the pointers are not cleared and they could be counted as
          * valid references during a kmemleak scan. Therefore, kmemleak must
          * not scan such objects.
@@ -2606,7 +2606,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  *
  * The cache must be empty before calling this function.
  *
- * The caller must guarantee that noone will allocate memory from the cache
+ * The caller must guarantee that no one will allocate memory from the cache
  * during the kmem_cache_destroy().
  */
 void kmem_cache_destroy(struct kmem_cache *cachep)
diff --git a/mm/slub.c b/mm/slub.c
index f881874843a5..94d2a33a866e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -64,7 +64,7 @@
  *   we must stay away from it for a while since we may cause a bouncing
  *   cacheline if we try to acquire the lock. So go onto the next slab.
  *   If all pages are busy then we may allocate a new slab instead of reusing
- *   a partial slab. A new slab has noone operating on it and thus there is
+ *   a partial slab. A new slab has no one operating on it and thus there is
  *   no danger of cacheline contention.
  *
  * Interrupts are disabled during allocation and deallocation in order to
@@ -1929,7 +1929,7 @@ redo:
         else {
 #ifdef CONFIG_CMPXCHG_LOCAL
                 /*
-                 * The cmpxchg will only match if there was no additonal
+                 * The cmpxchg will only match if there was no additional
                  * operation and if we are on the right processor.
                  *
                  * The cmpxchg does the following atomically (without lock semantics!)
@@ -3547,7 +3547,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 
         ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
 
-        /* Honor the call site pointer we recieved. */
+        /* Honor the call site pointer we received. */
         trace_kmalloc(caller, ret, size, s->size, gfpflags);
 
         return ret;
@@ -3577,7 +3577,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 
         ret = slab_alloc(s, gfpflags, node, caller);
 
-        /* Honor the call site pointer we recieved. */
+        /* Honor the call site pointer we received. */
         trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
 
         return ret;
diff --git a/mm/sparse.c b/mm/sparse.c
index 93250207c5cf..aa64b12831a2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -500,7 +500,7 @@ void __init sparse_init(void)
          * so alloc 2M (with 2M align) and 24 bytes in turn will
          * make next 2M slip to one more 2M later.
          * then in big system, the memory will have a lot of holes...
-         * here try to allocate 2M pages continously.
+         * here try to allocate 2M pages continuously.
          *
          * powerpc need to call sparse_init_one_section right after each
          * sparse_early_mem_map_alloc, so allocate usemap_map at first.
diff --git a/mm/util.c b/mm/util.c
index f126975ef23e..e7b103a6fd21 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -227,7 +227,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
- * If the architecture not support this fucntion, simply return with no
+ * If the architecture not support this function, simply return with no
  * page pinned
  */
 int __attribute__((weak)) __get_user_pages_fast(unsigned long start,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f73b8657c2d0..f6b435c80079 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,6 +41,7 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
+#include <linux/oom.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1065,7 +1066,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                  * surrounding the tag page. Only take those pages of
                  * the same active state as that tag page. We may safely
                  * round the target page pfn down to the requested order
-                 * as the mem_map is guarenteed valid out to MAX_ORDER,
+                 * as the mem_map is guaranteed valid out to MAX_ORDER,
                  * where that page is in a different zone we will detect
                  * it from its zone id and abort this block scan.
                  */
@@ -1988,17 +1989,12 @@ static bool zone_reclaimable(struct zone *zone)
         return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
 }
 
-/*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. It can't handle OOM during hibernation.
- * So let's check zone's unreclaimable in direct reclaim as well as kswapd.
- */
+/* All zones in zonelist are unreclaimable? */
 static bool all_unreclaimable(struct zonelist *zonelist,
                 struct scan_control *sc)
 {
         struct zoneref *z;
         struct zone *zone;
-        bool all_unreclaimable = true;
 
         for_each_zone_zonelist_nodemask(zone, z, zonelist,
                         gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2006,13 +2002,11 @@ static bool all_unreclaimable(struct zonelist *zonelist,
                         continue;
                 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                         continue;
-                if (zone_reclaimable(zone)) {
-                        all_unreclaimable = false;
-                        break;
-                }
+                if (!zone->all_unreclaimable)
+                        return false;
         }
 
-        return all_unreclaimable;
+        return true;
 }
 
 /*
@@ -2108,6 +2102,14 @@ out:
         if (sc->nr_reclaimed)
                 return sc->nr_reclaimed;
 
+        /*
+         * As hibernation is going on, kswapd is freezed so that it can't mark
+         * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
+         * check.
+         */
+        if (oom_killer_disabled)
+                return 0;
+
         /* top priority shrink_zones still had more to do? don't OOM, then */
         if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
                 return 1;
@@ -2224,7 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
  * o a 16M DMA zone that is balanced will not balance a zone on any
  *   reasonable sized machine
  * o On all other machines, the top zone must be at least a reasonable
- *   precentage of the middle zones. For example, on 32-bit x86, highmem
+ *   percentage of the middle zones. For example, on 32-bit x86, highmem
  *   would need to be at least 256M for it to be balance a whole node.
  *   Similarly, on x86-64 the Normal zone would need to be at least 1G
  *   to balance a node on its own. These seemed like reasonable ratios.
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 772b39b87d95..897ea9e88238 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -321,9 +321,12 @@ static inline void mod_state(struct zone *zone,
         /*
          * The fetching of the stat_threshold is racy. We may apply
          * a counter threshold to the wrong the cpu if we get
-         * rescheduled while executing here. However, the following
-         * will apply the threshold again and therefore bring the
-         * counter under the threshold.
+         * rescheduled while executing here. However, the next
+         * counter update will apply the threshold again and
+         * therefore bring the counter under the threshold again.
+         *
+         * Most of the time the thresholds are the same anyways
+         * for all cpus in a zone.
          */
         t = this_cpu_read(pcp->stat_threshold);
 
@@ -945,7 +948,16 @@ static const char * const vmstat_text[] = {
945 "unevictable_pgs_cleared", 948 "unevictable_pgs_cleared",
946 "unevictable_pgs_stranded", 949 "unevictable_pgs_stranded",
947 "unevictable_pgs_mlockfreed", 950 "unevictable_pgs_mlockfreed",
951
952#ifdef CONFIG_TRANSPARENT_HUGEPAGE
953 "thp_fault_alloc",
954 "thp_fault_fallback",
955 "thp_collapse_alloc",
956 "thp_collapse_alloc_failed",
957 "thp_split",
948#endif 958#endif
959
960#endif /* CONFIG_VM_EVENTS_COUNTERS */
949}; 961};
950 962
951static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, 963static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,