Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c     |  2
-rw-r--r-- | mm/huge_memory.c     | 49
-rw-r--r-- | mm/hugetlb.c         | 10
-rw-r--r-- | mm/hwpoison-inject.c |  2
-rw-r--r-- | mm/internal.h        |  2
-rw-r--r-- | mm/kmemleak.c        |  6
-rw-r--r-- | mm/ksm.c             |  2
-rw-r--r-- | mm/memcontrol.c      |  8
-rw-r--r-- | mm/memory-failure.c  |  6
-rw-r--r-- | mm/memory.c          | 28
-rw-r--r-- | mm/memory_hotplug.c  |  4
-rw-r--r-- | mm/migrate.c         |  2
-rw-r--r-- | mm/mlock.c           | 13
-rw-r--r-- | mm/mmap.c            | 15
-rw-r--r-- | mm/mremap.c          | 11
-rw-r--r-- | mm/nobootmem.c       |  2
-rw-r--r-- | mm/oom_kill.c        | 28
-rw-r--r-- | mm/page_alloc.c      |  6
-rw-r--r-- | mm/page_cgroup.c     |  2
-rw-r--r-- | mm/percpu.c          | 10
-rw-r--r-- | mm/shmem.c           |  6
-rw-r--r-- | mm/slab.c            |  4
-rw-r--r-- | mm/slub.c            |  8
-rw-r--r-- | mm/sparse.c          |  2
-rw-r--r-- | mm/util.c            |  2
-rw-r--r-- | mm/vmscan.c          | 28
-rw-r--r-- | mm/vmstat.c          | 18
27 files changed, 146 insertions, 130 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0d9a036ada66..befc87531e4f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -787,7 +787,7 @@ EXPORT_SYMBOL(congestion_wait); | |||
787 | * jiffies for either a BDI to exit congestion of the given @sync queue | 787 | * jiffies for either a BDI to exit congestion of the given @sync queue |
788 | * or a write to complete. | 788 | * or a write to complete. |
789 | * | 789 | * |
790 | * In the absense of zone congestion, cond_resched() is called to yield | 790 | * In the absence of zone congestion, cond_resched() is called to yield |
791 | * the processor if necessary but otherwise does not sleep. | 791 | * the processor if necessary but otherwise does not sleep. |
792 | * | 792 | * |
793 | * The return value is 0 if the sleep is for the full timeout. Otherwise, | 793 | * The return value is 0 if the sleep is for the full timeout. Otherwise, |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0a619e0e2e0b..470dcda10add 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -244,24 +244,28 @@ static ssize_t single_flag_show(struct kobject *kobj, | |||
244 | struct kobj_attribute *attr, char *buf, | 244 | struct kobj_attribute *attr, char *buf, |
245 | enum transparent_hugepage_flag flag) | 245 | enum transparent_hugepage_flag flag) |
246 | { | 246 | { |
247 | if (test_bit(flag, &transparent_hugepage_flags)) | 247 | return sprintf(buf, "%d\n", |
248 | return sprintf(buf, "[yes] no\n"); | 248 | !!test_bit(flag, &transparent_hugepage_flags)); |
249 | else | ||
250 | return sprintf(buf, "yes [no]\n"); | ||
251 | } | 249 | } |
250 | |||
252 | static ssize_t single_flag_store(struct kobject *kobj, | 251 | static ssize_t single_flag_store(struct kobject *kobj, |
253 | struct kobj_attribute *attr, | 252 | struct kobj_attribute *attr, |
254 | const char *buf, size_t count, | 253 | const char *buf, size_t count, |
255 | enum transparent_hugepage_flag flag) | 254 | enum transparent_hugepage_flag flag) |
256 | { | 255 | { |
257 | if (!memcmp("yes", buf, | 256 | unsigned long value; |
258 | min(sizeof("yes")-1, count))) { | 257 | int ret; |
258 | |||
259 | ret = kstrtoul(buf, 10, &value); | ||
260 | if (ret < 0) | ||
261 | return ret; | ||
262 | if (value > 1) | ||
263 | return -EINVAL; | ||
264 | |||
265 | if (value) | ||
259 | set_bit(flag, &transparent_hugepage_flags); | 266 | set_bit(flag, &transparent_hugepage_flags); |
260 | } else if (!memcmp("no", buf, | 267 | else |
261 | min(sizeof("no")-1, count))) { | ||
262 | clear_bit(flag, &transparent_hugepage_flags); | 268 | clear_bit(flag, &transparent_hugepage_flags); |
263 | } else | ||
264 | return -EINVAL; | ||
265 | 269 | ||
266 | return count; | 270 | return count; |
267 | } | 271 | } |
@@ -680,8 +684,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
680 | return VM_FAULT_OOM; | 684 | return VM_FAULT_OOM; |
681 | page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), | 685 | page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), |
682 | vma, haddr, numa_node_id(), 0); | 686 | vma, haddr, numa_node_id(), 0); |
683 | if (unlikely(!page)) | 687 | if (unlikely(!page)) { |
688 | count_vm_event(THP_FAULT_FALLBACK); | ||
684 | goto out; | 689 | goto out; |
690 | } | ||
691 | count_vm_event(THP_FAULT_ALLOC); | ||
685 | if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { | 692 | if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { |
686 | put_page(page); | 693 | put_page(page); |
687 | goto out; | 694 | goto out; |
@@ -909,11 +916,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
909 | new_page = NULL; | 916 | new_page = NULL; |
910 | 917 | ||
911 | if (unlikely(!new_page)) { | 918 | if (unlikely(!new_page)) { |
919 | count_vm_event(THP_FAULT_FALLBACK); | ||
912 | ret = do_huge_pmd_wp_page_fallback(mm, vma, address, | 920 | ret = do_huge_pmd_wp_page_fallback(mm, vma, address, |
913 | pmd, orig_pmd, page, haddr); | 921 | pmd, orig_pmd, page, haddr); |
914 | put_page(page); | 922 | put_page(page); |
915 | goto out; | 923 | goto out; |
916 | } | 924 | } |
925 | count_vm_event(THP_FAULT_ALLOC); | ||
917 | 926 | ||
918 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { | 927 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { |
919 | put_page(new_page); | 928 | put_page(new_page); |
@@ -1390,6 +1399,7 @@ int split_huge_page(struct page *page) | |||
1390 | 1399 | ||
1391 | BUG_ON(!PageSwapBacked(page)); | 1400 | BUG_ON(!PageSwapBacked(page)); |
1392 | __split_huge_page(page, anon_vma); | 1401 | __split_huge_page(page, anon_vma); |
1402 | count_vm_event(THP_SPLIT); | ||
1393 | 1403 | ||
1394 | BUG_ON(PageCompound(page)); | 1404 | BUG_ON(PageCompound(page)); |
1395 | out_unlock: | 1405 | out_unlock: |
@@ -1784,9 +1794,11 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1784 | node, __GFP_OTHER_NODE); | 1794 | node, __GFP_OTHER_NODE); |
1785 | if (unlikely(!new_page)) { | 1795 | if (unlikely(!new_page)) { |
1786 | up_read(&mm->mmap_sem); | 1796 | up_read(&mm->mmap_sem); |
1797 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | ||
1787 | *hpage = ERR_PTR(-ENOMEM); | 1798 | *hpage = ERR_PTR(-ENOMEM); |
1788 | return; | 1799 | return; |
1789 | } | 1800 | } |
1801 | count_vm_event(THP_COLLAPSE_ALLOC); | ||
1790 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { | 1802 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { |
1791 | up_read(&mm->mmap_sem); | 1803 | up_read(&mm->mmap_sem); |
1792 | put_page(new_page); | 1804 | put_page(new_page); |
@@ -2151,8 +2163,11 @@ static void khugepaged_do_scan(struct page **hpage) | |||
2151 | #ifndef CONFIG_NUMA | 2163 | #ifndef CONFIG_NUMA |
2152 | if (!*hpage) { | 2164 | if (!*hpage) { |
2153 | *hpage = alloc_hugepage(khugepaged_defrag()); | 2165 | *hpage = alloc_hugepage(khugepaged_defrag()); |
2154 | if (unlikely(!*hpage)) | 2166 | if (unlikely(!*hpage)) { |
2167 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | ||
2155 | break; | 2168 | break; |
2169 | } | ||
2170 | count_vm_event(THP_COLLAPSE_ALLOC); | ||
2156 | } | 2171 | } |
2157 | #else | 2172 | #else |
2158 | if (IS_ERR(*hpage)) | 2173 | if (IS_ERR(*hpage)) |
@@ -2192,8 +2207,11 @@ static struct page *khugepaged_alloc_hugepage(void) | |||
2192 | 2207 | ||
2193 | do { | 2208 | do { |
2194 | hpage = alloc_hugepage(khugepaged_defrag()); | 2209 | hpage = alloc_hugepage(khugepaged_defrag()); |
2195 | if (!hpage) | 2210 | if (!hpage) { |
2211 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | ||
2196 | khugepaged_alloc_sleep(); | 2212 | khugepaged_alloc_sleep(); |
2213 | } else | ||
2214 | count_vm_event(THP_COLLAPSE_ALLOC); | ||
2197 | } while (unlikely(!hpage) && | 2215 | } while (unlikely(!hpage) && |
2198 | likely(khugepaged_enabled())); | 2216 | likely(khugepaged_enabled())); |
2199 | return hpage; | 2217 | return hpage; |
@@ -2210,8 +2228,11 @@ static void khugepaged_loop(void) | |||
2210 | while (likely(khugepaged_enabled())) { | 2228 | while (likely(khugepaged_enabled())) { |
2211 | #ifndef CONFIG_NUMA | 2229 | #ifndef CONFIG_NUMA |
2212 | hpage = khugepaged_alloc_hugepage(); | 2230 | hpage = khugepaged_alloc_hugepage(); |
2213 | if (unlikely(!hpage)) | 2231 | if (unlikely(!hpage)) { |
2232 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | ||
2214 | break; | 2233 | break; |
2234 | } | ||
2235 | count_vm_event(THP_COLLAPSE_ALLOC); | ||
2215 | #else | 2236 | #else |
2216 | if (IS_ERR(hpage)) { | 2237 | if (IS_ERR(hpage)) { |
2217 | khugepaged_alloc_sleep(); | 2238 | khugepaged_alloc_sleep(); |
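Note on the huge_memory.c hunks above: they all apply the same instrumentation pattern, bumping a THP_* vm event on the successful allocation path and the matching *_FALLBACK/*_FAILED event when a huge page cannot be obtained. A minimal sketch of that pattern, for reference only (not code from this patch; the helper name thp_alloc_counted is made up, while count_vm_event() and the THP_FAULT_* event names come from this series):

	#include <linux/gfp.h>
	#include <linux/vmstat.h>

	/* Illustrative sketch of the counting pattern used in the hunks above. */
	static struct page *thp_alloc_counted(gfp_t gfp, unsigned int order)
	{
		struct page *page = alloc_pages(gfp, order);

		if (likely(page))
			count_vm_event(THP_FAULT_ALLOC);	/* huge page allocated */
		else
			count_vm_event(THP_FAULT_FALLBACK);	/* will fall back to 4k pages */

		return page;
	}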
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 06de5aa4d644..8ee3bd8ec5b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -146,7 +146,7 @@ static long region_chg(struct list_head *head, long f, long t) | |||
146 | if (rg->from > t) | 146 | if (rg->from > t) |
147 | return chg; | 147 | return chg; |
148 | 148 | ||
149 | /* We overlap with this area, if it extends futher than | 149 | /* We overlap with this area, if it extends further than |
150 | * us then we must extend ourselves. Account for its | 150 | * us then we must extend ourselves. Account for its |
151 | * existing reservation. */ | 151 | * existing reservation. */ |
152 | if (rg->to > t) { | 152 | if (rg->to > t) { |
@@ -842,7 +842,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid) | |||
842 | } | 842 | } |
843 | 843 | ||
844 | /* | 844 | /* |
845 | * Increase the hugetlb pool such that it can accomodate a reservation | 845 | * Increase the hugetlb pool such that it can accommodate a reservation |
846 | * of size 'delta'. | 846 | * of size 'delta'. |
847 | */ | 847 | */ |
848 | static int gather_surplus_pages(struct hstate *h, int delta) | 848 | static int gather_surplus_pages(struct hstate *h, int delta) |
@@ -890,7 +890,7 @@ retry: | |||
890 | 890 | ||
891 | /* | 891 | /* |
892 | * The surplus_list now contains _at_least_ the number of extra pages | 892 | * The surplus_list now contains _at_least_ the number of extra pages |
893 | * needed to accomodate the reservation. Add the appropriate number | 893 | * needed to accommodate the reservation. Add the appropriate number |
894 | * of pages to the hugetlb pool and free the extras back to the buddy | 894 | * of pages to the hugetlb pool and free the extras back to the buddy |
895 | * allocator. Commit the entire reservation here to prevent another | 895 | * allocator. Commit the entire reservation here to prevent another |
896 | * process from stealing the pages as they are added to the pool but | 896 | * process from stealing the pages as they are added to the pool but |
@@ -2043,7 +2043,7 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) | |||
2043 | * This new VMA should share its siblings reservation map if present. | 2043 | * This new VMA should share its siblings reservation map if present. |
2044 | * The VMA will only ever have a valid reservation map pointer where | 2044 | * The VMA will only ever have a valid reservation map pointer where |
2045 | * it is being copied for another still existing VMA. As that VMA | 2045 | * it is being copied for another still existing VMA. As that VMA |
2046 | * has a reference to the reservation map it cannot dissappear until | 2046 | * has a reference to the reservation map it cannot disappear until |
2047 | * after this open call completes. It is therefore safe to take a | 2047 | * after this open call completes. It is therefore safe to take a |
2048 | * new reference here without additional locking. | 2048 | * new reference here without additional locking. |
2049 | */ | 2049 | */ |
@@ -2490,7 +2490,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2490 | /* | 2490 | /* |
2491 | * Currently, we are forced to kill the process in the event the | 2491 | * Currently, we are forced to kill the process in the event the |
2492 | * original mapper has unmapped pages from the child due to a failed | 2492 | * original mapper has unmapped pages from the child due to a failed |
2493 | * COW. Warn that such a situation has occured as it may not be obvious | 2493 | * COW. Warn that such a situation has occurred as it may not be obvious |
2494 | */ | 2494 | */ |
2495 | if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) { | 2495 | if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) { |
2496 | printk(KERN_WARNING | 2496 | printk(KERN_WARNING |
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 0948f1072d6b..c7fc7fd00e32 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -1,4 +1,4 @@ | |||
1 | /* Inject a hwpoison memory failure on a arbitary pfn */ | 1 | /* Inject a hwpoison memory failure on a arbitrary pfn */ |
2 | #include <linux/module.h> | 2 | #include <linux/module.h> |
3 | #include <linux/debugfs.h> | 3 | #include <linux/debugfs.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
diff --git a/mm/internal.h b/mm/internal.h
index 3438dd43a062..9d0ced8e505e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -162,7 +162,7 @@ static inline struct page *mem_map_offset(struct page *base, int offset) | |||
162 | } | 162 | } |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * Iterator over all subpages withing the maximally aligned gigantic | 165 | * Iterator over all subpages within the maximally aligned gigantic |
166 | * page 'base'. Handle any discontiguity in the mem_map. | 166 | * page 'base'. Handle any discontiguity in the mem_map. |
167 | */ | 167 | */ |
168 | static inline struct page *mem_map_next(struct page *iter, | 168 | static inline struct page *mem_map_next(struct page *iter, |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 84225f3b7190..c1d5867543e4 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -265,7 +265,7 @@ static void kmemleak_disable(void); | |||
265 | } while (0) | 265 | } while (0) |
266 | 266 | ||
267 | /* | 267 | /* |
268 | * Macro invoked when a serious kmemleak condition occured and cannot be | 268 | * Macro invoked when a serious kmemleak condition occurred and cannot be |
269 | * recovered from. Kmemleak will be disabled and further allocation/freeing | 269 | * recovered from. Kmemleak will be disabled and further allocation/freeing |
270 | * tracing no longer available. | 270 | * tracing no longer available. |
271 | */ | 271 | */ |
@@ -1006,7 +1006,7 @@ static bool update_checksum(struct kmemleak_object *object) | |||
1006 | 1006 | ||
1007 | /* | 1007 | /* |
1008 | * Memory scanning is a long process and it needs to be interruptable. This | 1008 | * Memory scanning is a long process and it needs to be interruptable. This |
1009 | * function checks whether such interrupt condition occured. | 1009 | * function checks whether such interrupt condition occurred. |
1010 | */ | 1010 | */ |
1011 | static int scan_should_stop(void) | 1011 | static int scan_should_stop(void) |
1012 | { | 1012 | { |
@@ -1733,7 +1733,7 @@ static int __init kmemleak_late_init(void) | |||
1733 | 1733 | ||
1734 | if (atomic_read(&kmemleak_error)) { | 1734 | if (atomic_read(&kmemleak_error)) { |
1735 | /* | 1735 | /* |
1736 | * Some error occured and kmemleak was disabled. There is a | 1736 | * Some error occurred and kmemleak was disabled. There is a |
1737 | * small chance that kmemleak_disable() was called immediately | 1737 | * small chance that kmemleak_disable() was called immediately |
1738 | * after setting kmemleak_initialized and we may end up with | 1738 | * after setting kmemleak_initialized and we may end up with |
1739 | * two clean-up threads but serialized by scan_mutex. | 1739 | * two clean-up threads but serialized by scan_mutex. |
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -720,7 +720,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, | |||
720 | swapped = PageSwapCache(page); | 720 | swapped = PageSwapCache(page); |
721 | flush_cache_page(vma, addr, page_to_pfn(page)); | 721 | flush_cache_page(vma, addr, page_to_pfn(page)); |
722 | /* | 722 | /* |
723 | * Ok this is tricky, when get_user_pages_fast() run it doesnt | 723 | * Ok this is tricky, when get_user_pages_fast() run it doesn't |
724 | * take any lock, therefore the check that we are going to make | 724 | * take any lock, therefore the check that we are going to make |
725 | * with the pagecount against the mapcount is racey and | 725 | * with the pagecount against the mapcount is racey and |
726 | * O_DIRECT can happen right after the check. | 726 | * O_DIRECT can happen right after the check. |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1f0b460fe58c..010f9166fa6e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1466,7 +1466,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1466 | break; | 1466 | break; |
1467 | } | 1467 | } |
1468 | /* | 1468 | /* |
1469 | * We want to do more targetted reclaim. | 1469 | * We want to do more targeted reclaim. |
1470 | * excess >> 2 is not to excessive so as to | 1470 | * excess >> 2 is not to excessive so as to |
1471 | * reclaim too much, nor too less that we keep | 1471 | * reclaim too much, nor too less that we keep |
1472 | * coming back to reclaim from this cgroup | 1472 | * coming back to reclaim from this cgroup |
@@ -2265,7 +2265,7 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) | |||
2265 | * - compound_lock is held when nr_pages > 1 | 2265 | * - compound_lock is held when nr_pages > 1 |
2266 | * | 2266 | * |
2267 | * This function doesn't do "charge" nor css_get to new cgroup. It should be | 2267 | * This function doesn't do "charge" nor css_get to new cgroup. It should be |
2268 | * done by a caller(__mem_cgroup_try_charge would be usefull). If @uncharge is | 2268 | * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is |
2269 | * true, this function does "uncharge" from old cgroup, but it doesn't if | 2269 | * true, this function does "uncharge" from old cgroup, but it doesn't if |
2270 | * @uncharge is false, so a caller should do "uncharge". | 2270 | * @uncharge is false, so a caller should do "uncharge". |
2271 | */ | 2271 | */ |
@@ -2318,7 +2318,7 @@ static int mem_cgroup_move_account(struct page *page, | |||
2318 | * We charges against "to" which may not have any tasks. Then, "to" | 2318 | * We charges against "to" which may not have any tasks. Then, "to" |
2319 | * can be under rmdir(). But in current implementation, caller of | 2319 | * can be under rmdir(). But in current implementation, caller of |
2320 | * this function is just force_empty() and move charge, so it's | 2320 | * this function is just force_empty() and move charge, so it's |
2321 | * garanteed that "to" is never removed. So, we don't check rmdir | 2321 | * guaranteed that "to" is never removed. So, we don't check rmdir |
2322 | * status here. | 2322 | * status here. |
2323 | */ | 2323 | */ |
2324 | move_unlock_page_cgroup(pc, &flags); | 2324 | move_unlock_page_cgroup(pc, &flags); |
@@ -2648,7 +2648,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, | |||
2648 | batch->memcg = mem; | 2648 | batch->memcg = mem; |
2649 | /* | 2649 | /* |
2650 | * do_batch > 0 when unmapping pages or inode invalidate/truncate. | 2650 | * do_batch > 0 when unmapping pages or inode invalidate/truncate. |
2651 | * In those cases, all pages freed continously can be expected to be in | 2651 | * In those cases, all pages freed continuously can be expected to be in |
2652 | * the same cgroup and we have chance to coalesce uncharges. | 2652 | * the same cgroup and we have chance to coalesce uncharges. |
2653 | * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) | 2653 | * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) |
2654 | * because we want to do uncharge as soon as possible. | 2654 | * because we want to do uncharge as soon as possible. |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 37feb9fec228..2b9a5eef39e0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -208,7 +208,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, | |||
208 | * Don't use force here, it's convenient if the signal | 208 | * Don't use force here, it's convenient if the signal |
209 | * can be temporarily blocked. | 209 | * can be temporarily blocked. |
210 | * This could cause a loop when the user sets SIGBUS | 210 | * This could cause a loop when the user sets SIGBUS |
211 | * to SIG_IGN, but hopefully noone will do that? | 211 | * to SIG_IGN, but hopefully no one will do that? |
212 | */ | 212 | */ |
213 | ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ | 213 | ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ |
214 | if (ret < 0) | 214 | if (ret < 0) |
@@ -634,7 +634,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) | |||
634 | * when the page is reread or dropped. If an | 634 | * when the page is reread or dropped. If an |
635 | * application assumes it will always get error on | 635 | * application assumes it will always get error on |
636 | * fsync, but does other operations on the fd before | 636 | * fsync, but does other operations on the fd before |
637 | * and the page is dropped inbetween then the error | 637 | * and the page is dropped in between then the error |
638 | * will not be properly reported. | 638 | * will not be properly reported. |
639 | * | 639 | * |
640 | * This can already happen even without hwpoisoned | 640 | * This can already happen even without hwpoisoned |
@@ -728,7 +728,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) | |||
728 | * The table matches them in order and calls the right handler. | 728 | * The table matches them in order and calls the right handler. |
729 | * | 729 | * |
730 | * This is quite tricky because we can access page at any time | 730 | * This is quite tricky because we can access page at any time |
731 | * in its live cycle, so all accesses have to be extremly careful. | 731 | * in its live cycle, so all accesses have to be extremely careful. |
732 | * | 732 | * |
733 | * This is not complete. More states could be added. | 733 | * This is not complete. More states could be added. |
734 | * For any missing state don't attempt recovery. | 734 | * For any missing state don't attempt recovery. |
diff --git a/mm/memory.c b/mm/memory.c
index 9da8cab1b1b0..ce22a250926f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1410,6 +1410,13 @@ no_page_table: | |||
1410 | return page; | 1410 | return page; |
1411 | } | 1411 | } |
1412 | 1412 | ||
1413 | static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) | ||
1414 | { | ||
1415 | return (vma->vm_flags & VM_GROWSDOWN) && | ||
1416 | (vma->vm_start == addr) && | ||
1417 | !vma_stack_continue(vma->vm_prev, addr); | ||
1418 | } | ||
1419 | |||
1413 | /** | 1420 | /** |
1414 | * __get_user_pages() - pin user pages in memory | 1421 | * __get_user_pages() - pin user pages in memory |
1415 | * @tsk: task_struct of target task | 1422 | * @tsk: task_struct of target task |
@@ -1488,7 +1495,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1488 | vma = find_extend_vma(mm, start); | 1495 | vma = find_extend_vma(mm, start); |
1489 | if (!vma && in_gate_area(mm, start)) { | 1496 | if (!vma && in_gate_area(mm, start)) { |
1490 | unsigned long pg = start & PAGE_MASK; | 1497 | unsigned long pg = start & PAGE_MASK; |
1491 | struct vm_area_struct *gate_vma = get_gate_vma(mm); | ||
1492 | pgd_t *pgd; | 1498 | pgd_t *pgd; |
1493 | pud_t *pud; | 1499 | pud_t *pud; |
1494 | pmd_t *pmd; | 1500 | pmd_t *pmd; |
@@ -1513,10 +1519,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1513 | pte_unmap(pte); | 1519 | pte_unmap(pte); |
1514 | return i ? : -EFAULT; | 1520 | return i ? : -EFAULT; |
1515 | } | 1521 | } |
1522 | vma = get_gate_vma(mm); | ||
1516 | if (pages) { | 1523 | if (pages) { |
1517 | struct page *page; | 1524 | struct page *page; |
1518 | 1525 | ||
1519 | page = vm_normal_page(gate_vma, start, *pte); | 1526 | page = vm_normal_page(vma, start, *pte); |
1520 | if (!page) { | 1527 | if (!page) { |
1521 | if (!(gup_flags & FOLL_DUMP) && | 1528 | if (!(gup_flags & FOLL_DUMP) && |
1522 | is_zero_pfn(pte_pfn(*pte))) | 1529 | is_zero_pfn(pte_pfn(*pte))) |
@@ -1530,12 +1537,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1530 | get_page(page); | 1537 | get_page(page); |
1531 | } | 1538 | } |
1532 | pte_unmap(pte); | 1539 | pte_unmap(pte); |
1533 | if (vmas) | 1540 | goto next_page; |
1534 | vmas[i] = gate_vma; | ||
1535 | i++; | ||
1536 | start += PAGE_SIZE; | ||
1537 | nr_pages--; | ||
1538 | continue; | ||
1539 | } | 1541 | } |
1540 | 1542 | ||
1541 | if (!vma || | 1543 | if (!vma || |
@@ -1549,6 +1551,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1549 | continue; | 1551 | continue; |
1550 | } | 1552 | } |
1551 | 1553 | ||
1554 | /* | ||
1555 | * If we don't actually want the page itself, | ||
1556 | * and it's the stack guard page, just skip it. | ||
1557 | */ | ||
1558 | if (!pages && stack_guard_page(vma, start)) | ||
1559 | goto next_page; | ||
1560 | |||
1552 | do { | 1561 | do { |
1553 | struct page *page; | 1562 | struct page *page; |
1554 | unsigned int foll_flags = gup_flags; | 1563 | unsigned int foll_flags = gup_flags; |
@@ -1631,6 +1640,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1631 | flush_anon_page(vma, page, start); | 1640 | flush_anon_page(vma, page, start); |
1632 | flush_dcache_page(page); | 1641 | flush_dcache_page(page); |
1633 | } | 1642 | } |
1643 | next_page: | ||
1634 | if (vmas) | 1644 | if (vmas) |
1635 | vmas[i] = vma; | 1645 | vmas[i] = vma; |
1636 | i++; | 1646 | i++; |
@@ -3678,7 +3688,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, | |||
3678 | */ | 3688 | */ |
3679 | #ifdef CONFIG_HAVE_IOREMAP_PROT | 3689 | #ifdef CONFIG_HAVE_IOREMAP_PROT |
3680 | vma = find_vma(mm, addr); | 3690 | vma = find_vma(mm, addr); |
3681 | if (!vma) | 3691 | if (!vma || vma->vm_start > addr) |
3682 | break; | 3692 | break; |
3683 | if (vma->vm_ops && vma->vm_ops->access) | 3693 | if (vma->vm_ops && vma->vm_ops->access) |
3684 | ret = vma->vm_ops->access(vma, addr, buf, | 3694 | ret = vma->vm_ops->access(vma, addr, buf, |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 321fc7455df7..9ca1d604f7cd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -375,7 +375,7 @@ void online_page(struct page *page) | |||
375 | #endif | 375 | #endif |
376 | 376 | ||
377 | #ifdef CONFIG_FLATMEM | 377 | #ifdef CONFIG_FLATMEM |
378 | max_mapnr = max(page_to_pfn(page), max_mapnr); | 378 | max_mapnr = max(pfn, max_mapnr); |
379 | #endif | 379 | #endif |
380 | 380 | ||
381 | ClearPageReserved(page); | 381 | ClearPageReserved(page); |
@@ -724,7 +724,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
724 | pfn); | 724 | pfn); |
725 | dump_page(page); | 725 | dump_page(page); |
726 | #endif | 726 | #endif |
727 | /* Becasue we don't have big zone->lock. we should | 727 | /* Because we don't have big zone->lock. we should |
728 | check this again here. */ | 728 | check this again here. */ |
729 | if (page_count(page)) { | 729 | if (page_count(page)) { |
730 | not_managed++; | 730 | not_managed++; |
diff --git a/mm/migrate.c b/mm/migrate.c
index b0406d739ea7..34132f8e9109 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -375,7 +375,7 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
375 | * redo the accounting that clear_page_dirty_for_io undid, | 375 | * redo the accounting that clear_page_dirty_for_io undid, |
376 | * but we can't use set_page_dirty because that function | 376 | * but we can't use set_page_dirty because that function |
377 | * is actually a signal that all of the page has become dirty. | 377 | * is actually a signal that all of the page has become dirty. |
378 | * Wheras only part of our page may be dirty. | 378 | * Whereas only part of our page may be dirty. |
379 | */ | 379 | */ |
380 | __set_page_dirty_nobuffers(newpage); | 380 | __set_page_dirty_nobuffers(newpage); |
381 | } | 381 | } |
diff --git a/mm/mlock.c b/mm/mlock.c
index 2689a08c79af..6b55e3efe0df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,13 +135,6 @@ void munlock_vma_page(struct page *page) | |||
135 | } | 135 | } |
136 | } | 136 | } |
137 | 137 | ||
138 | static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) | ||
139 | { | ||
140 | return (vma->vm_flags & VM_GROWSDOWN) && | ||
141 | (vma->vm_start == addr) && | ||
142 | !vma_stack_continue(vma->vm_prev, addr); | ||
143 | } | ||
144 | |||
145 | /** | 138 | /** |
146 | * __mlock_vma_pages_range() - mlock a range of pages in the vma. | 139 | * __mlock_vma_pages_range() - mlock a range of pages in the vma. |
147 | * @vma: target vma | 140 | * @vma: target vma |
@@ -188,12 +181,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
188 | if (vma->vm_flags & VM_LOCKED) | 181 | if (vma->vm_flags & VM_LOCKED) |
189 | gup_flags |= FOLL_MLOCK; | 182 | gup_flags |= FOLL_MLOCK; |
190 | 183 | ||
191 | /* We don't try to access the guard page of a stack vma */ | ||
192 | if (stack_guard_page(vma, start)) { | ||
193 | addr += PAGE_SIZE; | ||
194 | nr_pages--; | ||
195 | } | ||
196 | |||
197 | return __get_user_pages(current, mm, addr, nr_pages, gup_flags, | 184 | return __get_user_pages(current, mm, addr, nr_pages, gup_flags, |
198 | NULL, NULL, nonblocking); | 185 | NULL, NULL, nonblocking); |
199 | } | 186 | } |
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -259,7 +259,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) | |||
259 | * randomize_va_space to 2, which will still cause mm->start_brk | 259 | * randomize_va_space to 2, which will still cause mm->start_brk |
260 | * to be arbitrarily shifted | 260 | * to be arbitrarily shifted |
261 | */ | 261 | */ |
262 | if (mm->start_brk > PAGE_ALIGN(mm->end_data)) | 262 | if (current->brk_randomized) |
263 | min_brk = mm->start_brk; | 263 | min_brk = mm->start_brk; |
264 | else | 264 | else |
265 | min_brk = mm->end_data; | 265 | min_brk = mm->end_data; |
@@ -1814,11 +1814,14 @@ static int expand_downwards(struct vm_area_struct *vma, | |||
1814 | size = vma->vm_end - address; | 1814 | size = vma->vm_end - address; |
1815 | grow = (vma->vm_start - address) >> PAGE_SHIFT; | 1815 | grow = (vma->vm_start - address) >> PAGE_SHIFT; |
1816 | 1816 | ||
1817 | error = acct_stack_growth(vma, size, grow); | 1817 | error = -ENOMEM; |
1818 | if (!error) { | 1818 | if (grow <= vma->vm_pgoff) { |
1819 | vma->vm_start = address; | 1819 | error = acct_stack_growth(vma, size, grow); |
1820 | vma->vm_pgoff -= grow; | 1820 | if (!error) { |
1821 | perf_event_mmap(vma); | 1821 | vma->vm_start = address; |
1822 | vma->vm_pgoff -= grow; | ||
1823 | perf_event_mmap(vma); | ||
1824 | } | ||
1822 | } | 1825 | } |
1823 | } | 1826 | } |
1824 | vma_unlock_anon_vma(vma); | 1827 | vma_unlock_anon_vma(vma); |
diff --git a/mm/mremap.c b/mm/mremap.c
index 1de98d492ddc..a7c1f9f9b941 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -277,9 +277,16 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, | |||
277 | if (old_len > vma->vm_end - addr) | 277 | if (old_len > vma->vm_end - addr) |
278 | goto Efault; | 278 | goto Efault; |
279 | 279 | ||
280 | if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) { | 280 | /* Need to be careful about a growing mapping */ |
281 | if (new_len > old_len) | 281 | if (new_len > old_len) { |
282 | unsigned long pgoff; | ||
283 | |||
284 | if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) | ||
282 | goto Efault; | 285 | goto Efault; |
286 | pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; | ||
287 | pgoff += vma->vm_pgoff; | ||
288 | if (pgoff + (new_len >> PAGE_SHIFT) < pgoff) | ||
289 | goto Einval; | ||
283 | } | 290 | } |
284 | 291 | ||
285 | if (vma->vm_flags & VM_LOCKED) { | 292 | if (vma->vm_flags & VM_LOCKED) { |
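Worked example for the new overflow check above (illustrative numbers, assuming a 32-bit unsigned long): if the region being resized starts at file page offset pgoff = 0xffffff00 and the grown mapping would span new_len >> PAGE_SHIFT = 0x200 pages, then pgoff + 0x200 wraps around to 0x100, which is smaller than pgoff, so vma_to_resize() now returns -EINVAL instead of producing a mapping whose page offsets overflow.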
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index e99f6cd1da1f..9109049f0bbc 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -150,7 +150,7 @@ unsigned long __init free_all_bootmem(void) | |||
150 | { | 150 | { |
151 | /* | 151 | /* |
152 | * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id | 152 | * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id |
153 | * because in some case like Node0 doesnt have RAM installed | 153 | * because in some case like Node0 doesn't have RAM installed |
154 | * low ram will be on Node1 | 154 | * low ram will be on Node1 |
155 | * Use MAX_NUMNODES will make sure all ranges in early_node_map[] | 155 | * Use MAX_NUMNODES will make sure all ranges in early_node_map[] |
156 | * will be used instead of only Node0 related | 156 | * will be used instead of only Node0 related |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6a819d1b2c7d..83fb72c108b7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -84,24 +84,6 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk, | |||
84 | #endif /* CONFIG_NUMA */ | 84 | #endif /* CONFIG_NUMA */ |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * If this is a system OOM (not a memcg OOM) and the task selected to be | ||
88 | * killed is not already running at high (RT) priorities, speed up the | ||
89 | * recovery by boosting the dying task to the lowest FIFO priority. | ||
90 | * That helps with the recovery and avoids interfering with RT tasks. | ||
91 | */ | ||
92 | static void boost_dying_task_prio(struct task_struct *p, | ||
93 | struct mem_cgroup *mem) | ||
94 | { | ||
95 | struct sched_param param = { .sched_priority = 1 }; | ||
96 | |||
97 | if (mem) | ||
98 | return; | ||
99 | |||
100 | if (!rt_task(p)) | ||
101 | sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * The process p may have detached its own ->mm while exiting or through | 87 | * The process p may have detached its own ->mm while exiting or through |
106 | * use_mm(), but one or more of its subthreads may still have a valid | 88 | * use_mm(), but one or more of its subthreads may still have a valid |
107 | * pointer. Return p, or any of its subthreads with a valid ->mm, with | 89 | * pointer. Return p, or any of its subthreads with a valid ->mm, with |
@@ -452,13 +434,6 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) | |||
452 | set_tsk_thread_flag(p, TIF_MEMDIE); | 434 | set_tsk_thread_flag(p, TIF_MEMDIE); |
453 | force_sig(SIGKILL, p); | 435 | force_sig(SIGKILL, p); |
454 | 436 | ||
455 | /* | ||
456 | * We give our sacrificial lamb high priority and access to | ||
457 | * all the memory it needs. That way it should be able to | ||
458 | * exit() and clear out its resources quickly... | ||
459 | */ | ||
460 | boost_dying_task_prio(p, mem); | ||
461 | |||
462 | return 0; | 437 | return 0; |
463 | } | 438 | } |
464 | #undef K | 439 | #undef K |
@@ -482,7 +457,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
482 | */ | 457 | */ |
483 | if (p->flags & PF_EXITING) { | 458 | if (p->flags & PF_EXITING) { |
484 | set_tsk_thread_flag(p, TIF_MEMDIE); | 459 | set_tsk_thread_flag(p, TIF_MEMDIE); |
485 | boost_dying_task_prio(p, mem); | ||
486 | return 0; | 460 | return 0; |
487 | } | 461 | } |
488 | 462 | ||
@@ -556,7 +530,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) | |||
556 | */ | 530 | */ |
557 | if (fatal_signal_pending(current)) { | 531 | if (fatal_signal_pending(current)) { |
558 | set_thread_flag(TIF_MEMDIE); | 532 | set_thread_flag(TIF_MEMDIE); |
559 | boost_dying_task_prio(current, NULL); | ||
560 | return; | 533 | return; |
561 | } | 534 | } |
562 | 535 | ||
@@ -712,7 +685,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
712 | */ | 685 | */ |
713 | if (fatal_signal_pending(current)) { | 686 | if (fatal_signal_pending(current)) { |
714 | set_thread_flag(TIF_MEMDIE); | 687 | set_thread_flag(TIF_MEMDIE); |
715 | boost_dying_task_prio(current, NULL); | ||
716 | return; | 688 | return; |
717 | } | 689 | } |
718 | 690 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d6e7ba7373be..9f8a97b9a350 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -942,7 +942,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
942 | * If breaking a large block of pages, move all free | 942 | * If breaking a large block of pages, move all free |
943 | * pages to the preferred allocation list. If falling | 943 | * pages to the preferred allocation list. If falling |
944 | * back for a reclaimable kernel allocation, be more | 944 | * back for a reclaimable kernel allocation, be more |
945 | * agressive about taking ownership of free pages | 945 | * aggressive about taking ownership of free pages |
946 | */ | 946 | */ |
947 | if (unlikely(current_order >= (pageblock_order >> 1)) || | 947 | if (unlikely(current_order >= (pageblock_order >> 1)) || |
948 | start_migratetype == MIGRATE_RECLAIMABLE || | 948 | start_migratetype == MIGRATE_RECLAIMABLE || |
@@ -3176,7 +3176,7 @@ static __init_refok int __build_all_zonelists(void *data) | |||
3176 | * Called with zonelists_mutex held always | 3176 | * Called with zonelists_mutex held always |
3177 | * unless system_state == SYSTEM_BOOTING. | 3177 | * unless system_state == SYSTEM_BOOTING. |
3178 | */ | 3178 | */ |
3179 | void build_all_zonelists(void *data) | 3179 | void __ref build_all_zonelists(void *data) |
3180 | { | 3180 | { |
3181 | set_zonelist_order(); | 3181 | set_zonelist_order(); |
3182 | 3182 | ||
@@ -3926,7 +3926,7 @@ static void __init find_usable_zone_for_movable(void) | |||
3926 | 3926 | ||
3927 | /* | 3927 | /* |
3928 | * The zone ranges provided by the architecture do not include ZONE_MOVABLE | 3928 | * The zone ranges provided by the architecture do not include ZONE_MOVABLE |
3929 | * because it is sized independant of architecture. Unlike the other zones, | 3929 | * because it is sized independent of architecture. Unlike the other zones, |
3930 | * the starting point for ZONE_MOVABLE is not fixed. It may be different | 3930 | * the starting point for ZONE_MOVABLE is not fixed. It may be different |
3931 | * in each node depending on the size of each node and how evenly kernelcore | 3931 | * in each node depending on the size of each node and how evenly kernelcore |
3932 | * is distributed. This helper function adjusts the zone ranges | 3932 | * is distributed. This helper function adjusts the zone ranges |
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index a12cc3fa9859..99055010cece 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -377,7 +377,7 @@ not_enough_page: | |||
377 | * @new: new id | 377 | * @new: new id |
378 | * | 378 | * |
379 | * Returns old id at success, 0 at failure. | 379 | * Returns old id at success, 0 at failure. |
380 | * (There is no mem_cgroup useing 0 as its id) | 380 | * (There is no mem_cgroup using 0 as its id) |
381 | */ | 381 | */ |
382 | unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, | 382 | unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, |
383 | unsigned short old, unsigned short new) | 383 | unsigned short old, unsigned short new) |
diff --git a/mm/percpu.c b/mm/percpu.c
index 55d4d113fbd3..a160db39b810 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -342,7 +342,7 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) | |||
342 | * @chunk: chunk of interest | 342 | * @chunk: chunk of interest |
343 | * | 343 | * |
344 | * Determine whether area map of @chunk needs to be extended to | 344 | * Determine whether area map of @chunk needs to be extended to |
345 | * accomodate a new allocation. | 345 | * accommodate a new allocation. |
346 | * | 346 | * |
347 | * CONTEXT: | 347 | * CONTEXT: |
348 | * pcpu_lock. | 348 | * pcpu_lock. |
@@ -431,7 +431,7 @@ out_unlock: | |||
431 | * depending on @head, is reduced by @tail bytes and @tail byte block | 431 | * depending on @head, is reduced by @tail bytes and @tail byte block |
432 | * is inserted after the target block. | 432 | * is inserted after the target block. |
433 | * | 433 | * |
434 | * @chunk->map must have enough free slots to accomodate the split. | 434 | * @chunk->map must have enough free slots to accommodate the split. |
435 | * | 435 | * |
436 | * CONTEXT: | 436 | * CONTEXT: |
437 | * pcpu_lock. | 437 | * pcpu_lock. |
@@ -1435,7 +1435,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |||
1435 | /* | 1435 | /* |
1436 | * Determine min_unit_size, alloc_size and max_upa such that | 1436 | * Determine min_unit_size, alloc_size and max_upa such that |
1437 | * alloc_size is multiple of atom_size and is the smallest | 1437 | * alloc_size is multiple of atom_size and is the smallest |
1438 | * which can accomodate 4k aligned segments which are equal to | 1438 | * which can accommodate 4k aligned segments which are equal to |
1439 | * or larger than min_unit_size. | 1439 | * or larger than min_unit_size. |
1440 | */ | 1440 | */ |
1441 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1441 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); |
@@ -1550,7 +1550,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |||
1550 | * @atom_size: allocation atom size | 1550 | * @atom_size: allocation atom size |
1551 | * @cpu_distance_fn: callback to determine distance between cpus, optional | 1551 | * @cpu_distance_fn: callback to determine distance between cpus, optional |
1552 | * @alloc_fn: function to allocate percpu page | 1552 | * @alloc_fn: function to allocate percpu page |
1553 | * @free_fn: funtion to free percpu page | 1553 | * @free_fn: function to free percpu page |
1554 | * | 1554 | * |
1555 | * This is a helper to ease setting up embedded first percpu chunk and | 1555 | * This is a helper to ease setting up embedded first percpu chunk and |
1556 | * can be called where pcpu_setup_first_chunk() is expected. | 1556 | * can be called where pcpu_setup_first_chunk() is expected. |
@@ -1678,7 +1678,7 @@ out_free: | |||
1678 | * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages | 1678 | * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages |
1679 | * @reserved_size: the size of reserved percpu area in bytes | 1679 | * @reserved_size: the size of reserved percpu area in bytes |
1680 | * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE | 1680 | * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE |
1681 | * @free_fn: funtion to free percpu page, always called with PAGE_SIZE | 1681 | * @free_fn: function to free percpu page, always called with PAGE_SIZE |
1682 | * @populate_pte_fn: function to populate pte | 1682 | * @populate_pte_fn: function to populate pte |
1683 | * | 1683 | * |
1684 | * This is a helper to ease setting up page-remapped first percpu | 1684 | * This is a helper to ease setting up page-remapped first percpu |
diff --git a/mm/shmem.c b/mm/shmem.c
index 58da7c150ba6..8fa27e4e582a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -421,7 +421,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long | |||
421 | * a waste to allocate index if we cannot allocate data. | 421 | * a waste to allocate index if we cannot allocate data. |
422 | */ | 422 | */ |
423 | if (sbinfo->max_blocks) { | 423 | if (sbinfo->max_blocks) { |
424 | if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0) | 424 | if (percpu_counter_compare(&sbinfo->used_blocks, |
425 | sbinfo->max_blocks - 1) >= 0) | ||
425 | return ERR_PTR(-ENOSPC); | 426 | return ERR_PTR(-ENOSPC); |
426 | percpu_counter_inc(&sbinfo->used_blocks); | 427 | percpu_counter_inc(&sbinfo->used_blocks); |
427 | spin_lock(&inode->i_lock); | 428 | spin_lock(&inode->i_lock); |
@@ -1397,7 +1398,8 @@ repeat: | |||
1397 | shmem_swp_unmap(entry); | 1398 | shmem_swp_unmap(entry); |
1398 | sbinfo = SHMEM_SB(inode->i_sb); | 1399 | sbinfo = SHMEM_SB(inode->i_sb); |
1399 | if (sbinfo->max_blocks) { | 1400 | if (sbinfo->max_blocks) { |
1400 | if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) || | 1401 | if (percpu_counter_compare(&sbinfo->used_blocks, |
1402 | sbinfo->max_blocks) >= 0 || | ||
1401 | shmem_acct_block(info->flags)) { | 1403 | shmem_acct_block(info->flags)) { |
1402 | spin_unlock(&info->lock); | 1404 | spin_unlock(&info->lock); |
1403 | error = -ENOSPC; | 1405 | error = -ENOSPC; |
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -878,7 +878,7 @@ static struct array_cache *alloc_arraycache(int node, int entries, | |||
878 | nc = kmalloc_node(memsize, gfp, node); | 878 | nc = kmalloc_node(memsize, gfp, node); |
879 | /* | 879 | /* |
880 | * The array_cache structures contain pointers to free object. | 880 | * The array_cache structures contain pointers to free object. |
881 | * However, when such objects are allocated or transfered to another | 881 | * However, when such objects are allocated or transferred to another |
882 | * cache the pointers are not cleared and they could be counted as | 882 | * cache the pointers are not cleared and they could be counted as |
883 | * valid references during a kmemleak scan. Therefore, kmemleak must | 883 | * valid references during a kmemleak scan. Therefore, kmemleak must |
884 | * not scan such objects. | 884 | * not scan such objects. |
@@ -2606,7 +2606,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); | |||
2606 | * | 2606 | * |
2607 | * The cache must be empty before calling this function. | 2607 | * The cache must be empty before calling this function. |
2608 | * | 2608 | * |
2609 | * The caller must guarantee that noone will allocate memory from the cache | 2609 | * The caller must guarantee that no one will allocate memory from the cache |
2610 | * during the kmem_cache_destroy(). | 2610 | * during the kmem_cache_destroy(). |
2611 | */ | 2611 | */ |
2612 | void kmem_cache_destroy(struct kmem_cache *cachep) | 2612 | void kmem_cache_destroy(struct kmem_cache *cachep) |
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -64,7 +64,7 @@ | |||
64 | * we must stay away from it for a while since we may cause a bouncing | 64 | * we must stay away from it for a while since we may cause a bouncing |
65 | * cacheline if we try to acquire the lock. So go onto the next slab. | 65 | * cacheline if we try to acquire the lock. So go onto the next slab. |
66 | * If all pages are busy then we may allocate a new slab instead of reusing | 66 | * If all pages are busy then we may allocate a new slab instead of reusing |
67 | * a partial slab. A new slab has noone operating on it and thus there is | 67 | * a partial slab. A new slab has no one operating on it and thus there is |
68 | * no danger of cacheline contention. | 68 | * no danger of cacheline contention. |
69 | * | 69 | * |
70 | * Interrupts are disabled during allocation and deallocation in order to | 70 | * Interrupts are disabled during allocation and deallocation in order to |
@@ -1929,7 +1929,7 @@ redo: | |||
1929 | else { | 1929 | else { |
1930 | #ifdef CONFIG_CMPXCHG_LOCAL | 1930 | #ifdef CONFIG_CMPXCHG_LOCAL |
1931 | /* | 1931 | /* |
1932 | * The cmpxchg will only match if there was no additonal | 1932 | * The cmpxchg will only match if there was no additional |
1933 | * operation and if we are on the right processor. | 1933 | * operation and if we are on the right processor. |
1934 | * | 1934 | * |
1935 | * The cmpxchg does the following atomically (without lock semantics!) | 1935 | * The cmpxchg does the following atomically (without lock semantics!) |
@@ -3547,7 +3547,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) | |||
3547 | 3547 | ||
3548 | ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); | 3548 | ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); |
3549 | 3549 | ||
3550 | /* Honor the call site pointer we recieved. */ | 3550 | /* Honor the call site pointer we received. */ |
3551 | trace_kmalloc(caller, ret, size, s->size, gfpflags); | 3551 | trace_kmalloc(caller, ret, size, s->size, gfpflags); |
3552 | 3552 | ||
3553 | return ret; | 3553 | return ret; |
@@ -3577,7 +3577,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
3577 | 3577 | ||
3578 | ret = slab_alloc(s, gfpflags, node, caller); | 3578 | ret = slab_alloc(s, gfpflags, node, caller); |
3579 | 3579 | ||
3580 | /* Honor the call site pointer we recieved. */ | 3580 | /* Honor the call site pointer we received. */ |
3581 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); | 3581 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); |
3582 | 3582 | ||
3583 | return ret; | 3583 | return ret; |
diff --git a/mm/sparse.c b/mm/sparse.c
index 93250207c5cf..aa64b12831a2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -500,7 +500,7 @@ void __init sparse_init(void) | |||
500 | * so alloc 2M (with 2M align) and 24 bytes in turn will | 500 | * so alloc 2M (with 2M align) and 24 bytes in turn will |
501 | * make next 2M slip to one more 2M later. | 501 | * make next 2M slip to one more 2M later. |
502 | * then in big system, the memory will have a lot of holes... | 502 | * then in big system, the memory will have a lot of holes... |
503 | * here try to allocate 2M pages continously. | 503 | * here try to allocate 2M pages continuously. |
504 | * | 504 | * |
505 | * powerpc need to call sparse_init_one_section right after each | 505 | * powerpc need to call sparse_init_one_section right after each |
506 | * sparse_early_mem_map_alloc, so allocate usemap_map at first. | 506 | * sparse_early_mem_map_alloc, so allocate usemap_map at first. |
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
@@ -227,7 +227,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
227 | /* | 227 | /* |
228 | * Like get_user_pages_fast() except its IRQ-safe in that it won't fall | 228 | * Like get_user_pages_fast() except its IRQ-safe in that it won't fall |
229 | * back to the regular GUP. | 229 | * back to the regular GUP. |
230 | * If the architecture not support this fucntion, simply return with no | 230 | * If the architecture not support this function, simply return with no |
231 | * page pinned | 231 | * page pinned |
232 | */ | 232 | */ |
233 | int __attribute__((weak)) __get_user_pages_fast(unsigned long start, | 233 | int __attribute__((weak)) __get_user_pages_fast(unsigned long start, |
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f73b8657c2d0..f6b435c80079 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/memcontrol.h> | 41 | #include <linux/memcontrol.h> |
42 | #include <linux/delayacct.h> | 42 | #include <linux/delayacct.h> |
43 | #include <linux/sysctl.h> | 43 | #include <linux/sysctl.h> |
44 | #include <linux/oom.h> | ||
44 | 45 | ||
45 | #include <asm/tlbflush.h> | 46 | #include <asm/tlbflush.h> |
46 | #include <asm/div64.h> | 47 | #include <asm/div64.h> |
@@ -1065,7 +1066,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, | |||
1065 | * surrounding the tag page. Only take those pages of | 1066 | * surrounding the tag page. Only take those pages of |
1066 | * the same active state as that tag page. We may safely | 1067 | * the same active state as that tag page. We may safely |
1067 | * round the target page pfn down to the requested order | 1068 | * round the target page pfn down to the requested order |
1068 | * as the mem_map is guarenteed valid out to MAX_ORDER, | 1069 | * as the mem_map is guaranteed valid out to MAX_ORDER, |
1069 | * where that page is in a different zone we will detect | 1070 | * where that page is in a different zone we will detect |
1070 | * it from its zone id and abort this block scan. | 1071 | * it from its zone id and abort this block scan. |
1071 | */ | 1072 | */ |
@@ -1988,17 +1989,12 @@ static bool zone_reclaimable(struct zone *zone) | |||
1988 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; | 1989 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; |
1989 | } | 1990 | } |
1990 | 1991 | ||
1991 | /* | 1992 | /* All zones in zonelist are unreclaimable? */ |
1992 | * As hibernation is going on, kswapd is freezed so that it can't mark | ||
1993 | * the zone into all_unreclaimable. It can't handle OOM during hibernation. | ||
1994 | * So let's check zone's unreclaimable in direct reclaim as well as kswapd. | ||
1995 | */ | ||
1996 | static bool all_unreclaimable(struct zonelist *zonelist, | 1993 | static bool all_unreclaimable(struct zonelist *zonelist, |
1997 | struct scan_control *sc) | 1994 | struct scan_control *sc) |
1998 | { | 1995 | { |
1999 | struct zoneref *z; | 1996 | struct zoneref *z; |
2000 | struct zone *zone; | 1997 | struct zone *zone; |
2001 | bool all_unreclaimable = true; | ||
2002 | 1998 | ||
2003 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 1999 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
2004 | gfp_zone(sc->gfp_mask), sc->nodemask) { | 2000 | gfp_zone(sc->gfp_mask), sc->nodemask) { |
@@ -2006,13 +2002,11 @@ static bool all_unreclaimable(struct zonelist *zonelist, | |||
2006 | continue; | 2002 | continue; |
2007 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2003 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
2008 | continue; | 2004 | continue; |
2009 | if (zone_reclaimable(zone)) { | 2005 | if (!zone->all_unreclaimable) |
2010 | all_unreclaimable = false; | 2006 | return false; |
2011 | break; | ||
2012 | } | ||
2013 | } | 2007 | } |
2014 | 2008 | ||
2015 | return all_unreclaimable; | 2009 | return true; |
2016 | } | 2010 | } |
2017 | 2011 | ||
2018 | /* | 2012 | /* |
@@ -2108,6 +2102,14 @@ out: | |||
2108 | if (sc->nr_reclaimed) | 2102 | if (sc->nr_reclaimed) |
2109 | return sc->nr_reclaimed; | 2103 | return sc->nr_reclaimed; |
2110 | 2104 | ||
2105 | /* | ||
2106 | * As hibernation is going on, kswapd is freezed so that it can't mark | ||
2107 | * the zone into all_unreclaimable. Thus bypassing all_unreclaimable | ||
2108 | * check. | ||
2109 | */ | ||
2110 | if (oom_killer_disabled) | ||
2111 | return 0; | ||
2112 | |||
2111 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 2113 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
2112 | if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) | 2114 | if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) |
2113 | return 1; | 2115 | return 1; |
@@ -2224,7 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2224 | * o a 16M DMA zone that is balanced will not balance a zone on any | 2226 | * o a 16M DMA zone that is balanced will not balance a zone on any |
2225 | * reasonable sized machine | 2227 | * reasonable sized machine |
2226 | * o On all other machines, the top zone must be at least a reasonable | 2228 | * o On all other machines, the top zone must be at least a reasonable |
2227 | * precentage of the middle zones. For example, on 32-bit x86, highmem | 2229 | * percentage of the middle zones. For example, on 32-bit x86, highmem |
2228 | * would need to be at least 256M for it to be balance a whole node. | 2230 | * would need to be at least 256M for it to be balance a whole node. |
2229 | * Similarly, on x86-64 the Normal zone would need to be at least 1G | 2231 | * Similarly, on x86-64 the Normal zone would need to be at least 1G |
2230 | * to balance a node on its own. These seemed like reasonable ratios. | 2232 | * to balance a node on its own. These seemed like reasonable ratios. |
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 772b39b87d95..897ea9e88238 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -321,9 +321,12 @@ static inline void mod_state(struct zone *zone, | |||
321 | /* | 321 | /* |
322 | * The fetching of the stat_threshold is racy. We may apply | 322 | * The fetching of the stat_threshold is racy. We may apply |
323 | * a counter threshold to the wrong the cpu if we get | 323 | * a counter threshold to the wrong the cpu if we get |
324 | * rescheduled while executing here. However, the following | 324 | * rescheduled while executing here. However, the next |
325 | * will apply the threshold again and therefore bring the | 325 | * counter update will apply the threshold again and |
326 | * counter under the threshold. | 326 | * therefore bring the counter under the threshold again. |
327 | * | ||
328 | * Most of the time the thresholds are the same anyways | ||
329 | * for all cpus in a zone. | ||
327 | */ | 330 | */ |
328 | t = this_cpu_read(pcp->stat_threshold); | 331 | t = this_cpu_read(pcp->stat_threshold); |
329 | 332 | ||
@@ -945,7 +948,16 @@ static const char * const vmstat_text[] = { | |||
945 | "unevictable_pgs_cleared", | 948 | "unevictable_pgs_cleared", |
946 | "unevictable_pgs_stranded", | 949 | "unevictable_pgs_stranded", |
947 | "unevictable_pgs_mlockfreed", | 950 | "unevictable_pgs_mlockfreed", |
951 | |||
952 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
953 | "thp_fault_alloc", | ||
954 | "thp_fault_fallback", | ||
955 | "thp_collapse_alloc", | ||
956 | "thp_collapse_alloc_failed", | ||
957 | "thp_split", | ||
948 | #endif | 958 | #endif |
959 | |||
960 | #endif /* CONFIG_VM_EVENTS_COUNTERS */ | ||
949 | }; | 961 | }; |
950 | 962 | ||
951 | static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, | 963 | static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, |
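With the vmstat_text entries above in place, kernels built with CONFIG_TRANSPARENT_HUGEPAGE export the new counters as additional lines in /proc/vmstat. Illustrative excerpt (the counter values are placeholders, not real measurements):

	thp_fault_alloc 1453
	thp_fault_fallback 12
	thp_collapse_alloc 87
	thp_collapse_alloc_failed 3
	thp_split 5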