Diffstat (limited to 'mm')
-rw-r--r-- | mm/frontswap.c      |  2
-rw-r--r-- | mm/huge_memory.c    |  7
-rw-r--r-- | mm/hugetlb.c        |  2
-rw-r--r-- | mm/memcontrol.c     | 28
-rw-r--r-- | mm/memory.c         |  9
-rw-r--r-- | mm/memory_hotplug.c |  9
-rw-r--r-- | mm/migrate.c        | 25
-rw-r--r-- | mm/mmu_notifier.c   | 79
-rw-r--r-- | mm/page_alloc.c     |  8
-rw-r--r-- | mm/pagewalk.c       | 70
-rw-r--r-- | mm/slab_common.c    |  4
-rw-r--r-- | mm/swap_state.c     | 18
-rw-r--r-- | mm/swapfile.c       |  2
13 files changed, 151 insertions(+), 112 deletions(-)
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 538367ef1372..1b24bdcb3197 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -319,7 +319,7 @@ void __frontswap_invalidate_area(unsigned type)
 			return;
 		frontswap_ops->invalidate_area(type);
 		atomic_set(&sis->frontswap_pages, 0);
-		memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+		bitmap_zero(sis->frontswap_map, sis->max);
 	}
 	clear_bit(type, need_init);
 }
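For reference, the sizing rule behind this fix (and the matching vzalloc() change in the mm/swapfile.c hunk at the end of this series): frontswap_map holds one bit per swap page, stored as an array of unsigned long, so it occupies BITS_TO_LONGS(max) * sizeof(long) bytes and is cleared with bitmap_zero(). A minimal userspace sketch of that arithmetic; the page counts below are illustrative, not taken from the patch:

#include <stdio.h>

/* Userspace stand-ins for the kernel helpers the fix relies on. */
#define BITS_PER_LONG		(8 * sizeof(long))
#define BITS_TO_LONGS(nbits)	(((nbits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	/* Illustrative swap sizes in pages; not taken from the patch. */
	unsigned long sizes[] = { 100, 1UL << 20 };

	for (int i = 0; i < 2; i++) {
		unsigned long maxpages = sizes[i];
		/* One bit per page, rounded up to whole longs, as bitops require. */
		size_t correct = BITS_TO_LONGS(maxpages) * sizeof(long);
		/* The old expression only coincides with this on 64-bit and never rounds up. */
		size_t old = maxpages / sizeof(long);

		printf("maxpages=%lu: bitmap needs %zu bytes, old formula gave %zu\n",
		       maxpages, correct, old);
	}
	return 0;
}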
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 03a89a2f464b..362c329b83fe 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2325,7 +2325,12 @@ static void collapse_huge_page(struct mm_struct *mm,
 		pte_unmap(pte);
 		spin_lock(&mm->page_table_lock);
 		BUG_ON(!pmd_none(*pmd));
-		set_pmd_at(mm, address, pmd, _pmd);
+		/*
+		 * We can only use set_pmd_at when establishing
+		 * hugepmds and never for establishing regular pmds that
+		 * points to regular pagetables. Use pmd_populate for that
+		 */
+		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock_write(vma->anon_vma);
 		goto out;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8feeeca6686..e2bfbf73a551 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2839,7 +2839,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
-			migration_entry_wait(mm, (pmd_t *)ptep, address);
+			migration_entry_wait_huge(mm, ptep);
 			return 0;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cb1c9dedf9b6..194721839cf5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1199,7 +1199,6 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 
 			mz = mem_cgroup_zoneinfo(root, nid, zid);
 			iter = &mz->reclaim_iter[reclaim->priority];
-			last_visited = iter->last_visited;
 			if (prev && reclaim->generation != iter->generation) {
 				iter->last_visited = NULL;
 				goto out_unlock;
@@ -1218,13 +1217,12 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 			 * is alive.
 			 */
 			dead_count = atomic_read(&root->dead_count);
-			smp_rmb();
-			last_visited = iter->last_visited;
-			if (last_visited) {
-				if ((dead_count != iter->last_dead_count) ||
-				    !css_tryget(&last_visited->css)) {
-					last_visited = NULL;
-				}
+			if (dead_count == iter->last_dead_count) {
+				smp_rmb();
+				last_visited = iter->last_visited;
+				if (last_visited &&
+				    !css_tryget(&last_visited->css))
+					last_visited = NULL;
 			}
 		}
 
@@ -3141,8 +3139,6 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 		return -ENOMEM;
 	}
 
-	INIT_WORK(&s->memcg_params->destroy,
-			kmem_cache_destroy_work_func);
 	s->memcg_params->is_root_cache = true;
 
 	/*
@@ -4108,8 +4104,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
 	if (mem_cgroup_disabled())
 		return NULL;
 
-	VM_BUG_ON(PageSwapCache(page));
-
 	if (PageTransHuge(page)) {
 		nr_pages <<= compound_order(page);
 		VM_BUG_ON(!PageTransHuge(page));
@@ -4205,6 +4199,18 @@ void mem_cgroup_uncharge_page(struct page *page)
 	if (page_mapped(page))
 		return;
 	VM_BUG_ON(page->mapping && !PageAnon(page));
+	/*
+	 * If the page is in swap cache, uncharge should be deferred
+	 * to the swap path, which also properly accounts swap usage
+	 * and handles memcg lifetime.
+	 *
+	 * Note that this check is not stable and reclaim may add the
+	 * page to swap cache at any time after this.  However, if the
+	 * page is not in swap cache by the time page->mapcount hits
+	 * 0, there won't be any page table references to the swap
+	 * slot, and reclaim will free it and not actually write the
+	 * page to disk.
+	 */
 	if (PageSwapCache(page))
 		return;
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
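The mem_cgroup_iter() change above reorders the validation: the cached position is only looked at after the root's dead_count is known to match the generation the pointer was cached under. A toy, single-threaded sketch of that ordering follows; the struct and function names are invented for illustration, and the real code additionally needs smp_rmb() and css_tryget() to be safe against concurrent cgroup destruction.

#include <stdio.h>

/* Invented names; a simplified model of the generation check, not kernel code. */
struct cached_iter {
	unsigned int last_dead_count;	/* generation the pointer was cached at */
	void *last_visited;		/* possibly stale cached position */
};

static void *resume_position(struct cached_iter *iter, unsigned int dead_count)
{
	void *last_visited = NULL;

	/* Check the generation first; only then trust the cached pointer. */
	if (dead_count == iter->last_dead_count)
		last_visited = iter->last_visited;

	/* NULL means: start the walk from scratch. */
	return last_visited;
}

int main(void)
{
	struct cached_iter iter = { .last_dead_count = 3, .last_visited = &iter };

	printf("same generation  -> %p\n", resume_position(&iter, 3));
	printf("stale generation -> %p\n", resume_position(&iter, 4));
	return 0;
}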
diff --git a/mm/memory.c b/mm/memory.c
index 6dc1882fbd72..61a262b08e53 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -220,7 +220,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 	tlb->start	= -1UL;
 	tlb->end	= 0;
 	tlb->need_flush = 0;
-	tlb->fast_mode  = (num_possible_cpus() == 1);
 	tlb->local.next = NULL;
 	tlb->local.nr   = 0;
 	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
@@ -244,9 +243,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_table_flush(tlb);
 #endif
 
-	if (tlb_fast_mode(tlb))
-		return;
-
 	for (batch = &tlb->local; batch; batch = batch->next) {
 		free_pages_and_swap_cache(batch->pages, batch->nr);
 		batch->nr = 0;
@@ -288,11 +284,6 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 	VM_BUG_ON(!tlb->need_flush);
 
-	if (tlb_fast_mode(tlb)) {
-		free_page_and_swap_cache(page);
-		return 1; /* avoid calling tlb_flush_mmu() */
-	}
-
 	batch = tlb->active;
 	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a221fac1f47d..1ad92b46753e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -720,9 +720,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	start = phys_start_pfn << PAGE_SHIFT;
 	size = nr_pages * PAGE_SIZE;
 	ret = release_mem_region_adjustable(&iomem_resource, start, size);
-	if (ret)
-		pr_warn("Unable to release resource <%016llx-%016llx> (%d)\n",
-			start, start + size - 1, ret);
+	if (ret) {
+		resource_size_t endres = start + size - 1;
+
+		pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
+			&start, &endres, ret);
+	}
 
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
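The printk change above exists because resource_size_t is 32 or 64 bits wide depending on CONFIG_PHYS_ADDR_T_64BIT, so passing it directly to a %016llx conversion is a format/argument mismatch on 32-bit configurations; the kernel's %pa specifier avoids this by taking the address of the value. A userspace analog of the same pitfall, using an explicit widening cast in place of %pa; the typedef and the PHYS_ADDR_T_64BIT macro below are stand-ins, not kernel headers:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's config-dependent resource_size_t width. */
#ifdef PHYS_ADDR_T_64BIT
typedef uint64_t resource_size_t;
#else
typedef uint32_t resource_size_t;
#endif

int main(void)
{
	resource_size_t start = 0x40000000u, size = 0x10000000u;
	resource_size_t endres = start + size - 1;

	/* Widen explicitly so the format string is correct for either typedef. */
	printf("Unable to release resource <%016llx-%016llx>\n",
	       (unsigned long long)start, (unsigned long long)endres);
	return 0;
}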
diff --git a/mm/migrate.c b/mm/migrate.c
index 27ed22579fd9..6f0c24438bba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -165,7 +165,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		pte = arch_make_huge_pte(pte, vma, new, 0);
 	}
 #endif
-	flush_cache_page(vma, addr, pte_pfn(pte));
+	flush_dcache_page(new);
 	set_pte_at(mm, addr, ptep, pte);
 
 	if (PageHuge(new)) {
@@ -200,15 +200,14 @@ static void remove_migration_ptes(struct page *old, struct page *new)
  * get to the page and wait until migration is finished.
  * When we return from this function the fault will be retried.
  */
-void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
-				unsigned long address)
+static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
+				spinlock_t *ptl)
 {
-	pte_t *ptep, pte;
-	spinlock_t *ptl;
+	pte_t pte;
 	swp_entry_t entry;
 	struct page *page;
 
-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
 		goto out;
@@ -236,6 +235,20 @@ out:
 	pte_unmap_unlock(ptep, ptl);
 }
 
+void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long address)
+{
+	spinlock_t *ptl = pte_lockptr(mm, pmd);
+	pte_t *ptep = pte_offset_map(pmd, address);
+	__migration_entry_wait(mm, ptep, ptl);
+}
+
+void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+{
+	spinlock_t *ptl = &(mm)->page_table_lock;
+	__migration_entry_wait(mm, pte, ptl);
+}
+
 #ifdef CONFIG_BLOCK
 /* Returns true if all buffers are successfully locked */
 static bool buffer_migrate_lock_buffers(struct buffer_head *head,
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index be04122fb277..6725ff183374 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_struct *mm)
 	int id;
 
 	/*
-	 * srcu_read_lock() here will block synchronize_srcu() in
-	 * mmu_notifier_unregister() until all registered
-	 * ->release() callouts this function makes have
-	 * returned.
+	 * SRCU here will block mmu_notifier_unregister until
+	 * ->release returns.
 	 */
 	id = srcu_read_lock(&srcu);
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
+		/*
+		 * If ->release runs before mmu_notifier_unregister it must be
+		 * handled, as it's the only way for the driver to flush all
+		 * existing sptes and stop the driver from establishing any more
+		 * sptes before all the pages in the mm are freed.
+		 */
+		if (mn->ops->release)
+			mn->ops->release(mn, mm);
+	srcu_read_unlock(&srcu, id);
+
 	spin_lock(&mm->mmu_notifier_mm->lock);
 	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
 		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
 				 struct mmu_notifier,
 				 hlist);
-
 		/*
-		 * Unlink.  This will prevent mmu_notifier_unregister()
-		 * from also making the ->release() callout.
+		 * We arrived before mmu_notifier_unregister so
+		 * mmu_notifier_unregister will do nothing other than to wait
+		 * for ->release to finish and for mmu_notifier_unregister to
+		 * return.
 		 */
 		hlist_del_init_rcu(&mn->hlist);
-		spin_unlock(&mm->mmu_notifier_mm->lock);
-
-		/*
-		 * Clear sptes. (see 'release' description in mmu_notifier.h)
-		 */
-		if (mn->ops->release)
-			mn->ops->release(mn, mm);
-
-		spin_lock(&mm->mmu_notifier_mm->lock);
 	}
 	spin_unlock(&mm->mmu_notifier_mm->lock);
 
 	/*
-	 * All callouts to ->release() which we have done are complete.
-	 * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
-	 */
-	srcu_read_unlock(&srcu, id);
-
-	/*
-	 * mmu_notifier_unregister() may have unlinked a notifier and may
-	 * still be calling out to it.  Additionally, other notifiers
-	 * may have been active via vmtruncate() et. al. Block here
-	 * to ensure that all notifier callouts for this mm have been
-	 * completed and the sptes are really cleaned up before returning
-	 * to exit_mmap().
+	 * synchronize_srcu here prevents mmu_notifier_release from returning to
+	 * exit_mmap (which would proceed with freeing all pages in the mm)
+	 * until the ->release method returns, if it was invoked by
+	 * mmu_notifier_unregister.
+	 *
+	 * The mmu_notifier_mm can't go away from under us because one mm_count
+	 * is held by exit_mmap.
 	 */
 	synchronize_srcu(&srcu);
 }
@@ -292,31 +288,34 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	BUG_ON(atomic_read(&mm->mm_count) <= 0);
 
-	spin_lock(&mm->mmu_notifier_mm->lock);
 	if (!hlist_unhashed(&mn->hlist)) {
+		/*
+		 * SRCU here will force exit_mmap to wait for ->release to
+		 * finish before freeing the pages.
+		 */
 		int id;
 
+		id = srcu_read_lock(&srcu);
 		/*
-		 * Ensure we synchronize up with __mmu_notifier_release().
+		 * exit_mmap will block in mmu_notifier_release to guarantee
+		 * that ->release is called before freeing the pages.
 		 */
-		id = srcu_read_lock(&srcu);
-
-		hlist_del_rcu(&mn->hlist);
-		spin_unlock(&mm->mmu_notifier_mm->lock);
-
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
+		srcu_read_unlock(&srcu, id);
 
+		spin_lock(&mm->mmu_notifier_mm->lock);
 		/*
-		 * Allow __mmu_notifier_release() to complete.
+		 * Can not use list_del_rcu() since __mmu_notifier_release
+		 * can delete it before we hold the lock.
 		 */
-		srcu_read_unlock(&srcu, id);
-	} else
+		hlist_del_init_rcu(&mn->hlist);
 		spin_unlock(&mm->mmu_notifier_mm->lock);
+	}
 
 	/*
-	 * Wait for any running method to finish, including ->release() if it
-	 * was run by __mmu_notifier_release() instead of us.
+	 * Wait for any running method to finish, of course including
+	 * ->release if it was run by mmu_notifier_relase instead of us.
 	 */
 	synchronize_srcu(&srcu);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 98cbdf6e5532..c3edb624fccf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1628,6 +1628,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	long min = mark;
 	long lowmem_reserve = z->lowmem_reserve[classzone_idx];
 	int o;
+	long free_cma = 0;
 
 	free_pages -= (1 << order) - 1;
 	if (alloc_flags & ALLOC_HIGH)
@@ -1637,9 +1638,10 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 #ifdef CONFIG_CMA
 	/* If allocation can't use CMA areas don't use free CMA pages */
 	if (!(alloc_flags & ALLOC_CMA))
-		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+		free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
-	if (free_pages <= min + lowmem_reserve)
+
+	if (free_pages - free_cma <= min + lowmem_reserve)
 		return false;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
@@ -5158,7 +5160,7 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end,
 	for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
 		if (poison)
 			memset((void *)pos, poison, PAGE_SIZE);
-		free_reserved_page(virt_to_page(pos));
+		free_reserved_page(virt_to_page((void *)pos));
 	}
 
 	if (pages && s)
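A simplified model of the first comparison in __zone_watermark_ok() after the change above: when the allocation cannot use CMA areas, free CMA pages are discounted only in the comparison rather than being subtracted from free_pages itself. The helper name and the numbers below are illustrative only:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch of the watermark comparison, not the kernel function. */
static bool watermark_ok(long free_pages, long free_cma, long min, long lowmem_reserve)
{
	/* CMA pages the allocation can't use must not count as headroom. */
	return free_pages - free_cma > min + lowmem_reserve;
}

int main(void)
{
	/* 10000 pages free, 8000 of them in CMA, watermark 1500 + 500 reserve. */
	printf("CMA usable:     %d\n", watermark_ok(10000, 0, 1500, 500));
	printf("CMA not usable: %d\n", watermark_ok(10000, 8000, 1500, 500));
	return 0;
}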
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 35aa294656cd..5da2cbcfdbb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 	return 0;
 }
 
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	struct vm_area_struct *vma;
-
-	/* We don't need vma lookup at all. */
-	if (!walk->hugetlb_entry)
-		return NULL;
-
-	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-	vma = find_vma(walk->mm, addr);
-	if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
-		return vma;
-
-	return NULL;
-}
-
 #else /* CONFIG_HUGETLB_PAGE */
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	return NULL;
-}
-
 static int walk_hugetlb_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end,
 		struct mm_walk *walk)
@@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	if (!walk->mm)
 		return -EINVAL;
 
+	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+
 	pgd = pgd_offset(walk->mm, addr);
 	do {
-		struct vm_area_struct *vma;
+		struct vm_area_struct *vma = NULL;
 
 		next = pgd_addr_end(addr, end);
 
 		/*
-		 * handle hugetlb vma individually because pagetable walk for
-		 * the hugetlb page is dependent on the architecture and
-		 * we can't handled it in the same manner as non-huge pages.
+		 * This function was not intended to be vma based.
+		 * But there are vma special cases to be handled:
+		 * - hugetlb vma's
+		 * - VM_PFNMAP vma's
 		 */
-		vma = hugetlb_vma(addr, walk);
+		vma = find_vma(walk->mm, addr);
 		if (vma) {
-			if (vma->vm_end < next)
+			/*
+			 * There are no page structures backing a VM_PFNMAP
+			 * range, so do not allow split_huge_page_pmd().
+			 */
+			if ((vma->vm_start <= addr) &&
+			    (vma->vm_flags & VM_PFNMAP)) {
 				next = vma->vm_end;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 			/*
-			 * Hugepage is very tightly coupled with vma, so
-			 * walk through hugetlb entries within a given vma.
+			 * Handle hugetlb vma individually because pagetable
+			 * walk for the hugetlb page is dependent on the
+			 * architecture and we can't handled it in the same
+			 * manner as non-huge pages.
 			 */
-			err = walk_hugetlb_range(vma, addr, next, walk);
-			if (err)
-				break;
-			pgd = pgd_offset(walk->mm, next);
-			continue;
+			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+			    is_vm_hugetlb_page(vma)) {
+				if (vma->vm_end < next)
+					next = vma->vm_end;
+				/*
+				 * Hugepage is very tightly coupled with vma,
+				 * so walk through hugetlb entries within a
+				 * given vma.
+				 */
+				err = walk_hugetlb_range(vma, addr, next, walk);
+				if (err)
+					break;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 		}
 
 		if (pgd_none_or_clear_bad(pgd)) {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index ff3218a0f5e1..2d414508e9ec 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -373,8 +373,10 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
 {
 	int index;
 
-	if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
+	if (size > KMALLOC_MAX_SIZE) {
+		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
 		return NULL;
+	}
 
 	if (size <= 192) {
 		if (!size)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b3d40dcf3624..f24ab0dff554 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -336,8 +336,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * Swap entry may have been freed since our caller observed it.
 		 */
 		err = swapcache_prepare(entry);
-		if (err == -EEXIST) {	/* seems racy */
+		if (err == -EEXIST) {
 			radix_tree_preload_end();
+			/*
+			 * We might race against get_swap_page() and stumble
+			 * across a SWAP_HAS_CACHE swap_map entry whose page
+			 * has not been brought into the swapcache yet, while
+			 * the other end is scheduled away waiting on discard
+			 * I/O completion at scan_swap_map().
+			 *
+			 * In order to avoid turning this transitory state
+			 * into a permanent loop around this -EEXIST case
+			 * if !CONFIG_PREEMPT and the I/O completion happens
+			 * to be waiting on the CPU waitqueue where we are now
+			 * busy looping, we just conditionally invoke the
+			 * scheduler here, if there are some more important
+			 * tasks to run.
+			 */
+			cond_resched();
 			continue;
 		}
 		if (err) {		/* swp entry is obsolete ? */
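A userspace analog of the swap_state.c change above: a loop that retries on a transient -EEXIST must yield so the other party can finish, otherwise it can spin indefinitely on a non-preemptible kernel. In this sketch sched_yield() stands in for cond_resched(), and try_claim() is an invented stand-in for swapcache_prepare():

#include <errno.h>
#include <sched.h>
#include <stdio.h>

static int attempts;

/* Invented stand-in: pretend the slot stays transiently busy for a few tries. */
static int try_claim(void)
{
	return ++attempts < 3 ? -EEXIST : 0;
}

int main(void)
{
	int err;

	for (;;) {
		err = try_claim();
		if (err == -EEXIST) {
			sched_yield();	/* let the current owner of the slot make progress */
			continue;
		}
		break;
	}
	printf("claimed after %d attempts (err=%d)\n", attempts, err);
	return 0;
}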
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c340d908b27..746af55b8455 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2116,7 +2116,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	}
 	/* frontswap enabled? set up bit-per-page map for frontswap */
 	if (frontswap_enabled)
-		frontswap_map = vzalloc(maxpages / sizeof(long));
+		frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
 
 	if (p->bdev) {
 		if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {