diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/huge_memory.c | 7 | ||||
| -rw-r--r-- | mm/memcontrol.c | 14 | ||||
| -rw-r--r-- | mm/memory_hotplug.c | 9 | ||||
| -rw-r--r-- | mm/migrate.c | 2 | ||||
| -rw-r--r-- | mm/mmu_notifier.c | 79 | ||||
| -rw-r--r-- | mm/page_alloc.c | 2 | ||||
| -rw-r--r-- | mm/pagewalk.c | 70 |
7 files changed, 101 insertions, 82 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 03a89a2f464b..362c329b83fe 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -2325,7 +2325,12 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
| 2325 | pte_unmap(pte); | 2325 | pte_unmap(pte); |
| 2326 | spin_lock(&mm->page_table_lock); | 2326 | spin_lock(&mm->page_table_lock); |
| 2327 | BUG_ON(!pmd_none(*pmd)); | 2327 | BUG_ON(!pmd_none(*pmd)); |
| 2328 | set_pmd_at(mm, address, pmd, _pmd); | 2328 | /* |
| 2329 | * We can only use set_pmd_at when establishing | ||
| 2330 | * hugepmds and never for establishing regular pmds that | ||
| 2331 | * point to regular pagetables. Use pmd_populate for that | ||
| 2332 | */ | ||
| 2333 | pmd_populate(mm, pmd, pmd_pgtable(_pmd)); | ||
| 2329 | spin_unlock(&mm->page_table_lock); | 2334 | spin_unlock(&mm->page_table_lock); |
| 2330 | anon_vma_unlock_write(vma->anon_vma); | 2335 | anon_vma_unlock_write(vma->anon_vma); |
| 2331 | goto out; | 2336 | goto out; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cb1c9dedf9b6..010d6c14129a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -4108,8 +4108,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, | |||
| 4108 | if (mem_cgroup_disabled()) | 4108 | if (mem_cgroup_disabled()) |
| 4109 | return NULL; | 4109 | return NULL; |
| 4110 | 4110 | ||
| 4111 | VM_BUG_ON(PageSwapCache(page)); | ||
| 4112 | |||
| 4113 | if (PageTransHuge(page)) { | 4111 | if (PageTransHuge(page)) { |
| 4114 | nr_pages <<= compound_order(page); | 4112 | nr_pages <<= compound_order(page); |
| 4115 | VM_BUG_ON(!PageTransHuge(page)); | 4113 | VM_BUG_ON(!PageTransHuge(page)); |
| @@ -4205,6 +4203,18 @@ void mem_cgroup_uncharge_page(struct page *page) | |||
| 4205 | if (page_mapped(page)) | 4203 | if (page_mapped(page)) |
| 4206 | return; | 4204 | return; |
| 4207 | VM_BUG_ON(page->mapping && !PageAnon(page)); | 4205 | VM_BUG_ON(page->mapping && !PageAnon(page)); |
| 4206 | /* | ||
| 4207 | * If the page is in swap cache, uncharge should be deferred | ||
| 4208 | * to the swap path, which also properly accounts swap usage | ||
| 4209 | * and handles memcg lifetime. | ||
| 4210 | * | ||
| 4211 | * Note that this check is not stable and reclaim may add the | ||
| 4212 | * page to swap cache at any time after this. However, if the | ||
| 4213 | * page is not in swap cache by the time page->mapcount hits | ||
| 4214 | * 0, there won't be any page table references to the swap | ||
| 4215 | * slot, and reclaim will free it and not actually write the | ||
| 4216 | * page to disk. | ||
| 4217 | */ | ||
| 4208 | if (PageSwapCache(page)) | 4218 | if (PageSwapCache(page)) |
| 4209 | return; | 4219 | return; |
| 4210 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false); | 4220 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false); |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a221fac1f47d..1ad92b46753e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -720,9 +720,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
| 720 | start = phys_start_pfn << PAGE_SHIFT; | 720 | start = phys_start_pfn << PAGE_SHIFT; |
| 721 | size = nr_pages * PAGE_SIZE; | 721 | size = nr_pages * PAGE_SIZE; |
| 722 | ret = release_mem_region_adjustable(&iomem_resource, start, size); | 722 | ret = release_mem_region_adjustable(&iomem_resource, start, size); |
| 723 | if (ret) | 723 | if (ret) { |
| 724 | pr_warn("Unable to release resource <%016llx-%016llx> (%d)\n", | 724 | resource_size_t endres = start + size - 1; |
| 725 | start, start + size - 1, ret); | 725 | |
| 726 | pr_warn("Unable to release resource <%pa-%pa> (%d)\n", | ||
| 727 | &start, &endres, ret); | ||
| 728 | } | ||
| 726 | 729 | ||
| 727 | sections_to_remove = nr_pages / PAGES_PER_SECTION; | 730 | sections_to_remove = nr_pages / PAGES_PER_SECTION; |
| 728 | for (i = 0; i < sections_to_remove; i++) { | 731 | for (i = 0; i < sections_to_remove; i++) { |
diff --git a/mm/migrate.c b/mm/migrate.c index 27ed22579fd9..b1f57501de9c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -165,7 +165,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
| 165 | pte = arch_make_huge_pte(pte, vma, new, 0); | 165 | pte = arch_make_huge_pte(pte, vma, new, 0); |
| 166 | } | 166 | } |
| 167 | #endif | 167 | #endif |
| 168 | flush_cache_page(vma, addr, pte_pfn(pte)); | 168 | flush_dcache_page(new); |
| 169 | set_pte_at(mm, addr, ptep, pte); | 169 | set_pte_at(mm, addr, ptep, pte); |
| 170 | 170 | ||
| 171 | if (PageHuge(new)) { | 171 | if (PageHuge(new)) { |
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index be04122fb277..6725ff183374 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c | |||
| @@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_struct *mm) | |||
| 40 | int id; | 40 | int id; |
| 41 | 41 | ||
| 42 | /* | 42 | /* |
| 43 | * srcu_read_lock() here will block synchronize_srcu() in | 43 | * SRCU here will block mmu_notifier_unregister until |
| 44 | * mmu_notifier_unregister() until all registered | 44 | * ->release returns. |
| 45 | * ->release() callouts this function makes have | ||
| 46 | * returned. | ||
| 47 | */ | 45 | */ |
| 48 | id = srcu_read_lock(&srcu); | 46 | id = srcu_read_lock(&srcu); |
| 47 | hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) | ||
| 48 | /* | ||
| 49 | * If ->release runs before mmu_notifier_unregister it must be | ||
| 50 | * handled, as it's the only way for the driver to flush all | ||
| 51 | * existing sptes and stop the driver from establishing any more | ||
| 52 | * sptes before all the pages in the mm are freed. | ||
| 53 | */ | ||
| 54 | if (mn->ops->release) | ||
| 55 | mn->ops->release(mn, mm); | ||
| 56 | srcu_read_unlock(&srcu, id); | ||
| 57 | |||
| 49 | spin_lock(&mm->mmu_notifier_mm->lock); | 58 | spin_lock(&mm->mmu_notifier_mm->lock); |
| 50 | while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { | 59 | while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { |
| 51 | mn = hlist_entry(mm->mmu_notifier_mm->list.first, | 60 | mn = hlist_entry(mm->mmu_notifier_mm->list.first, |
| 52 | struct mmu_notifier, | 61 | struct mmu_notifier, |
| 53 | hlist); | 62 | hlist); |
| 54 | |||
| 55 | /* | 63 | /* |
| 56 | * Unlink. This will prevent mmu_notifier_unregister() | 64 | * We arrived before mmu_notifier_unregister so |
| 57 | * from also making the ->release() callout. | 65 | * mmu_notifier_unregister will do nothing other than to wait |
| 66 | * for ->release to finish and for mmu_notifier_unregister to | ||
| 67 | * return. | ||
| 58 | */ | 68 | */ |
| 59 | hlist_del_init_rcu(&mn->hlist); | 69 | hlist_del_init_rcu(&mn->hlist); |
| 60 | spin_unlock(&mm->mmu_notifier_mm->lock); | ||
| 61 | |||
| 62 | /* | ||
| 63 | * Clear sptes. (see 'release' description in mmu_notifier.h) | ||
| 64 | */ | ||
| 65 | if (mn->ops->release) | ||
| 66 | mn->ops->release(mn, mm); | ||
| 67 | |||
| 68 | spin_lock(&mm->mmu_notifier_mm->lock); | ||
| 69 | } | 70 | } |
| 70 | spin_unlock(&mm->mmu_notifier_mm->lock); | 71 | spin_unlock(&mm->mmu_notifier_mm->lock); |
| 71 | 72 | ||
| 72 | /* | 73 | /* |
| 73 | * All callouts to ->release() which we have done are complete. | 74 | * synchronize_srcu here prevents mmu_notifier_release from returning to |
| 74 | * Allow synchronize_srcu() in mmu_notifier_unregister() to complete | 75 | * exit_mmap (which would proceed with freeing all pages in the mm) |
| 75 | */ | 76 | * until the ->release method returns, if it was invoked by |
| 76 | srcu_read_unlock(&srcu, id); | 77 | * mmu_notifier_unregister. |
| 77 | 78 | * | |
| 78 | /* | 79 | * The mmu_notifier_mm can't go away from under us because one mm_count |
| 79 | * mmu_notifier_unregister() may have unlinked a notifier and may | 80 | * is held by exit_mmap. |
| 80 | * still be calling out to it. Additionally, other notifiers | ||
| 81 | * may have been active via vmtruncate() et. al. Block here | ||
| 82 | * to ensure that all notifier callouts for this mm have been | ||
| 83 | * completed and the sptes are really cleaned up before returning | ||
| 84 | * to exit_mmap(). | ||
| 85 | */ | 81 | */ |
| 86 | synchronize_srcu(&srcu); | 82 | synchronize_srcu(&srcu); |
| 87 | } | 83 | } |
| @@ -292,31 +288,34 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) | |||
| 292 | { | 288 | { |
| 293 | BUG_ON(atomic_read(&mm->mm_count) <= 0); | 289 | BUG_ON(atomic_read(&mm->mm_count) <= 0); |
| 294 | 290 | ||
| 295 | spin_lock(&mm->mmu_notifier_mm->lock); | ||
| 296 | if (!hlist_unhashed(&mn->hlist)) { | 291 | if (!hlist_unhashed(&mn->hlist)) { |
| 292 | /* | ||
| 293 | * SRCU here will force exit_mmap to wait for ->release to | ||
| 294 | * finish before freeing the pages. | ||
| 295 | */ | ||
| 297 | int id; | 296 | int id; |
| 298 | 297 | ||
| 298 | id = srcu_read_lock(&srcu); | ||
| 299 | /* | 299 | /* |
| 300 | * Ensure we synchronize up with __mmu_notifier_release(). | 300 | * exit_mmap will block in mmu_notifier_release to guarantee |
| 301 | * that ->release is called before freeing the pages. | ||
| 301 | */ | 302 | */ |
| 302 | id = srcu_read_lock(&srcu); | ||
| 303 | |||
| 304 | hlist_del_rcu(&mn->hlist); | ||
| 305 | spin_unlock(&mm->mmu_notifier_mm->lock); | ||
| 306 | |||
| 307 | if (mn->ops->release) | 303 | if (mn->ops->release) |
| 308 | mn->ops->release(mn, mm); | 304 | mn->ops->release(mn, mm); |
| 305 | srcu_read_unlock(&srcu, id); | ||
| 309 | 306 | ||
| 307 | spin_lock(&mm->mmu_notifier_mm->lock); | ||
| 310 | /* | 308 | /* |
| 311 | * Allow __mmu_notifier_release() to complete. | 309 | * Can not use hlist_del_rcu() since __mmu_notifier_release |
| 310 | * can delete it before we hold the lock. | ||
| 312 | */ | 311 | */ |
| 313 | srcu_read_unlock(&srcu, id); | 312 | hlist_del_init_rcu(&mn->hlist); |
| 314 | } else | ||
| 315 | spin_unlock(&mm->mmu_notifier_mm->lock); | 313 | spin_unlock(&mm->mmu_notifier_mm->lock); |
| 314 | } | ||
| 316 | 315 | ||
| 317 | /* | 316 | /* |
| 318 | * Wait for any running method to finish, including ->release() if it | 317 | * Wait for any running method to finish, of course including |
| 319 | * was run by __mmu_notifier_release() instead of us. | 318 | * ->release if it was run by mmu_notifier_release instead of us. |
| 320 | */ | 319 | */ |
| 321 | synchronize_srcu(&srcu); | 320 | synchronize_srcu(&srcu); |
| 322 | 321 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 98cbdf6e5532..378a15bcd649 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -5158,7 +5158,7 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end, | |||
| 5158 | for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) { | 5158 | for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) { |
| 5159 | if (poison) | 5159 | if (poison) |
| 5160 | memset((void *)pos, poison, PAGE_SIZE); | 5160 | memset((void *)pos, poison, PAGE_SIZE); |
| 5161 | free_reserved_page(virt_to_page(pos)); | 5161 | free_reserved_page(virt_to_page((void *)pos)); |
| 5162 | } | 5162 | } |
| 5163 | 5163 | ||
| 5164 | if (pages && s) | 5164 | if (pages && s) |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 35aa294656cd..5da2cbcfdbb5 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
| @@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma, | |||
| 127 | return 0; | 127 | return 0; |
| 128 | } | 128 | } |
| 129 | 129 | ||
| 130 | static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) | ||
| 131 | { | ||
| 132 | struct vm_area_struct *vma; | ||
| 133 | |||
| 134 | /* We don't need vma lookup at all. */ | ||
| 135 | if (!walk->hugetlb_entry) | ||
| 136 | return NULL; | ||
| 137 | |||
| 138 | VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); | ||
| 139 | vma = find_vma(walk->mm, addr); | ||
| 140 | if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) | ||
| 141 | return vma; | ||
| 142 | |||
| 143 | return NULL; | ||
| 144 | } | ||
| 145 | |||
| 146 | #else /* CONFIG_HUGETLB_PAGE */ | 130 | #else /* CONFIG_HUGETLB_PAGE */ |
| 147 | static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) | ||
| 148 | { | ||
| 149 | return NULL; | ||
| 150 | } | ||
| 151 | |||
| 152 | static int walk_hugetlb_range(struct vm_area_struct *vma, | 131 | static int walk_hugetlb_range(struct vm_area_struct *vma, |
| 153 | unsigned long addr, unsigned long end, | 132 | unsigned long addr, unsigned long end, |
| 154 | struct mm_walk *walk) | 133 | struct mm_walk *walk) |
| @@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
| 198 | if (!walk->mm) | 177 | if (!walk->mm) |
| 199 | return -EINVAL; | 178 | return -EINVAL; |
| 200 | 179 | ||
| 180 | VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); | ||
| 181 | |||
| 201 | pgd = pgd_offset(walk->mm, addr); | 182 | pgd = pgd_offset(walk->mm, addr); |
| 202 | do { | 183 | do { |
| 203 | struct vm_area_struct *vma; | 184 | struct vm_area_struct *vma = NULL; |
| 204 | 185 | ||
| 205 | next = pgd_addr_end(addr, end); | 186 | next = pgd_addr_end(addr, end); |
| 206 | 187 | ||
| 207 | /* | 188 | /* |
| 208 | * handle hugetlb vma individually because pagetable walk for | 189 | * This function was not intended to be vma based. |
| 209 | * the hugetlb page is dependent on the architecture and | 190 | * But there are vma special cases to be handled: |
| 210 | * we can't handled it in the same manner as non-huge pages. | 191 | * - hugetlb vma's |
| 192 | * - VM_PFNMAP vma's | ||
| 211 | */ | 193 | */ |
| 212 | vma = hugetlb_vma(addr, walk); | 194 | vma = find_vma(walk->mm, addr); |
| 213 | if (vma) { | 195 | if (vma) { |
| 214 | if (vma->vm_end < next) | 196 | /* |
| 197 | * There are no page structures backing a VM_PFNMAP | ||
| 198 | * range, so do not allow split_huge_page_pmd(). | ||
| 199 | */ | ||
| 200 | if ((vma->vm_start <= addr) && | ||
| 201 | (vma->vm_flags & VM_PFNMAP)) { | ||
| 215 | next = vma->vm_end; | 202 | next = vma->vm_end; |
| 203 | pgd = pgd_offset(walk->mm, next); | ||
| 204 | continue; | ||
| 205 | } | ||
| 216 | /* | 206 | /* |
| 217 | * Hugepage is very tightly coupled with vma, so | 207 | * Handle hugetlb vma individually because pagetable |
| 218 | * walk through hugetlb entries within a given vma. | 208 | * walk for the hugetlb page is dependent on the |
| 209 | * architecture and we can't handle it in the same | ||
| 210 | * manner as non-huge pages. | ||
| 219 | */ | 211 | */ |
| 220 | err = walk_hugetlb_range(vma, addr, next, walk); | 212 | if (walk->hugetlb_entry && (vma->vm_start <= addr) && |
| 221 | if (err) | 213 | is_vm_hugetlb_page(vma)) { |
| 222 | break; | 214 | if (vma->vm_end < next) |
| 223 | pgd = pgd_offset(walk->mm, next); | 215 | next = vma->vm_end; |
| 224 | continue; | 216 | /* |
| 217 | * Hugepage is very tightly coupled with vma, | ||
| 218 | * so walk through hugetlb entries within a | ||
| 219 | * given vma. | ||
| 220 | */ | ||
| 221 | err = walk_hugetlb_range(vma, addr, next, walk); | ||
| 222 | if (err) | ||
| 223 | break; | ||
| 224 | pgd = pgd_offset(walk->mm, next); | ||
| 225 | continue; | ||
| 226 | } | ||
| 225 | } | 227 | } |
| 226 | 228 | ||
| 227 | if (pgd_none_or_clear_bad(pgd)) { | 229 | if (pgd_none_or_clear_bad(pgd)) { |
