commit     9f84b6267ccde1bebe3f9cd40a91716b5ece5e20
author     H. Peter Anvin <hpa@linux.intel.com>  2013-06-28 18:26:17 -0400
committer  H. Peter Anvin <hpa@linux.intel.com>  2013-06-28 18:26:17 -0400
tree       b51dcf9fb1b7205ed8134ad1169e73719897163d /mm
parent     719038de98bc8479b771c582a1e4a1e86079da22
parent     5f8c4218148822fde6eebbeefc34bd0a6061e031
Merge remote-tracking branch 'origin/x86/fpu' into queue/x86/cpu
Use the union of 3.10 x86/cpu and x86/fpu as baseline.
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'mm')
 mm/huge_memory.c    |  7
 mm/memcontrol.c     | 14
 mm/memory_hotplug.c |  9
 mm/migrate.c        |  2
 mm/mmu_notifier.c   | 79
 mm/page_alloc.c     |  2
 mm/pagewalk.c       | 70
 7 files changed, 101 insertions(+), 82 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 03a89a2f464b..362c329b83fe 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2325,7 +2325,12 @@ static void collapse_huge_page(struct mm_struct *mm,
 		pte_unmap(pte);
 		spin_lock(&mm->page_table_lock);
 		BUG_ON(!pmd_none(*pmd));
-		set_pmd_at(mm, address, pmd, _pmd);
+		/*
+		 * We can only use set_pmd_at when establishing
+		 * hugepmds and never for establishing regular pmds that
+		 * point to regular pagetables. Use pmd_populate for that.
+		 */
+		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock_write(vma->anon_vma);
 		goto out;
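
The distinction the new comment draws is easy to get wrong, so here is a minimal, hedged sketch of the rollback pattern this hunk implements (hypothetical helper name; assumes the 3.10-era mm->page_table_lock API):

static void collapse_abort_sketch(struct mm_struct *mm, pmd_t *pmd, pmd_t _pmd)
{
	spin_lock(&mm->page_table_lock);
	BUG_ON(!pmd_none(*pmd));
	/*
	 * _pmd preserves the old regular-pagetable entry; reinstall it
	 * with pmd_populate(), since set_pmd_at() is only for huge pmds.
	 */
	pmd_populate(mm, pmd, pmd_pgtable(_pmd));
	spin_unlock(&mm->page_table_lock);
}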
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cb1c9dedf9b6..010d6c14129a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4108,8 +4108,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
 	if (mem_cgroup_disabled())
 		return NULL;
 
-	VM_BUG_ON(PageSwapCache(page));
-
 	if (PageTransHuge(page)) {
 		nr_pages <<= compound_order(page);
 		VM_BUG_ON(!PageTransHuge(page));
@@ -4205,6 +4203,18 @@ void mem_cgroup_uncharge_page(struct page *page)
 	if (page_mapped(page))
 		return;
 	VM_BUG_ON(page->mapping && !PageAnon(page));
+	/*
+	 * If the page is in swap cache, uncharge should be deferred
+	 * to the swap path, which also properly accounts swap usage
+	 * and handles memcg lifetime.
+	 *
+	 * Note that this check is not stable and reclaim may add the
+	 * page to swap cache at any time after this.  However, if the
+	 * page is not in swap cache by the time page->mapcount hits
+	 * 0, there won't be any page table references to the swap
+	 * slot, and reclaim will free it and not actually write the
+	 * page to disk.
+	 */
 	if (PageSwapCache(page))
 		return;
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
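
Reduced to its shape, the control flow the new comment justifies looks roughly like this (a sketch, not the full function; __mem_cgroup_uncharge_common is the real helper visible above):

static void mem_cgroup_uncharge_page_sketch(struct page *page)
{
	if (page_mapped(page))
		return;		/* still mapped somewhere, keep the charge */
	if (PageSwapCache(page))
		return;		/* defer uncharge to the swap path */
	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
}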
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a221fac1f47d..1ad92b46753e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -720,9 +720,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	start = phys_start_pfn << PAGE_SHIFT;
 	size = nr_pages * PAGE_SIZE;
 	ret = release_mem_region_adjustable(&iomem_resource, start, size);
-	if (ret)
-		pr_warn("Unable to release resource <%016llx-%016llx> (%d)\n",
-			start, start + size - 1, ret);
+	if (ret) {
+		resource_size_t endres = start + size - 1;
+
+		pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
+			&start, &endres, ret);
+	}
 
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
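
The format-string change is the portable way to print a resource_size_t, whose width depends on CONFIG_PHYS_ADDR_T_64BIT: %pa dereferences a pointer to the value and prints it at its native width, where %016llx silently assumes 64 bits. A small hedged illustration of the idiom (hypothetical helper name):

static void report_release_failure(resource_size_t start,
				   resource_size_t size, int ret)
{
	resource_size_t endres = start + size - 1;

	/* %pa takes the address of the variable, not the value itself. */
	pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
		&start, &endres, ret);
}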
diff --git a/mm/migrate.c b/mm/migrate.c
index 27ed22579fd9..b1f57501de9c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -165,7 +165,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		pte = arch_make_huge_pte(pte, vma, new, 0);
 	}
 #endif
-	flush_cache_page(vma, addr, pte_pfn(pte));
+	flush_dcache_page(new);
 	set_pte_at(mm, addr, ptep, pte);
 
 	if (PageHuge(new)) {
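
The one-line fix swaps a flush of a user-side cacheline (flush_cache_page against a pte that is not even visible yet) for a flush of the kernel's own writes to the new page, presumably because the page contents were copied through the kernel mapping during migration. The ordering is the point: data first, mapping second. A hedged sketch (hypothetical helper; arch hooks and error handling omitted):

static void install_migrated_pte_sketch(struct mm_struct *mm,
					unsigned long addr,
					pte_t *ptep, pte_t pte,
					struct page *new)
{
	flush_dcache_page(new);			/* make copied data visible */
	set_pte_at(mm, addr, ptep, pte);	/* then publish the mapping */
}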
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index be04122fb277..6725ff183374 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_struct *mm)
 	int id;
 
 	/*
-	 * srcu_read_lock() here will block synchronize_srcu() in
-	 * mmu_notifier_unregister() until all registered
-	 * ->release() callouts this function makes have
-	 * returned.
+	 * SRCU here will block mmu_notifier_unregister until
+	 * ->release returns.
 	 */
 	id = srcu_read_lock(&srcu);
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
+		/*
+		 * If ->release runs before mmu_notifier_unregister it must be
+		 * handled, as it's the only way for the driver to flush all
+		 * existing sptes and stop the driver from establishing any more
+		 * sptes before all the pages in the mm are freed.
+		 */
+		if (mn->ops->release)
+			mn->ops->release(mn, mm);
+	srcu_read_unlock(&srcu, id);
+
 	spin_lock(&mm->mmu_notifier_mm->lock);
 	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
 		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
 				 struct mmu_notifier,
 				 hlist);
-
 		/*
-		 * Unlink. This will prevent mmu_notifier_unregister()
-		 * from also making the ->release() callout.
+		 * We arrived before mmu_notifier_unregister so
+		 * mmu_notifier_unregister will do nothing other than to wait
+		 * for ->release to finish and for mmu_notifier_unregister to
+		 * return.
 		 */
 		hlist_del_init_rcu(&mn->hlist);
-		spin_unlock(&mm->mmu_notifier_mm->lock);
-
-		/*
-		 * Clear sptes. (see 'release' description in mmu_notifier.h)
-		 */
-		if (mn->ops->release)
-			mn->ops->release(mn, mm);
-
-		spin_lock(&mm->mmu_notifier_mm->lock);
 	}
 	spin_unlock(&mm->mmu_notifier_mm->lock);
 
 	/*
-	 * All callouts to ->release() which we have done are complete.
-	 * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
-	 */
-	srcu_read_unlock(&srcu, id);
-
-	/*
-	 * mmu_notifier_unregister() may have unlinked a notifier and may
-	 * still be calling out to it. Additionally, other notifiers
-	 * may have been active via vmtruncate() et. al. Block here
-	 * to ensure that all notifier callouts for this mm have been
-	 * completed and the sptes are really cleaned up before returning
-	 * to exit_mmap().
+	 * synchronize_srcu here prevents mmu_notifier_release from returning to
+	 * exit_mmap (which would proceed with freeing all pages in the mm)
+	 * until the ->release method returns, if it was invoked by
+	 * mmu_notifier_unregister.
+	 *
+	 * The mmu_notifier_mm can't go away from under us because one mm_count
+	 * is held by exit_mmap.
 	 */
 	synchronize_srcu(&srcu);
 }
@@ -292,31 +288,34 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	BUG_ON(atomic_read(&mm->mm_count) <= 0);
 
-	spin_lock(&mm->mmu_notifier_mm->lock);
 	if (!hlist_unhashed(&mn->hlist)) {
+		/*
+		 * SRCU here will force exit_mmap to wait for ->release to
+		 * finish before freeing the pages.
+		 */
 		int id;
 
+		id = srcu_read_lock(&srcu);
 		/*
-		 * Ensure we synchronize up with __mmu_notifier_release().
+		 * exit_mmap will block in mmu_notifier_release to guarantee
+		 * that ->release is called before freeing the pages.
 		 */
-		id = srcu_read_lock(&srcu);
-
-		hlist_del_rcu(&mn->hlist);
-		spin_unlock(&mm->mmu_notifier_mm->lock);
-
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
+		srcu_read_unlock(&srcu, id);
 
+		spin_lock(&mm->mmu_notifier_mm->lock);
 		/*
-		 * Allow __mmu_notifier_release() to complete.
+		 * Can not use list_del_rcu() since __mmu_notifier_release
+		 * can delete it before we hold the lock.
 		 */
-		srcu_read_unlock(&srcu, id);
-	} else
-		spin_unlock(&mm->mmu_notifier_mm->lock);
+		hlist_del_init_rcu(&mn->hlist);
+		spin_unlock(&mm->mmu_notifier_mm->lock);
+	}
 
 	/*
-	 * Wait for any running method to finish, including ->release() if it
-	 * was run by __mmu_notifier_release() instead of us.
+	 * Wait for any running method to finish, of course including
+	 * ->release if it was run by mmu_notifier_release instead of us.
 	 */
 	synchronize_srcu(&srcu);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 98cbdf6e5532..378a15bcd649 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5158,7 +5158,7 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end,
 	for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
 		if (poison)
 			memset((void *)pos, poison, PAGE_SIZE);
-		free_reserved_page(virt_to_page(pos));
+		free_reserved_page(virt_to_page((void *)pos));
 	}
 
 	if (pages && s)
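
The extra cast exists because virt_to_page() is an arch-specific macro, and on some architectures it does pointer arithmetic on its argument, so passing a raw unsigned long warns or fails to build. The idiom, sketched as a hypothetical helper:

static struct page *page_for_addr_sketch(unsigned long pos)
{
	/* Cast to void * so pointer-based virt_to_page() variants compile. */
	return virt_to_page((void *)pos);
}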
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 35aa294656cd..5da2cbcfdbb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 	return 0;
 }
 
-static struct vm_area_struct *hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	struct vm_area_struct *vma;
-
-	/* We don't need vma lookup at all. */
-	if (!walk->hugetlb_entry)
-		return NULL;
-
-	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-	vma = find_vma(walk->mm, addr);
-	if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
-		return vma;
-
-	return NULL;
-}
-
 #else /* CONFIG_HUGETLB_PAGE */
-static struct vm_area_struct *hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	return NULL;
-}
-
 static int walk_hugetlb_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end,
 		struct mm_walk *walk)
@@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	if (!walk->mm)
 		return -EINVAL;
 
+	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+
 	pgd = pgd_offset(walk->mm, addr);
 	do {
-		struct vm_area_struct *vma;
+		struct vm_area_struct *vma = NULL;
 
 		next = pgd_addr_end(addr, end);
 
 		/*
-		 * handle hugetlb vma individually because pagetable walk for
-		 * the hugetlb page is dependent on the architecture and
-		 * we can't handle it in the same manner as non-huge pages.
+		 * This function was not intended to be vma based.
+		 * But there are vma special cases to be handled:
+		 * - hugetlb vma's
+		 * - VM_PFNMAP vma's
 		 */
-		vma = hugetlb_vma(addr, walk);
+		vma = find_vma(walk->mm, addr);
 		if (vma) {
-			if (vma->vm_end < next)
+			/*
+			 * There are no page structures backing a VM_PFNMAP
+			 * range, so do not allow split_huge_page_pmd().
+			 */
+			if ((vma->vm_start <= addr) &&
+			    (vma->vm_flags & VM_PFNMAP)) {
 				next = vma->vm_end;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 			/*
-			 * Hugepage is very tightly coupled with vma, so
-			 * walk through hugetlb entries within a given vma.
+			 * Handle hugetlb vma individually because pagetable
+			 * walk for the hugetlb page is dependent on the
+			 * architecture and we can't handle it in the same
+			 * manner as non-huge pages.
 			 */
-			err = walk_hugetlb_range(vma, addr, next, walk);
-			if (err)
-				break;
-			pgd = pgd_offset(walk->mm, next);
-			continue;
+			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+			    is_vm_hugetlb_page(vma)) {
+				if (vma->vm_end < next)
+					next = vma->vm_end;
+				/*
+				 * Hugepage is very tightly coupled with vma,
+				 * so walk through hugetlb entries within a
+				 * given vma.
+				 */
+				err = walk_hugetlb_range(vma, addr, next, walk);
+				if (err)
+					break;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 		}
 
 		if (pgd_none_or_clear_bad(pgd)) {
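
Since the hunk above is dense, here is a simplified, hedged dispatch sketch of the two vma special cases walk_page_range() now checks before touching page tables (hypothetical helper; the real loop also calls walk_hugetlb_range and re-derives pgd before continuing):

/* Returns an address to skip to, or 0 to walk this range normally. */
static unsigned long vma_special_case_sketch(struct mm_walk *walk,
					     unsigned long addr,
					     unsigned long next)
{
	struct vm_area_struct *vma = find_vma(walk->mm, addr);

	if (!vma || vma->vm_start > addr)
		return 0;			/* no vma covers addr */

	if (vma->vm_flags & VM_PFNMAP)
		return vma->vm_end;		/* no struct pages: skip it */

	if (walk->hugetlb_entry && is_vm_hugetlb_page(vma))
		return min(next, vma->vm_end);	/* clamp to the hugetlb vma */

	return 0;
}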