author    H. Peter Anvin <hpa@linux.intel.com>  2013-06-28 18:26:17 -0400
committer H. Peter Anvin <hpa@linux.intel.com>  2013-06-28 18:26:17 -0400
commit    9f84b6267ccde1bebe3f9cd40a91716b5ece5e20 (patch)
tree      b51dcf9fb1b7205ed8134ad1169e73719897163d /mm
parent    719038de98bc8479b771c582a1e4a1e86079da22 (diff)
parent    5f8c4218148822fde6eebbeefc34bd0a6061e031 (diff)
Merge remote-tracking branch 'origin/x86/fpu' into queue/x86/cpu

Use the union of 3.10 x86/cpu and x86/fpu as baseline.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c     7
-rw-r--r--  mm/memcontrol.c     14
-rw-r--r--  mm/memory_hotplug.c  9
-rw-r--r--  mm/migrate.c         2
-rw-r--r--  mm/mmu_notifier.c   79
-rw-r--r--  mm/page_alloc.c      2
-rw-r--r--  mm/pagewalk.c       70
7 files changed, 101 insertions, 82 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 03a89a2f464b..362c329b83fe 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2325,7 +2325,12 @@ static void collapse_huge_page(struct mm_struct *mm,
 		pte_unmap(pte);
 		spin_lock(&mm->page_table_lock);
 		BUG_ON(!pmd_none(*pmd));
-		set_pmd_at(mm, address, pmd, _pmd);
+		/*
+		 * We can only use set_pmd_at when establishing
+		 * hugepmds and never for establishing regular pmds that
+		 * points to regular pagetables. Use pmd_populate for that
+		 */
+		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock_write(vma->anon_vma);
 		goto out;
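
The comment added in this hunk draws a real distinction: set_pmd_at() is for installing a huge (leaf) pmd, while pmd_populate() installs a pmd that points to a page table of regular ptes. A minimal sketch of the abort path under that rule; restore_regular_pmd() is a hypothetical helper name, the real code sits inline in collapse_huge_page():

#include <linux/mm.h>
#include <asm/pgalloc.h>

/*
 * Hypothetical helper, illustration only: when the collapse attempt is
 * aborted, the saved _pmd still describes a page table of regular ptes,
 * so it must be reinstalled with pmd_populate(), never set_pmd_at().
 */
static void restore_regular_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t _pmd)
{
	spin_lock(&mm->page_table_lock);
	BUG_ON(!pmd_none(*pmd));
	/* pmd_pgtable() recovers the pte page behind the saved pmd value */
	pmd_populate(mm, pmd, pmd_pgtable(_pmd));
	spin_unlock(&mm->page_table_lock);
}
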
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cb1c9dedf9b6..010d6c14129a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4108,8 +4108,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
 	if (mem_cgroup_disabled())
 		return NULL;
 
-	VM_BUG_ON(PageSwapCache(page));
-
 	if (PageTransHuge(page)) {
 		nr_pages <<= compound_order(page);
 		VM_BUG_ON(!PageTransHuge(page));
@@ -4205,6 +4203,18 @@ void mem_cgroup_uncharge_page(struct page *page)
 	if (page_mapped(page))
 		return;
 	VM_BUG_ON(page->mapping && !PageAnon(page));
+	/*
+	 * If the page is in swap cache, uncharge should be deferred
+	 * to the swap path, which also properly accounts swap usage
+	 * and handles memcg lifetime.
+	 *
+	 * Note that this check is not stable and reclaim may add the
+	 * page to swap cache at any time after this. However, if the
+	 * page is not in swap cache by the time page->mapcount hits
+	 * 0, there won't be any page table references to the swap
+	 * slot, and reclaim will free it and not actually write the
+	 * page to disk.
+	 */
 	if (PageSwapCache(page))
 		return;
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
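
For context, a condensed view of the uncharge path this comment documents; the body below mirrors mem_cgroup_uncharge_page() as it stands after the hunk, trimmed to the checks in question (the usual caller is page_remove_rmap() when the last mapping of an anon page goes away):

void mem_cgroup_uncharge_page(struct page *page)
{
	if (page_mapped(page))
		return;			/* still mapped somewhere */
	VM_BUG_ON(page->mapping && !PageAnon(page));
	/*
	 * Swap-cache pages are uncharged from the swap path instead,
	 * which also accounts the swap slot and keeps memcg lifetime
	 * correct; if reclaim adds the page to swap cache after this
	 * check, mapcount is already zero, so no page table references
	 * the swap slot and reclaim simply frees it again.
	 */
	if (PageSwapCache(page))
		return;
	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
}
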
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a221fac1f47d..1ad92b46753e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -720,9 +720,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	start = phys_start_pfn << PAGE_SHIFT;
 	size = nr_pages * PAGE_SIZE;
 	ret = release_mem_region_adjustable(&iomem_resource, start, size);
-	if (ret)
-		pr_warn("Unable to release resource <%016llx-%016llx> (%d)\n",
-			start, start + size - 1, ret);
+	if (ret) {
+		resource_size_t endres = start + size - 1;
+
+		pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
+			&start, &endres, ret);
+	}
 
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
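
The switch from %016llx to %pa matters because resource_size_t is not a fixed width: with a 32-bit phys_addr_t the old format string no longer matches its arguments. %pa takes a pointer to a phys_addr_t/resource_size_t and prints it at the right width on every configuration. A minimal sketch of the idiom; report_release_failure() is a hypothetical name:

#include <linux/kernel.h>
#include <linux/ioport.h>

/* Hypothetical helper showing the %pa idiom used in the hunk above. */
static void report_release_failure(resource_size_t start,
				   resource_size_t size, int ret)
{
	resource_size_t endres = start + size - 1;

	/* %pa consumes a pointer, so pass &start and &endres */
	pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
		&start, &endres, ret);
}
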
diff --git a/mm/migrate.c b/mm/migrate.c
index 27ed22579fd9..b1f57501de9c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -165,7 +165,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		pte = arch_make_huge_pte(pte, vma, new, 0);
 	}
 #endif
-	flush_cache_page(vma, addr, pte_pfn(pte));
+	flush_dcache_page(new);
 	set_pte_at(mm, addr, ptep, pte);
 
 	if (PageHuge(new)) {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index be04122fb277..6725ff183374 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_struct *mm)
 	int id;
 
 	/*
-	 * srcu_read_lock() here will block synchronize_srcu() in
-	 * mmu_notifier_unregister() until all registered
-	 * ->release() callouts this function makes have
-	 * returned.
+	 * SRCU here will block mmu_notifier_unregister until
+	 * ->release returns.
 	 */
 	id = srcu_read_lock(&srcu);
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
+		/*
+		 * If ->release runs before mmu_notifier_unregister it must be
+		 * handled, as it's the only way for the driver to flush all
+		 * existing sptes and stop the driver from establishing any more
+		 * sptes before all the pages in the mm are freed.
+		 */
+		if (mn->ops->release)
+			mn->ops->release(mn, mm);
+	srcu_read_unlock(&srcu, id);
+
 	spin_lock(&mm->mmu_notifier_mm->lock);
 	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
 		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
 				 struct mmu_notifier,
 				 hlist);
-
 		/*
-		 * Unlink. This will prevent mmu_notifier_unregister()
-		 * from also making the ->release() callout.
+		 * We arrived before mmu_notifier_unregister so
+		 * mmu_notifier_unregister will do nothing other than to wait
+		 * for ->release to finish and for mmu_notifier_unregister to
+		 * return.
 		 */
 		hlist_del_init_rcu(&mn->hlist);
-		spin_unlock(&mm->mmu_notifier_mm->lock);
-
-		/*
-		 * Clear sptes. (see 'release' description in mmu_notifier.h)
-		 */
-		if (mn->ops->release)
-			mn->ops->release(mn, mm);
-
-		spin_lock(&mm->mmu_notifier_mm->lock);
 	}
 	spin_unlock(&mm->mmu_notifier_mm->lock);
 
 	/*
-	 * All callouts to ->release() which we have done are complete.
-	 * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
-	 */
-	srcu_read_unlock(&srcu, id);
-
-	/*
-	 * mmu_notifier_unregister() may have unlinked a notifier and may
-	 * still be calling out to it. Additionally, other notifiers
-	 * may have been active via vmtruncate() et. al. Block here
-	 * to ensure that all notifier callouts for this mm have been
-	 * completed and the sptes are really cleaned up before returning
-	 * to exit_mmap().
+	 * synchronize_srcu here prevents mmu_notifier_release from returning to
+	 * exit_mmap (which would proceed with freeing all pages in the mm)
+	 * until the ->release method returns, if it was invoked by
+	 * mmu_notifier_unregister.
+	 *
+	 * The mmu_notifier_mm can't go away from under us because one mm_count
+	 * is held by exit_mmap.
 	 */
 	synchronize_srcu(&srcu);
 }
@@ -292,31 +288,34 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	BUG_ON(atomic_read(&mm->mm_count) <= 0);
 
-	spin_lock(&mm->mmu_notifier_mm->lock);
 	if (!hlist_unhashed(&mn->hlist)) {
+		/*
+		 * SRCU here will force exit_mmap to wait for ->release to
+		 * finish before freeing the pages.
+		 */
 		int id;
 
+		id = srcu_read_lock(&srcu);
 		/*
-		 * Ensure we synchronize up with __mmu_notifier_release().
+		 * exit_mmap will block in mmu_notifier_release to guarantee
+		 * that ->release is called before freeing the pages.
 		 */
-		id = srcu_read_lock(&srcu);
-
-		hlist_del_rcu(&mn->hlist);
-		spin_unlock(&mm->mmu_notifier_mm->lock);
-
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
+		srcu_read_unlock(&srcu, id);
 
+		spin_lock(&mm->mmu_notifier_mm->lock);
 		/*
-		 * Allow __mmu_notifier_release() to complete.
+		 * Can not use list_del_rcu() since __mmu_notifier_release
+		 * can delete it before we hold the lock.
 		 */
-		srcu_read_unlock(&srcu, id);
-	} else
+		hlist_del_init_rcu(&mn->hlist);
 		spin_unlock(&mm->mmu_notifier_mm->lock);
+	}
 
 	/*
-	 * Wait for any running method to finish, including ->release() if it
-	 * was run by __mmu_notifier_release() instead of us.
+	 * Wait for any running method to finish, of course including
+	 * ->release if it was run by mmu_notifier_relase instead of us.
 	 */
 	synchronize_srcu(&srcu);
 
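After this change both paths have the same shape: invoke ->release() under an SRCU read-side lock, take mmu_notifier_mm->lock only around list manipulation, and finish with synchronize_srcu() so neither side returns while the other still has a callout in flight. A stripped-down sketch of the read-side half; example_srcu is a stand-in for the file-static srcu_struct in mmu_notifier.c, and this is not the literal kernel code:

#include <linux/mmu_notifier.h>
#include <linux/srcu.h>

DEFINE_STATIC_SRCU(example_srcu);	/* stand-in for mmu_notifier.c's srcu */

/*
 * Illustration of the pattern __mmu_notifier_release() and
 * mmu_notifier_unregister() converge on: holding the read lock blocks
 * the other side's synchronize_srcu() until ->release() has returned,
 * so neither the mm's pages nor the notifier can be freed under a
 * running callout.
 */
static void call_release_under_srcu(struct mmu_notifier *mn,
				    struct mm_struct *mm)
{
	int id;

	id = srcu_read_lock(&example_srcu);
	if (mn->ops->release)
		mn->ops->release(mn, mm);	/* driver flushes its sptes */
	srcu_read_unlock(&example_srcu, id);
}
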
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 98cbdf6e5532..378a15bcd649 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5158,7 +5158,7 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end,
 	for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
 		if (poison)
 			memset((void *)pos, poison, PAGE_SIZE);
-		free_reserved_page(virt_to_page(pos));
+		free_reserved_page(virt_to_page((void *)pos));
 	}
 
 	if (pages && s)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 35aa294656cd..5da2cbcfdbb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 	return 0;
 }
 
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	struct vm_area_struct *vma;
-
-	/* We don't need vma lookup at all. */
-	if (!walk->hugetlb_entry)
-		return NULL;
-
-	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-	vma = find_vma(walk->mm, addr);
-	if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
-		return vma;
-
-	return NULL;
-}
-
 #else /* CONFIG_HUGETLB_PAGE */
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	return NULL;
-}
-
 static int walk_hugetlb_range(struct vm_area_struct *vma,
 			      unsigned long addr, unsigned long end,
 			      struct mm_walk *walk)
@@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	if (!walk->mm)
 		return -EINVAL;
 
+	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+
 	pgd = pgd_offset(walk->mm, addr);
 	do {
-		struct vm_area_struct *vma;
+		struct vm_area_struct *vma = NULL;
 
 		next = pgd_addr_end(addr, end);
 
 		/*
-		 * handle hugetlb vma individually because pagetable walk for
-		 * the hugetlb page is dependent on the architecture and
-		 * we can't handled it in the same manner as non-huge pages.
+		 * This function was not intended to be vma based.
+		 * But there are vma special cases to be handled:
+		 * - hugetlb vma's
+		 * - VM_PFNMAP vma's
 		 */
-		vma = hugetlb_vma(addr, walk);
+		vma = find_vma(walk->mm, addr);
 		if (vma) {
-			if (vma->vm_end < next)
+			/*
+			 * There are no page structures backing a VM_PFNMAP
+			 * range, so do not allow split_huge_page_pmd().
+			 */
+			if ((vma->vm_start <= addr) &&
+			    (vma->vm_flags & VM_PFNMAP)) {
 				next = vma->vm_end;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 			/*
-			 * Hugepage is very tightly coupled with vma, so
-			 * walk through hugetlb entries within a given vma.
+			 * Handle hugetlb vma individually because pagetable
+			 * walk for the hugetlb page is dependent on the
+			 * architecture and we can't handled it in the same
+			 * manner as non-huge pages.
 			 */
-			err = walk_hugetlb_range(vma, addr, next, walk);
-			if (err)
-				break;
-			pgd = pgd_offset(walk->mm, next);
-			continue;
+			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+			    is_vm_hugetlb_page(vma)) {
+				if (vma->vm_end < next)
+					next = vma->vm_end;
+				/*
+				 * Hugepage is very tightly coupled with vma,
+				 * so walk through hugetlb entries within a
+				 * given vma.
+				 */
+				err = walk_hugetlb_range(vma, addr, next, walk);
+				if (err)
+					break;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 		}
 
 		if (pgd_none_or_clear_bad(pgd)) {
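
One consequence of the pagewalk rework: walk_page_range() now calls find_vma() and asserts mmap_sem on every walk, not only when a hugetlb_entry callback is set, so callers must hold the semaphore for the whole range. A hedged caller sketch; walk_whole_mm() is a hypothetical name and the mm_walk callbacks are assumed to be filled in elsewhere:

#include <linux/mm.h>

/* Hypothetical caller: the VM_BUG_ON added above fires without mmap_sem. */
static int walk_whole_mm(struct mm_struct *mm, struct mm_walk *walk)
{
	int err;

	walk->mm = mm;
	down_read(&mm->mmap_sem);
	err = walk_page_range(0, TASK_SIZE, walk);
	up_read(&mm->mmap_sem);
	return err;
}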