aboutsummaryrefslogtreecommitdiffstats
path: root/mm/migrate.c
diff options
context:
space:
mode:
authorHugh Dickins <hugh.dickins@tiscali.co.uk>2009-12-14 20:59:31 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-15 11:53:20 -0500
commite9995ef978a7d5296fe04a9a2c5ca6e66d8bb4e5 (patch)
treedf4324273856e06b8277b7e4a0fa9289eb8e6385 /mm/migrate.c
parent407f9c8b0889ced1dbe2f9157e4e60c61329d5c9 (diff)
ksm: rmap_walk to remove_migration_ptes
A side-effect of making ksm pages swappable is that they have to be placed on the LRUs: which then exposes them to isolate_lru_page() and hence to page migration.

Add rmap_walk() for remove_migration_ptes() to use: rmap_walk_anon() and rmap_walk_file() in rmap.c, but rmap_walk_ksm() in ksm.c. Perhaps some consolidation with existing code is possible, but don't attempt that yet (try_to_unmap needs to handle nonlinears, but migration pte removal does not).

rmap_walk() is sadly less general than it appears: rmap_walk_anon(), like remove_anon_migration_ptes() which it replaces, avoids calling page_lock_anon_vma(), because that includes a page_mapped() test which fails when all migration ptes are in place. That was valid when NUMA page migration was introduced (holding mmap_sem provided the missing guarantee that anon_vma's slab had not already been destroyed), but I believe not valid in the memory hotremove case added since.

For now do the same as before, and consider the best way to fix that unlikely race later on. When fixed, we can probably use rmap_walk() on hwpoisoned ksm pages too: for now, they remain among hwpoison's various exceptions (its PageKsm test comes before the page is locked, but its page_lock_anon_vma fails safely if an anon gets upgraded).

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--mm/migrate.c85
1 file changed, 18 insertions(+), 67 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 367272d04423..0b714747c028 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -21,6 +21,7 @@
21#include <linux/mm_inline.h> 21#include <linux/mm_inline.h>
22#include <linux/nsproxy.h> 22#include <linux/nsproxy.h>
23#include <linux/pagevec.h> 23#include <linux/pagevec.h>
24#include <linux/ksm.h>
24#include <linux/rmap.h> 25#include <linux/rmap.h>
25#include <linux/topology.h> 26#include <linux/topology.h>
26#include <linux/cpu.h> 27#include <linux/cpu.h>
@@ -78,8 +79,8 @@ int putback_lru_pages(struct list_head *l)
78/* 79/*
79 * Restore a potential migration pte to a working pte entry 80 * Restore a potential migration pte to a working pte entry
80 */ 81 */
81static void remove_migration_pte(struct vm_area_struct *vma, 82static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
82 struct page *old, struct page *new) 83 unsigned long addr, void *old)
83{ 84{
84 struct mm_struct *mm = vma->vm_mm; 85 struct mm_struct *mm = vma->vm_mm;
85 swp_entry_t entry; 86 swp_entry_t entry;
@@ -88,40 +89,37 @@ static void remove_migration_pte(struct vm_area_struct *vma,
88 pmd_t *pmd; 89 pmd_t *pmd;
89 pte_t *ptep, pte; 90 pte_t *ptep, pte;
90 spinlock_t *ptl; 91 spinlock_t *ptl;
91 unsigned long addr = page_address_in_vma(new, vma);
92
93 if (addr == -EFAULT)
94 return;
95 92
96 pgd = pgd_offset(mm, addr); 93 pgd = pgd_offset(mm, addr);
97 if (!pgd_present(*pgd)) 94 if (!pgd_present(*pgd))
98 return; 95 goto out;
99 96
100 pud = pud_offset(pgd, addr); 97 pud = pud_offset(pgd, addr);
101 if (!pud_present(*pud)) 98 if (!pud_present(*pud))
102 return; 99 goto out;
103 100
104 pmd = pmd_offset(pud, addr); 101 pmd = pmd_offset(pud, addr);
105 if (!pmd_present(*pmd)) 102 if (!pmd_present(*pmd))
106 return; 103 goto out;
107 104
108 ptep = pte_offset_map(pmd, addr); 105 ptep = pte_offset_map(pmd, addr);
109 106
110 if (!is_swap_pte(*ptep)) { 107 if (!is_swap_pte(*ptep)) {
111 pte_unmap(ptep); 108 pte_unmap(ptep);
112 return; 109 goto out;
113 } 110 }
114 111
115 ptl = pte_lockptr(mm, pmd); 112 ptl = pte_lockptr(mm, pmd);
116 spin_lock(ptl); 113 spin_lock(ptl);
117 pte = *ptep; 114 pte = *ptep;
118 if (!is_swap_pte(pte)) 115 if (!is_swap_pte(pte))
119 goto out; 116 goto unlock;
120 117
121 entry = pte_to_swp_entry(pte); 118 entry = pte_to_swp_entry(pte);
122 119
123 if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) 120 if (!is_migration_entry(entry) ||
124 goto out; 121 migration_entry_to_page(entry) != old)
122 goto unlock;
125 123
126 get_page(new); 124 get_page(new);
127 pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); 125 pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
@@ -137,55 +135,10 @@ static void remove_migration_pte(struct vm_area_struct *vma,
137 135
138 /* No need to invalidate - it was non-present before */ 136 /* No need to invalidate - it was non-present before */
139 update_mmu_cache(vma, addr, pte); 137 update_mmu_cache(vma, addr, pte);
140 138unlock:
141out:
142 pte_unmap_unlock(ptep, ptl); 139 pte_unmap_unlock(ptep, ptl);
143} 140out:
144 141 return SWAP_AGAIN;
145/*
146 * Note that remove_file_migration_ptes will only work on regular mappings,
147 * Nonlinear mappings do not use migration entries.
148 */
149static void remove_file_migration_ptes(struct page *old, struct page *new)
150{
151 struct vm_area_struct *vma;
152 struct address_space *mapping = new->mapping;
153 struct prio_tree_iter iter;
154 pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
155
156 if (!mapping)
157 return;
158
159 spin_lock(&mapping->i_mmap_lock);
160
161 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
162 remove_migration_pte(vma, old, new);
163
164 spin_unlock(&mapping->i_mmap_lock);
165}
166
167/*
168 * Must hold mmap_sem lock on at least one of the vmas containing
169 * the page so that the anon_vma cannot vanish.
170 */
171static void remove_anon_migration_ptes(struct page *old, struct page *new)
172{
173 struct anon_vma *anon_vma;
174 struct vm_area_struct *vma;
175
176 /*
177 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
178 */
179 anon_vma = page_anon_vma(new);
180 if (!anon_vma)
181 return;
182
183 spin_lock(&anon_vma->lock);
184
185 list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
186 remove_migration_pte(vma, old, new);
187
188 spin_unlock(&anon_vma->lock);
189} 142}
190 143
191/* 144/*
@@ -194,10 +147,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
194 */ 147 */
195static void remove_migration_ptes(struct page *old, struct page *new) 148static void remove_migration_ptes(struct page *old, struct page *new)
196{ 149{
197 if (PageAnon(new)) 150 rmap_walk(new, remove_migration_pte, old);
198 remove_anon_migration_ptes(old, new);
199 else
200 remove_file_migration_ptes(old, new);
201} 151}
202 152
203/* 153/*
@@ -358,6 +308,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
358 } 308 }
359 309
360 mlock_migrate_page(newpage, page); 310 mlock_migrate_page(newpage, page);
311 ksm_migrate_page(newpage, page);
361 312
362 ClearPageSwapCache(page); 313 ClearPageSwapCache(page);
363 ClearPagePrivate(page); 314 ClearPagePrivate(page);
@@ -577,9 +528,9 @@ static int move_to_new_page(struct page *newpage, struct page *page)
577 else 528 else
578 rc = fallback_migrate_page(mapping, newpage, page); 529 rc = fallback_migrate_page(mapping, newpage, page);
579 530
580 if (!rc) { 531 if (!rc)
581 remove_migration_ptes(page, newpage); 532 remove_migration_ptes(page, newpage);
582 } else 533 else
583 newpage->mapping = NULL; 534 newpage->mapping = NULL;
584 535
585 unlock_page(newpage); 536 unlock_page(newpage);