author		Hugh Dickins <hugh.dickins@tiscali.co.uk>	2009-12-14 20:59:31 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-15 11:53:20 -0500
commit		e9995ef978a7d5296fe04a9a2c5ca6e66d8bb4e5 (patch)
tree		df4324273856e06b8277b7e4a0fa9289eb8e6385 /mm/rmap.c
parent		407f9c8b0889ced1dbe2f9157e4e60c61329d5c9 (diff)
ksm: rmap_walk to remove_migration_ptes
A side-effect of making ksm pages swappable is that they have to be placed on the LRUs: which then exposes them to isolate_lru_page() and hence to page migration.

Add rmap_walk() for remove_migration_ptes() to use: rmap_walk_anon() and rmap_walk_file() in rmap.c, but rmap_walk_ksm() in ksm.c. Perhaps some consolidation with existing code is possible, but don't attempt that yet (try_to_unmap needs to handle nonlinears, but migration pte removal does not).

rmap_walk() is sadly less general than it appears: rmap_walk_anon(), like remove_anon_migration_ptes() which it replaces, avoids calling page_lock_anon_vma(), because that includes a page_mapped() test which fails when all migration ptes are in place. That was valid when NUMA page migration was introduced (holding mmap_sem provided the missing guarantee that anon_vma's slab had not already been destroyed), but I believe not valid in the memory hotremove case added since.

For now do the same as before, and consider the best way to fix that unlikely race later on. When fixed, we can probably use rmap_walk() on hwpoisoned ksm pages too: for now, they remain among hwpoison's various exceptions (its PageKsm test comes before the page is locked, but its page_lock_anon_vma fails safely if an anon gets upgraded).

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
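[Editor's note] For orientation, the consumer side in mm/migrate.c (not shown here, since this view is limited to mm/rmap.c) hands a per-pte rewrite helper to rmap_walk() as the rmap_one callback. The following is a minimal sketch only: the remove_migration_pte() signature is inferred from the rmap_one prototype in the diff below, and passing the old page through as the opaque arg is an assumption.

	/*
	 * Sketch of the migrate.c caller (assumed shape, not part of
	 * this diff): the per-pte helper matches the rmap_one prototype,
	 * and the old page travels through rmap_walk() as the opaque arg.
	 */
	static int remove_migration_pte(struct page *new,
			struct vm_area_struct *vma, unsigned long addr,
			void *old);

	static void remove_migration_ptes(struct page *old, struct page *new)
	{
		/* new must be locked: rmap_walk() asserts PageLocked */
		rmap_walk(new, remove_migration_pte, old);
	}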
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--	mm/rmap.c	79
1 file changed, 79 insertions(+), 0 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 2e38e9048327..c81bedd7d527 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1203,3 +1203,82 @@ int try_to_munlock(struct page *page)
 	else
 		return try_to_unmap_file(page, TTU_MUNLOCK);
 }
+
+#ifdef CONFIG_MIGRATION
+/*
+ * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
+ * Called by migrate.c to remove migration ptes, but might be used more later.
+ */
+static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int ret = SWAP_AGAIN;
+
+	/*
+	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+	 * because that depends on page_mapped(); but not all its usages
+	 * are holding mmap_sem, which also gave the necessary guarantee
+	 * (that this anon_vma's slab has not already been destroyed).
+	 * This needs to be reviewed later: avoiding page_lock_anon_vma()
+	 * is risky, and currently limits the usefulness of rmap_walk().
+	 */
+	anon_vma = page_anon_vma(page);
+	if (!anon_vma)
+		return ret;
+	spin_lock(&anon_vma->lock);
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	spin_unlock(&anon_vma->lock);
+	return ret;
+}
+
+static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct address_space *mapping = page->mapping;
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int ret = SWAP_AGAIN;
+
+	if (!mapping)
+		return ret;
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	/*
+	 * No nonlinear handling: being always shared, nonlinear vmas
+	 * never contain migration ptes.  Decide what to do about this
+	 * limitation to linear when we need rmap_walk() on nonlinear.
+	 */
+	spin_unlock(&mapping->i_mmap_lock);
+	return ret;
+}
+
+int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	VM_BUG_ON(!PageLocked(page));
+
+	if (unlikely(PageKsm(page)))
+		return rmap_walk_ksm(page, rmap_one, arg);
+	else if (PageAnon(page))
+		return rmap_walk_anon(page, rmap_one, arg);
+	else
+		return rmap_walk_file(page, rmap_one, arg);
+}
+#endif /* CONFIG_MIGRATION */
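
[Editor's note] To make the callback contract concrete, here is a hypothetical rmap_one implementation, invented for illustration (count_mappings and its accumulator arg are not from the patch): the walkers above keep iterating for as long as the callback returns SWAP_AGAIN, and any other return value ends the walk early.

	/* Hypothetical callback: count the vmas currently mapping the page. */
	static int count_mappings(struct page *page, struct vm_area_struct *vma,
			unsigned long address, void *arg)
	{
		int *count = arg;	/* caller-supplied accumulator */

		(*count)++;		/* page is mapped at address in this vma */
		return SWAP_AGAIN;	/* continue with the next vma */
	}

	/* Usage, with page already locked as rmap_walk() requires: */
	int nr = 0;
	rmap_walk(page, count_mappings, &nr);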