author     Hugh Dickins <hughd@google.com>                    2014-03-21 00:52:17 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>     2014-03-21 01:09:09 -0400
commit     7e09e738afd21ef99f047425fc0b0c9be8b03254
tree       423cb848a7d6875509b99720d4afcfa208bac078
parent     3fb725c48b93c0a152174b6dbbc1029b5e734c7b
mm: fix swapops.h:131 bug if remap_file_pages raced migration
Add remove_linear_migration_ptes_from_nonlinear(), to fix an interesting
little include/linux/swapops.h:131 BUG_ON(!PageLocked) found by trinity:
indicating that remove_migration_ptes() failed to find one of the migration
entries that was temporarily inserted.

The problem comes from remap_file_pages()'s switch from vma_interval_tree
(good for inserting the migration entry) to i_mmap_nonlinear list (no good
for locating it again); but it can only be a problem if the remap_file_pages()
range does not cover the whole of the vma (zap_pte() clears the range).

remove_migration_ptes() needs a file_nonlinear method to go down the
i_mmap_nonlinear list, applying linear location to look for migration entries
in those vmas too, just in case there was this race.  The file_nonlinear
method does need rmap_walk_control.arg to do this; but it never needed vma
passed in - vma comes from its own iteration.

Reported-and-tested-by: Dave Jones <davej@redhat.com>
Reported-and-tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
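The fix hinges on the fact that, in a still-linear part of the vma, the page's
pte address can be recomputed from its file offset. The standalone sketch below
is ordinary user-space C with made-up example values, not kernel code: PAGE_SHIFT,
vm_start, vm_end, vm_pgoff and page_index stand in for the corresponding vma and
page fields. It only illustrates the arithmetic that the new
remove_linear_migration_ptes_from_nonlinear() applies to each vma on the
i_mmap_nonlinear list.

#include <stdio.h>

#define PAGE_SHIFT 12UL  /* assume 4 KiB pages */

int main(void)
{
	/* Made-up example values standing in for vma and page fields. */
	unsigned long vm_start   = 0x700000000000UL; /* vma->vm_start */
	unsigned long vm_end     = 0x700000010000UL; /* vma->vm_end: vma spans 16 pages */
	unsigned long vm_pgoff   = 100;              /* file page at which the vma starts */
	unsigned long page_index = 105;              /* page->index: page's offset in the file, in pages */

	/* Address the page would have if this vma mapped it linearly. */
	unsigned long addr = vm_start + ((page_index - vm_pgoff) << PAGE_SHIFT);

	/* Only act if that linear address actually falls inside this vma. */
	if (addr >= vm_start && addr < vm_end)
		printf("look for a migration entry at 0x%lx\n", addr);
	else
		printf("page is outside this vma's linear range, skip\n");
	return 0;
}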
-rw-r--r--   include/linux/rmap.h    3
-rw-r--r--   mm/migrate.c           32
-rw-r--r--   mm/rmap.c               5
3 files changed, 36 insertions, 4 deletions
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1da693d51255..b66c2110cb1f 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -250,8 +250,7 @@ struct rmap_walk_control {
 	int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
 					unsigned long addr, void *arg);
 	int (*done)(struct page *page);
-	int (*file_nonlinear)(struct page *, struct address_space *,
-					struct vm_area_struct *vma);
+	int (*file_nonlinear)(struct page *, struct address_space *, void *arg);
 	struct anon_vma *(*anon_lock)(struct page *page);
 	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
 };
diff --git a/mm/migrate.c b/mm/migrate.c
index b494fdb9a636..bed48809e5d0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -178,6 +178,37 @@ out:
 }
 
 /*
+ * Congratulations to trinity for discovering this bug.
+ * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
+ * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
+ * replace the specified range by file ptes throughout (maybe populated after).
+ * If page migration finds a page within that range, while it's still located
+ * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
+ * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
+ * But if the migrating page is in a part of the vma outside the range to be
+ * remapped, then it will not be cleared, and remove_migration_ptes() needs to
+ * deal with it.  Fortunately, this part of the vma is of course still linear,
+ * so we just need to use linear location on the nonlinear list.
+ */
+static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
+		struct address_space *mapping, void *arg)
+{
+	struct vm_area_struct *vma;
+	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long addr;
+
+	list_for_each_entry(vma,
+			&mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+		if (addr >= vma->vm_start && addr < vma->vm_end)
+			remove_migration_pte(page, vma, addr, arg);
+	}
+	return SWAP_AGAIN;
+}
+
+/*
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
@@ -186,6 +217,7 @@ static void remove_migration_ptes(struct page *old, struct page *new)
 	struct rmap_walk_control rwc = {
 		.rmap_one = remove_migration_pte,
 		.arg = old,
+		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
 	};
 
 	rmap_walk(new, &rwc);
diff --git a/mm/rmap.c b/mm/rmap.c
index d9d42316a99a..8fc049f9a5a6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1360,8 +1360,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 }
 
 static int try_to_unmap_nonlinear(struct page *page,
-		struct address_space *mapping, struct vm_area_struct *vma)
+		struct address_space *mapping, void *arg)
 {
+	struct vm_area_struct *vma;
 	int ret = SWAP_AGAIN;
 	unsigned long cursor;
 	unsigned long max_nl_cursor = 0;
@@ -1663,7 +1664,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto done;
 
-	ret = rwc->file_nonlinear(page, mapping, vma);
+	ret = rwc->file_nonlinear(page, mapping, rwc->arg);
 
 done:
 	mutex_unlock(&mapping->i_mmap_mutex);
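To make the signature change concrete, here is a small user-space model of the
dispatch, not kernel code: the toy struct definitions and toy_file_nonlinear()
are invented for illustration only. It shows how rmap_walk_file() now hands
rwc->arg (for migration, the old page) to the file_nonlinear callback, which
finds the vmas by walking the nonlinear list itself rather than receiving one
from the caller.

#include <stdio.h>

/* Toy stand-ins for the kernel types, just to model the dispatch. */
struct page { int id; };
struct address_space { int nonlinear_count; };

struct rmap_walk_control {
	int (*file_nonlinear)(struct page *page, struct address_space *mapping,
			      void *arg);
	void *arg;	/* for migration: the old page */
};

/* Invented callback: in the kernel this role is played by
 * remove_linear_migration_ptes_from_nonlinear(). */
static int toy_file_nonlinear(struct page *page, struct address_space *mapping,
			      void *arg)
{
	struct page *old = arg;	/* recovered from rwc->arg; no vma is passed in */
	printf("walk %d nonlinear vma(s); restore ptes of page %d to page %d\n",
	       mapping->nonlinear_count, old->id, page->id);
	return 0;
}

int main(void)
{
	struct page old = { .id = 1 }, new_page = { .id = 2 };
	struct address_space mapping = { .nonlinear_count = 3 };
	struct rmap_walk_control rwc = {
		.file_nonlinear = toy_file_nonlinear,
		.arg = &old,
	};

	/* Models the changed call site in rmap_walk_file(): pass rwc->arg,
	 * not a caller-local vma. */
	if (mapping.nonlinear_count)
		rwc.file_nonlinear(&new_page, &mapping, rwc.arg);
	return 0;
}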