author     Hugh Dickins <hugh@veritas.com>            2005-06-21 20:15:12 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-06-21 21:46:21 -0400
commit     c475a8ab625d567eacf5e30ec35d6d8704558062
tree       0971bef7b876f1b3eb160621fc2b61cb5313827b
parent     d296e9cd02c92e576ecce5344026a4df4353cdb2
[PATCH] can_share_swap_page: use page_mapcount
Remember that ironic get_user_pages race? When the raised page_count on a
swapped-out page led do_wp_page to decide that it had to copy on write, it
substituted a different page into userspace. 2.6.7 onwards have Andrea's
solution, where try_to_unmap_one backs out if it finds page_count raised.
That works, but it is unsatisfying (rmap.c has no other page_count heuristics),
and a few months ago it was found to hang an intensive page migration test. A
year ago I was hesitant to engage page_mapcount; now it seems the right fix.
So remove the page_count hack from try_to_unmap_one, and use activate_page in
unuse_mm when dropping the page lock, to replace the removed test's secondary
effect of helping swapoff make progress in that case.
Simplify can_share_swap_page (now called only on anonymous pages) to check
page_mapcount + page_swapcount == 1: still needs the page lock to stabilize
their (pessimistic) sum, but does not need swapper_space.tree_lock for that.
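
As a rough illustration of that check, here is a minimal userspace model (not
kernel code; the struct, field and function names below are made up for this
sketch): a locked anonymous page is exclusive exactly when its pte mappings
plus its swap references, excluding the swap cache's own slot, total one.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the new test: not the kernel's struct page. */
struct page_model {
	int mapcount;	/* ptes currently mapping the page */
	int swap_map;	/* references to its swap slot, including the swap cache's own */
	bool swapcache;	/* page sits in the swap cache */
};

/* Mirrors the arithmetic of the new can_share_swap_page(). */
static bool can_share(const struct page_model *p)
{
	int count = p->mapcount;

	/* page_swapcount() subtracts the 1 held by the swap cache itself. */
	if (count <= 1 && p->swapcache)
		count += p->swap_map - 1;
	return count == 1;
}

int main(void)
{
	/* Freshly faulted-in page: one pte, swap slot held only by the cache. */
	struct page_model exclusive = { .mapcount = 1, .swap_map = 1, .swapcache = true };
	/* Same swap slot still referenced by another process's swap pte. */
	struct page_model shared    = { .mapcount = 1, .swap_map = 2, .swapcache = true };

	printf("exclusive: %d\n", can_share(&exclusive));	/* 1: reuse for write */
	printf("shared:    %d\n", can_share(&shared));		/* 0: must copy on write */
	return 0;
}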
In do_swap_page, move swap_free and unlock_page below page_add_anon_rmap, to
keep the sum on the high side (the new pte is counted in page_mapcount before
swap_free lowers page_swapcount), so that it is correct by the time
can_share_swap_page is called.
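
Condensed from the mm/memory.c hunk below, the tail of do_swap_page() now
reads as follows (the comments here are editorial, not part of the patch):

	flush_icache_page(vma, page);
	set_pte_at(mm, address, page_table, pte);
	page_add_anon_rmap(page, vma, address);	/* page_mapcount goes up first... */

	swap_free(entry);			/* ...before swap_free lowers page_swapcount */
	if (vm_swap_full())
		remove_exclusive_swap_page(page);
	unlock_page(page);			/* the page stayed locked across the whole update */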
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  mm/memory.c   | 10
-rw-r--r--  mm/rmap.c     | 21
-rw-r--r--  mm/swapfile.c | 55
3 files changed, 21 insertions, 65 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 1c0a3db78a05..da91b7bf9986 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1686,10 +1686,6 @@ static int do_swap_page(struct mm_struct * mm,
 	}
 
 	/* The page isn't present yet, go ahead with the fault. */
-
-	swap_free(entry);
-	if (vm_swap_full())
-		remove_exclusive_swap_page(page);
 
 	inc_mm_counter(mm, rss);
 	pte = mk_pte(page, vma->vm_page_prot);
@@ -1697,12 +1693,16 @@ static int do_swap_page(struct mm_struct * mm,
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
 	}
-	unlock_page(page);
 
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
 
+	swap_free(entry);
+	if (vm_swap_full())
+		remove_exclusive_swap_page(page);
+	unlock_page(page);
+
 	if (write_access) {
 		if (do_wp_page(mm, vma, address,
 				page_table, pmd, pte) == VM_FAULT_OOM)
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -539,27 +539,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		goto out_unmap;
 	}
 
-	/*
-	 * Don't pull an anonymous page out from under get_user_pages.
-	 * GUP carefully breaks COW and raises page count (while holding
-	 * page_table_lock, as we have here) to make sure that the page
-	 * cannot be freed. If we unmap that page here, a user write
-	 * access to the virtual address will bring back the page, but
-	 * its raised count will (ironically) be taken to mean it's not
-	 * an exclusive swap page, do_wp_page will replace it by a copy
-	 * page, and the user never get to see the data GUP was holding
-	 * the original page for.
-	 *
-	 * This test is also useful for when swapoff (unuse_process) has
-	 * to drop page lock: its reference to the page stops existing
-	 * ptes from being unmapped, so swapoff can make progress.
-	 */
-	if (PageSwapCache(page) &&
-	    page_count(page) != page_mapcount(page) + 2) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
-
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index da48405cd9a3..60cd24a55204 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -276,61 +276,37 @@ void swap_free(swp_entry_t entry)
 }
 
 /*
- * Check if we're the only user of a swap page,
- * when the page is locked.
+ * How many references to page are currently swapped out?
  */
-static int exclusive_swap_page(struct page *page)
+static inline int page_swapcount(struct page *page)
 {
-	int retval = 0;
-	struct swap_info_struct * p;
+	int count = 0;
+	struct swap_info_struct *p;
 	swp_entry_t entry;
 
 	entry.val = page->private;
 	p = swap_info_get(entry);
 	if (p) {
-		/* Is the only swap cache user the cache itself? */
-		if (p->swap_map[swp_offset(entry)] == 1) {
-			/* Recheck the page count with the swapcache lock held.. */
-			write_lock_irq(&swapper_space.tree_lock);
-			if (page_count(page) == 2)
-				retval = 1;
-			write_unlock_irq(&swapper_space.tree_lock);
-		}
+		/* Subtract the 1 for the swap cache itself */
+		count = p->swap_map[swp_offset(entry)] - 1;
 		swap_info_put(p);
 	}
-	return retval;
+	return count;
 }
 
 /*
  * We can use this swap cache entry directly
  * if there are no other references to it.
- *
- * Here "exclusive_swap_page()" does the real
- * work, but we opportunistically check whether
- * we need to get all the locks first..
  */
 int can_share_swap_page(struct page *page)
 {
-	int retval = 0;
+	int count;
 
-	if (!PageLocked(page))
-		BUG();
-	switch (page_count(page)) {
-	case 3:
-		if (!PagePrivate(page))
-			break;
-		/* Fallthrough */
-	case 2:
-		if (!PageSwapCache(page))
-			break;
-		retval = exclusive_swap_page(page);
-		break;
-	case 1:
-		if (PageReserved(page))
-			break;
-		retval = 1;
-	}
-	return retval;
+	BUG_ON(!PageLocked(page));
+	count = page_mapcount(page);
+	if (count <= 1 && PageSwapCache(page))
+		count += page_swapcount(page);
+	return count == 1;
 }
 
 /*
@@ -529,9 +505,10 @@ static int unuse_mm(struct mm_struct *mm,
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		/*
-		 * Our reference to the page stops try_to_unmap_one from
-		 * unmapping its ptes, so swapoff can make progress.
+		 * Activate page so shrink_cache is unlikely to unmap its
+		 * ptes while lock is dropped, so swapoff can make progress.
 		 */
+		activate_page(page);
 		unlock_page(page);
 		down_read(&mm->mmap_sem);
 		lock_page(page);