aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hugh Dickins <hugh@veritas.com> 2008-02-05 01:28:46 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-05 12:44:15 -0500
commit 2e441889c38fe1b6ef6b963e6993076aa120176c (patch)
tree f8cc99d2b8a3b90c6ff2c3e6d14e90d7edd95d7e
parent 8952898b0d25223f38daf46b86156fd1c4d17ad0 (diff)
swapoff: scan ptes preemptibly
Provided that CONFIG_HIGHPTE is not set, unuse_pte_range can reduce latency in swapoff by scanning the page table preemptibly: so long as unuse_pte is careful to recheck that entry under pte lock.

(To tell the truth, this patch was not inspired by any cries for lower latency here: rather, this restructuring permits a future memory controller patch to allocate with GFP_KERNEL in unuse_pte, where before it could not. But it would be wrong to tuck this change away inside a memcgroup patch.)

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/swapfile.c 38
1 files changed, 31 insertions, 7 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f5ba723faf81..14bc4f28a8cc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -506,9 +506,19 @@ unsigned int count_swap_pages(int type, int free)
506 * just let do_wp_page work it out if a write is requested later - to 506 * just let do_wp_page work it out if a write is requested later - to
507 * force COW, vm_page_prot omits write permission from any private vma. 507 * force COW, vm_page_prot omits write permission from any private vma.
508 */ 508 */
509static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, 509static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
510 unsigned long addr, swp_entry_t entry, struct page *page) 510 unsigned long addr, swp_entry_t entry, struct page *page)
511{ 511{
512 spinlock_t *ptl;
513 pte_t *pte;
514 int found = 1;
515
516 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
517 if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
518 found = 0;
519 goto out;
520 }
521
512 inc_mm_counter(vma->vm_mm, anon_rss); 522 inc_mm_counter(vma->vm_mm, anon_rss);
513 get_page(page); 523 get_page(page);
514 set_pte_at(vma->vm_mm, addr, pte, 524 set_pte_at(vma->vm_mm, addr, pte,
@@ -520,6 +530,9 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
520 * immediately swapped out again after swapon. 530 * immediately swapped out again after swapon.
521 */ 531 */
522 activate_page(page); 532 activate_page(page);
533out:
534 pte_unmap_unlock(pte, ptl);
535 return found;
523} 536}
524 537
525static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, 538static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -528,22 +541,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
528{ 541{
529 pte_t swp_pte = swp_entry_to_pte(entry); 542 pte_t swp_pte = swp_entry_to_pte(entry);
530 pte_t *pte; 543 pte_t *pte;
531 spinlock_t *ptl;
532 int found = 0; 544 int found = 0;
533 545
534 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 546 /*
547 * We don't actually need pte lock while scanning for swp_pte: since
548 * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
549 * page table while we're scanning; though it could get zapped, and on
550 * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
551 * of unmatched parts which look like swp_pte, so unuse_pte must
552 * recheck under pte lock. Scanning without pte lock lets it be
553 * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
554 */
555 pte = pte_offset_map(pmd, addr);
535 do { 556 do {
536 /* 557 /*
537 * swapoff spends a _lot_ of time in this loop! 558 * swapoff spends a _lot_ of time in this loop!
538 * Test inline before going to call unuse_pte. 559 * Test inline before going to call unuse_pte.
539 */ 560 */
540 if (unlikely(pte_same(*pte, swp_pte))) { 561 if (unlikely(pte_same(*pte, swp_pte))) {
541 unuse_pte(vma, pte++, addr, entry, page); 562 pte_unmap(pte);
542 found = 1; 563 found = unuse_pte(vma, pmd, addr, entry, page);
543 break; 564 if (found)
565 goto out;
566 pte = pte_offset_map(pmd, addr);
544 } 567 }
545 } while (pte++, addr += PAGE_SIZE, addr != end); 568 } while (pte++, addr += PAGE_SIZE, addr != end);
546 pte_unmap_unlock(pte - 1, ptl); 569 pte_unmap(pte - 1);
570out:
547 return found; 571 return found;
548} 572}
549 573