diff options
author | Hugh Dickins <hugh@veritas.com> | 2008-02-05 01:28:46 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-05 12:44:15 -0500 |
commit | 2e441889c38fe1b6ef6b963e6993076aa120176c (patch) | |
tree | f8cc99d2b8a3b90c6ff2c3e6d14e90d7edd95d7e | |
parent | 8952898b0d25223f38daf46b86156fd1c4d17ad0 (diff) |
swapoff: scan ptes preemptibly
Provided that CONFIG_HIGHPTE is not set, unuse_pte_range can reduce latency
in swapoff by scanning the page table preemptibly, so long as unuse_pte is
careful to recheck the matching entry under pte lock.
(To tell the truth, this patch was not inspired by any cries for lower
latency here: rather, this restructuring permits a future memory controller
patch to allocate with GFP_KERNEL in unuse_pte, where before it could not.
But it would be wrong to tuck this change away inside a memcgroup patch.)
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/swapfile.c | 38 |
1 files changed, 31 insertions, 7 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f5ba723faf81..14bc4f28a8cc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -506,9 +506,19 @@ unsigned int count_swap_pages(int type, int free) | |||
506 | * just let do_wp_page work it out if a write is requested later - to | 506 | * just let do_wp_page work it out if a write is requested later - to |
507 | * force COW, vm_page_prot omits write permission from any private vma. | 507 | * force COW, vm_page_prot omits write permission from any private vma. |
508 | */ | 508 | */ |
509 | static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, | 509 | static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, |
510 | unsigned long addr, swp_entry_t entry, struct page *page) | 510 | unsigned long addr, swp_entry_t entry, struct page *page) |
511 | { | 511 | { |
512 | spinlock_t *ptl; | ||
513 | pte_t *pte; | ||
514 | int found = 1; | ||
515 | |||
516 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||
517 | if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { | ||
518 | found = 0; | ||
519 | goto out; | ||
520 | } | ||
521 | |||
512 | inc_mm_counter(vma->vm_mm, anon_rss); | 522 | inc_mm_counter(vma->vm_mm, anon_rss); |
513 | get_page(page); | 523 | get_page(page); |
514 | set_pte_at(vma->vm_mm, addr, pte, | 524 | set_pte_at(vma->vm_mm, addr, pte, |
@@ -520,6 +530,9 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, | |||
520 | * immediately swapped out again after swapon. | 530 | * immediately swapped out again after swapon. |
521 | */ | 531 | */ |
522 | activate_page(page); | 532 | activate_page(page); |
533 | out: | ||
534 | pte_unmap_unlock(pte, ptl); | ||
535 | return found; | ||
523 | } | 536 | } |
524 | 537 | ||
525 | static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 538 | static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
@@ -528,22 +541,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
528 | { | 541 | { |
529 | pte_t swp_pte = swp_entry_to_pte(entry); | 542 | pte_t swp_pte = swp_entry_to_pte(entry); |
530 | pte_t *pte; | 543 | pte_t *pte; |
531 | spinlock_t *ptl; | ||
532 | int found = 0; | 544 | int found = 0; |
533 | 545 | ||
534 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 546 | /* |
547 | * We don't actually need pte lock while scanning for swp_pte: since | ||
548 | * we hold page lock and mmap_sem, swp_pte cannot be inserted into the | ||
549 | * page table while we're scanning; though it could get zapped, and on | ||
550 | * some architectures (e.g. x86_32 with PAE) we might catch a glimpse | ||
551 | * of unmatched parts which look like swp_pte, so unuse_pte must | ||
552 | * recheck under pte lock. Scanning without pte lock lets it be | ||
553 | * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE. | ||
554 | */ | ||
555 | pte = pte_offset_map(pmd, addr); | ||
535 | do { | 556 | do { |
536 | /* | 557 | /* |
537 | * swapoff spends a _lot_ of time in this loop! | 558 | * swapoff spends a _lot_ of time in this loop! |
538 | * Test inline before going to call unuse_pte. | 559 | * Test inline before going to call unuse_pte. |
539 | */ | 560 | */ |
540 | if (unlikely(pte_same(*pte, swp_pte))) { | 561 | if (unlikely(pte_same(*pte, swp_pte))) { |
541 | unuse_pte(vma, pte++, addr, entry, page); | 562 | pte_unmap(pte); |
542 | found = 1; | 563 | found = unuse_pte(vma, pmd, addr, entry, page); |
543 | break; | 564 | if (found) |
565 | goto out; | ||
566 | pte = pte_offset_map(pmd, addr); | ||
544 | } | 567 | } |
545 | } while (pte++, addr += PAGE_SIZE, addr != end); | 568 | } while (pte++, addr += PAGE_SIZE, addr != end); |
546 | pte_unmap_unlock(pte - 1, ptl); | 569 | pte_unmap(pte - 1); |
570 | out: | ||
547 | return found; | 571 | return found; |
548 | } | 572 | } |
549 | 573 | ||