diff options
author | Robin Holt <holt@sgi.com> | 2005-11-13 19:06:42 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-11-13 21:14:12 -0500 |
commit | 51c6f666fceb3184eeff045dad4432b602cd648e (patch) | |
tree | 33e29916e0fea872ba6f29eba698219a740b078f | |
parent | 885036d32f5d3c427c3e2b385b5a5503805e3e52 (diff) |
[PATCH] mm: ZAP_BLOCK causes redundant work
The address based work estimate for unmapping (for lockbreak) is and always
was horribly inefficient for sparse mappings. The problem is most simply
explained with an example:
If we find a pgd is clear, we still have to call into unmap_page_range
PGDIR_SIZE / ZAP_BLOCK_SIZE times, each time checking the clear pgd, in
order to progress the working address to the next pgd.
The fundamental way to solve the problem is to keep track of the end
address we've processed and pass it back to the higher layers.
From: Nick Piggin <npiggin@suse.de>
Modification to completely get away from address based work estimate
and instead use an abstract count, with a very small cost for empty
entries as opposed to present pages.
On 2.6.14-git2, ppc64, and CONFIG_PREEMPT=y, mapping and unmapping 1TB
of virtual address space takes 1.69s; with the following patch applied,
this operation can be done 1000 times in less than 0.01s.
From: Andrew Morton <akpm@osdl.org>
With CONFIG_HUGETLB_PAGE=n:
mm/memory.c: In function `unmap_vmas':
mm/memory.c:779: warning: division by zero
This is due to:
zap_work -= (end - start) /
(HPAGE_SIZE / PAGE_SIZE);
So make the dummy HPAGE_SIZE non-zero.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/hugetlb.h | 4 | ||||
-rw-r--r-- | mm/memory.c | 89 |
2 files changed, 57 insertions, 36 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0cea162b08c0..1056717ee501 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -102,8 +102,8 @@ static inline unsigned long hugetlb_total_pages(void) | |||
102 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) | 102 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) |
103 | 103 | ||
104 | #ifndef HPAGE_MASK | 104 | #ifndef HPAGE_MASK |
105 | #define HPAGE_MASK 0 /* Keep the compiler happy */ | 105 | #define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */ |
106 | #define HPAGE_SIZE 0 | 106 | #define HPAGE_SIZE PAGE_SIZE |
107 | #endif | 107 | #endif |
108 | 108 | ||
109 | #endif /* !CONFIG_HUGETLB_PAGE */ | 109 | #endif /* !CONFIG_HUGETLB_PAGE */ |
diff --git a/mm/memory.c b/mm/memory.c index 0f60baf6f69b..2998cfc12f5b 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -549,10 +549,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
549 | return 0; | 549 | return 0; |
550 | } | 550 | } |
551 | 551 | ||
552 | static void zap_pte_range(struct mmu_gather *tlb, | 552 | static unsigned long zap_pte_range(struct mmu_gather *tlb, |
553 | struct vm_area_struct *vma, pmd_t *pmd, | 553 | struct vm_area_struct *vma, pmd_t *pmd, |
554 | unsigned long addr, unsigned long end, | 554 | unsigned long addr, unsigned long end, |
555 | struct zap_details *details) | 555 | long *zap_work, struct zap_details *details) |
556 | { | 556 | { |
557 | struct mm_struct *mm = tlb->mm; | 557 | struct mm_struct *mm = tlb->mm; |
558 | pte_t *pte; | 558 | pte_t *pte; |
@@ -563,10 +563,15 @@ static void zap_pte_range(struct mmu_gather *tlb, | |||
563 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); | 563 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
564 | do { | 564 | do { |
565 | pte_t ptent = *pte; | 565 | pte_t ptent = *pte; |
566 | if (pte_none(ptent)) | 566 | if (pte_none(ptent)) { |
567 | (*zap_work)--; | ||
567 | continue; | 568 | continue; |
569 | } | ||
568 | if (pte_present(ptent)) { | 570 | if (pte_present(ptent)) { |
569 | struct page *page = NULL; | 571 | struct page *page = NULL; |
572 | |||
573 | (*zap_work) -= PAGE_SIZE; | ||
574 | |||
570 | if (!(vma->vm_flags & VM_RESERVED)) { | 575 | if (!(vma->vm_flags & VM_RESERVED)) { |
571 | unsigned long pfn = pte_pfn(ptent); | 576 | unsigned long pfn = pte_pfn(ptent); |
572 | if (unlikely(!pfn_valid(pfn))) | 577 | if (unlikely(!pfn_valid(pfn))) |
@@ -624,16 +629,18 @@ static void zap_pte_range(struct mmu_gather *tlb, | |||
624 | if (!pte_file(ptent)) | 629 | if (!pte_file(ptent)) |
625 | free_swap_and_cache(pte_to_swp_entry(ptent)); | 630 | free_swap_and_cache(pte_to_swp_entry(ptent)); |
626 | pte_clear_full(mm, addr, pte, tlb->fullmm); | 631 | pte_clear_full(mm, addr, pte, tlb->fullmm); |
627 | } while (pte++, addr += PAGE_SIZE, addr != end); | 632 | } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); |
628 | 633 | ||
629 | add_mm_rss(mm, file_rss, anon_rss); | 634 | add_mm_rss(mm, file_rss, anon_rss); |
630 | pte_unmap_unlock(pte - 1, ptl); | 635 | pte_unmap_unlock(pte - 1, ptl); |
636 | |||
637 | return addr; | ||
631 | } | 638 | } |
632 | 639 | ||
633 | static inline void zap_pmd_range(struct mmu_gather *tlb, | 640 | static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, |
634 | struct vm_area_struct *vma, pud_t *pud, | 641 | struct vm_area_struct *vma, pud_t *pud, |
635 | unsigned long addr, unsigned long end, | 642 | unsigned long addr, unsigned long end, |
636 | struct zap_details *details) | 643 | long *zap_work, struct zap_details *details) |
637 | { | 644 | { |
638 | pmd_t *pmd; | 645 | pmd_t *pmd; |
639 | unsigned long next; | 646 | unsigned long next; |
@@ -641,16 +648,21 @@ static inline void zap_pmd_range(struct mmu_gather *tlb, | |||
641 | pmd = pmd_offset(pud, addr); | 648 | pmd = pmd_offset(pud, addr); |
642 | do { | 649 | do { |
643 | next = pmd_addr_end(addr, end); | 650 | next = pmd_addr_end(addr, end); |
644 | if (pmd_none_or_clear_bad(pmd)) | 651 | if (pmd_none_or_clear_bad(pmd)) { |
652 | (*zap_work)--; | ||
645 | continue; | 653 | continue; |
646 | zap_pte_range(tlb, vma, pmd, addr, next, details); | 654 | } |
647 | } while (pmd++, addr = next, addr != end); | 655 | next = zap_pte_range(tlb, vma, pmd, addr, next, |
656 | zap_work, details); | ||
657 | } while (pmd++, addr = next, (addr != end && *zap_work > 0)); | ||
658 | |||
659 | return addr; | ||
648 | } | 660 | } |
649 | 661 | ||
650 | static inline void zap_pud_range(struct mmu_gather *tlb, | 662 | static inline unsigned long zap_pud_range(struct mmu_gather *tlb, |
651 | struct vm_area_struct *vma, pgd_t *pgd, | 663 | struct vm_area_struct *vma, pgd_t *pgd, |
652 | unsigned long addr, unsigned long end, | 664 | unsigned long addr, unsigned long end, |
653 | struct zap_details *details) | 665 | long *zap_work, struct zap_details *details) |
654 | { | 666 | { |
655 | pud_t *pud; | 667 | pud_t *pud; |
656 | unsigned long next; | 668 | unsigned long next; |
@@ -658,15 +670,21 @@ static inline void zap_pud_range(struct mmu_gather *tlb, | |||
658 | pud = pud_offset(pgd, addr); | 670 | pud = pud_offset(pgd, addr); |
659 | do { | 671 | do { |
660 | next = pud_addr_end(addr, end); | 672 | next = pud_addr_end(addr, end); |
661 | if (pud_none_or_clear_bad(pud)) | 673 | if (pud_none_or_clear_bad(pud)) { |
674 | (*zap_work)--; | ||
662 | continue; | 675 | continue; |
663 | zap_pmd_range(tlb, vma, pud, addr, next, details); | 676 | } |
664 | } while (pud++, addr = next, addr != end); | 677 | next = zap_pmd_range(tlb, vma, pud, addr, next, |
678 | zap_work, details); | ||
679 | } while (pud++, addr = next, (addr != end && *zap_work > 0)); | ||
680 | |||
681 | return addr; | ||
665 | } | 682 | } |
666 | 683 | ||
667 | static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | 684 | static unsigned long unmap_page_range(struct mmu_gather *tlb, |
685 | struct vm_area_struct *vma, | ||
668 | unsigned long addr, unsigned long end, | 686 | unsigned long addr, unsigned long end, |
669 | struct zap_details *details) | 687 | long *zap_work, struct zap_details *details) |
670 | { | 688 | { |
671 | pgd_t *pgd; | 689 | pgd_t *pgd; |
672 | unsigned long next; | 690 | unsigned long next; |
@@ -679,11 +697,16 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
679 | pgd = pgd_offset(vma->vm_mm, addr); | 697 | pgd = pgd_offset(vma->vm_mm, addr); |
680 | do { | 698 | do { |
681 | next = pgd_addr_end(addr, end); | 699 | next = pgd_addr_end(addr, end); |
682 | if (pgd_none_or_clear_bad(pgd)) | 700 | if (pgd_none_or_clear_bad(pgd)) { |
701 | (*zap_work)--; | ||
683 | continue; | 702 | continue; |
684 | zap_pud_range(tlb, vma, pgd, addr, next, details); | 703 | } |
685 | } while (pgd++, addr = next, addr != end); | 704 | next = zap_pud_range(tlb, vma, pgd, addr, next, |
705 | zap_work, details); | ||
706 | } while (pgd++, addr = next, (addr != end && *zap_work > 0)); | ||
686 | tlb_end_vma(tlb, vma); | 707 | tlb_end_vma(tlb, vma); |
708 | |||
709 | return addr; | ||
687 | } | 710 | } |
688 | 711 | ||
689 | #ifdef CONFIG_PREEMPT | 712 | #ifdef CONFIG_PREEMPT |
@@ -724,7 +747,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
724 | unsigned long end_addr, unsigned long *nr_accounted, | 747 | unsigned long end_addr, unsigned long *nr_accounted, |
725 | struct zap_details *details) | 748 | struct zap_details *details) |
726 | { | 749 | { |
727 | unsigned long zap_bytes = ZAP_BLOCK_SIZE; | 750 | long zap_work = ZAP_BLOCK_SIZE; |
728 | unsigned long tlb_start = 0; /* For tlb_finish_mmu */ | 751 | unsigned long tlb_start = 0; /* For tlb_finish_mmu */ |
729 | int tlb_start_valid = 0; | 752 | int tlb_start_valid = 0; |
730 | unsigned long start = start_addr; | 753 | unsigned long start = start_addr; |
@@ -745,27 +768,25 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
745 | *nr_accounted += (end - start) >> PAGE_SHIFT; | 768 | *nr_accounted += (end - start) >> PAGE_SHIFT; |
746 | 769 | ||
747 | while (start != end) { | 770 | while (start != end) { |
748 | unsigned long block; | ||
749 | |||
750 | if (!tlb_start_valid) { | 771 | if (!tlb_start_valid) { |
751 | tlb_start = start; | 772 | tlb_start = start; |
752 | tlb_start_valid = 1; | 773 | tlb_start_valid = 1; |
753 | } | 774 | } |
754 | 775 | ||
755 | if (is_vm_hugetlb_page(vma)) { | 776 | if (unlikely(is_vm_hugetlb_page(vma))) { |
756 | block = end - start; | ||
757 | unmap_hugepage_range(vma, start, end); | 777 | unmap_hugepage_range(vma, start, end); |
758 | } else { | 778 | zap_work -= (end - start) / |
759 | block = min(zap_bytes, end - start); | 779 | (HPAGE_SIZE / PAGE_SIZE); |
760 | unmap_page_range(*tlbp, vma, start, | 780 | start = end; |
761 | start + block, details); | 781 | } else |
782 | start = unmap_page_range(*tlbp, vma, | ||
783 | start, end, &zap_work, details); | ||
784 | |||
785 | if (zap_work > 0) { | ||
786 | BUG_ON(start != end); | ||
787 | break; | ||
762 | } | 788 | } |
763 | 789 | ||
764 | start += block; | ||
765 | zap_bytes -= block; | ||
766 | if ((long)zap_bytes > 0) | ||
767 | continue; | ||
768 | |||
769 | tlb_finish_mmu(*tlbp, tlb_start, start); | 790 | tlb_finish_mmu(*tlbp, tlb_start, start); |
770 | 791 | ||
771 | if (need_resched() || | 792 | if (need_resched() || |
@@ -779,7 +800,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
779 | 800 | ||
780 | *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); | 801 | *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); |
781 | tlb_start_valid = 0; | 802 | tlb_start_valid = 0; |
782 | zap_bytes = ZAP_BLOCK_SIZE; | 803 | zap_work = ZAP_BLOCK_SIZE; |
783 | } | 804 | } |
784 | } | 805 | } |
785 | out: | 806 | out: |