author	Robin Holt <holt@sgi.com>	2005-11-13 19:06:42 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-13 21:14:12 -0500
commit	51c6f666fceb3184eeff045dad4432b602cd648e (patch)
tree	33e29916e0fea872ba6f29eba698219a740b078f
parent	885036d32f5d3c427c3e2b385b5a5503805e3e52 (diff)
[PATCH] mm: ZAP_BLOCK causes redundant work
The address based work estimate for unmapping (for lockbreak) is and always
was horribly inefficient for sparse mappings.  The problem is most simply
explained with an example:

If we find a pgd is clear, we still have to call into unmap_page_range
PGDIR_SIZE / ZAP_BLOCK_SIZE times, each time checking the clear pgd, in
order to progress the working address to the next pgd.

The fundamental way to solve the problem is to keep track of the end
address we've processed and pass it back to the higher layers.

From: Nick Piggin <npiggin@suse.de>

  Modification to completely get away from address based work estimate and
  instead use an abstract count, with a very small cost for empty entries
  as opposed to present pages.

  On 2.6.14-git2, ppc64, and CONFIG_PREEMPT=y, mapping and unmapping 1TB of
  virtual address space takes 1.69s; with the following patch applied, this
  operation can be done 1000 times in less than 0.01s.

From: Andrew Morton <akpm@osdl.org>

With CONFIG_HUGETLB_PAGE=n:

mm/memory.c: In function `unmap_vmas':
mm/memory.c:779: warning: division by zero

Due to

	zap_work -= (end - start) / (HPAGE_SIZE / PAGE_SIZE);

So make the dummy HPAGE_SIZE non-zero.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
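To make the accounting concrete before reading the diff, here is a minimal
userspace sketch of the same idea; it is not the kernel code, and every name
and constant in it (walk_leaf, sparse, the 1 GB range, the budget value) is
invented for illustration.  The walker spends from a shared work budget,
charging a full page's worth of work for a present entry and a single token
for an empty one, and it returns the address it actually reached, so the
caller only breaks out (where the kernel would drop the lock and reschedule)
after real work has been done rather than after a fixed span of addresses.

/*
 * Illustrative sketch only: a shared work budget drives the walk,
 * mirroring the zap_work accounting introduced by this patch.
 */
#include <stdio.h>
#include <stdbool.h>

#define PAGE_SIZE	4096UL
#define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)	/* small budget, roughly in the spirit of the CONFIG_PREEMPT value */

/* Leaf walk: charge PAGE_SIZE per present page, 1 per hole,
 * and report how far we actually got. */
static unsigned long walk_leaf(unsigned long addr, unsigned long end,
			       long *zap_work, bool (*present)(unsigned long))
{
	do {
		*zap_work -= present(addr) ? (long)PAGE_SIZE : 1;
		addr += PAGE_SIZE;
	} while (addr != end && *zap_work > 0);

	return addr;
}

/* A very sparse mapping: only every 512th page is present. */
static bool sparse(unsigned long addr)
{
	return (addr / PAGE_SIZE) % 512 == 0;
}

int main(void)
{
	unsigned long addr = 0, end = 1UL << 30;	/* walk 1 GB of address space */
	long zap_work = ZAP_BLOCK_SIZE;
	unsigned breaks = 0;

	while (addr != end) {
		addr = walk_leaf(addr, end, &zap_work, sparse);
		if (zap_work > 0)
			break;		/* reached end with budget to spare */
		/* Budget exhausted: the kernel would drop the lock and
		 * reschedule here, then refill the budget and continue. */
		breaks++;
		zap_work = ZAP_BLOCK_SIZE;
	}
	printf("finished after %u lock-break points\n", breaks);
	return 0;
}

Because holes cost almost nothing, the sparse walk above exhausts its budget
only where pages are actually present, which is exactly why the 1TB unmap in
the commit message collapses from seconds to effectively free.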
-rw-r--r--	include/linux/hugetlb.h	4
-rw-r--r--	mm/memory.c	89
2 files changed, 57 insertions, 36 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0cea162b08c0..1056717ee501 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -102,8 +102,8 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
 
 #ifndef HPAGE_MASK
-#define HPAGE_MASK	0		/* Keep the compiler happy */
-#define HPAGE_SIZE	0
+#define HPAGE_MASK	PAGE_MASK	/* Keep the compiler happy */
+#define HPAGE_SIZE	PAGE_SIZE
 #endif
 
 #endif /* !CONFIG_HUGETLB_PAGE */
diff --git a/mm/memory.c b/mm/memory.c
index 0f60baf6f69b..2998cfc12f5b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -549,10 +549,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb,
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
@@ -563,10 +563,15 @@ static void zap_pte_range(struct mmu_gather *tlb,
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	do {
 		pte_t ptent = *pte;
-		if (pte_none(ptent))
+		if (pte_none(ptent)) {
+			(*zap_work)--;
 			continue;
+		}
 		if (pte_present(ptent)) {
 			struct page *page = NULL;
+
+			(*zap_work) -= PAGE_SIZE;
+
 			if (!(vma->vm_flags & VM_RESERVED)) {
 				unsigned long pfn = pte_pfn(ptent);
 				if (unlikely(!pfn_valid(pfn)))
@@ -624,16 +629,18 @@ static void zap_pte_range(struct mmu_gather *tlb,
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);
+
+	return addr;
 }
 
-static inline void zap_pmd_range(struct mmu_gather *tlb,
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -641,16 +648,21 @@ static inline void zap_pmd_range(struct mmu_gather *tlb,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
+		if (pmd_none_or_clear_bad(pmd)) {
+			(*zap_work)--;
 			continue;
-		zap_pte_range(tlb, vma, pmd, addr, next, details);
-	} while (pmd++, addr = next, addr != end);
+		}
+		next = zap_pte_range(tlb, vma, pmd, addr, next,
+						zap_work, details);
+	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+	return addr;
 }
 
-static inline void zap_pud_range(struct mmu_gather *tlb,
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -658,15 +670,21 @@ static inline void zap_pud_range(struct mmu_gather *tlb,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
+		if (pud_none_or_clear_bad(pud)) {
+			(*zap_work)--;
 			continue;
-		zap_pmd_range(tlb, vma, pud, addr, next, details);
-	} while (pud++, addr = next, addr != end);
+		}
+		next = zap_pmd_range(tlb, vma, pud, addr, next,
+						zap_work, details);
+	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+	return addr;
 }
 
-static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -679,11 +697,16 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
+		if (pgd_none_or_clear_bad(pgd)) {
+			(*zap_work)--;
 			continue;
-		zap_pud_range(tlb, vma, pgd, addr, next, details);
-	} while (pgd++, addr = next, addr != end);
+		}
+		next = zap_pud_range(tlb, vma, pgd, addr, next,
+						zap_work, details);
+	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
 	tlb_end_vma(tlb, vma);
+
+	return addr;
 }
 
 #ifdef CONFIG_PREEMPT
@@ -724,7 +747,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	unsigned long zap_bytes = ZAP_BLOCK_SIZE;
+	long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
 	int tlb_start_valid = 0;
 	unsigned long start = start_addr;
@@ -745,27 +768,25 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		*nr_accounted += (end - start) >> PAGE_SHIFT;
 
 		while (start != end) {
-			unsigned long block;
-
 			if (!tlb_start_valid) {
 				tlb_start = start;
 				tlb_start_valid = 1;
 			}
 
-			if (is_vm_hugetlb_page(vma)) {
-				block = end - start;
+			if (unlikely(is_vm_hugetlb_page(vma))) {
 				unmap_hugepage_range(vma, start, end);
-			} else {
-				block = min(zap_bytes, end - start);
-				unmap_page_range(*tlbp, vma, start,
-						start + block, details);
+				zap_work -= (end - start) /
+						(HPAGE_SIZE / PAGE_SIZE);
+				start = end;
+			} else
+				start = unmap_page_range(*tlbp, vma,
+						start, end, &zap_work, details);
+
+			if (zap_work > 0) {
+				BUG_ON(start != end);
+				break;
 			}
 
-			start += block;
-			zap_bytes -= block;
-			if ((long)zap_bytes > 0)
-				continue;
-
 			tlb_finish_mmu(*tlbp, tlb_start, start);
 
 			if (need_resched() ||
@@ -779,7 +800,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 
 		*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
 		tlb_start_valid = 0;
-		zap_bytes = ZAP_BLOCK_SIZE;
+		zap_work = ZAP_BLOCK_SIZE;
 	}
 }
 out: