author     Hugh Dickins <hugh@veritas.com>            2005-10-29 21:16:30 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>      2005-10-30 00:40:41 -0400
commit     508034a32b819a2d40aa7ac0dbc8cd2e044c2de6
tree       906a8f0095af24f403b30d649d3ec1ffb4ff2f50 /mm/memory.c
parent     8f4f8c164cb4af1432cc25eda82928ea4519ba72
[PATCH] mm: unmap_vmas with inner ptlock
Remove the page_table_lock from around the calls to unmap_vmas, and replace
the pte_offset_map in zap_pte_range by pte_offset_map_lock: all callers are
now safe to descend without page_table_lock.
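Condensed from the zap_pte_range hunks below (the rss accounting and the
per-pte teardown are elided), the new locking pattern is roughly:

static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                                unsigned long addr, unsigned long end,
                                struct zap_details *details)
{
        struct mm_struct *mm = tlb->mm;
        pte_t *pte;
        spinlock_t *ptl;        /* lock handed back by pte_offset_map_lock */

        /* Map the pte page and take its lock in one step. */
        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        do {
                /* ... zap each pte exactly as before ... */
        } while (pte++, addr += PAGE_SIZE, addr != end);

        /* Drop the lock together with the mapping of the pte page. */
        pte_unmap_unlock(pte - 1, ptl);
}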
Don't attempt fancy locking for hugepages; just take page_table_lock in
unmap_hugepage_range. That makes zap_hugepage_range, and the hugetlb test
in zap_page_range, redundant: unmap_vmas calls unmap_hugepage_range anyway.
Nor does unmap_vmas have much use for its mm arg now.
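The hugetlb side lies outside this diffstat (the view below is limited to
mm/memory.c), so the sketch here is only illustrative of the intended shape,
with the teardown loop elided:

void unmap_hugepage_range(struct vm_area_struct *vma,
                                unsigned long start, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;

        /* One plain page_table_lock around the whole teardown:
         * no per-pte locking games for huge pages. */
        spin_lock(&mm->page_table_lock);
        /* ... clear each huge pte and release its page ... */
        spin_unlock(&mm->page_table_lock);
        /* ... then flush the TLB for the range ... */
}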
The tlb_start_vma and tlb_end_vma in unmap_page_range are now called without
page_table_lock: if they're implemented at all, they typically come down to
flush_cache_range (usually done outside page_table_lock) and flush_tlb_range
(which we already audited for the mprotect case).
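Where they are implemented, the per-vma hooks are typically along these
lines (an illustrative sketch, not any particular architecture's asm/tlb.h):

#define tlb_start_vma(tlb, vma)                                         \
        do {                                                            \
                if (!(tlb)->fullmm)                                     \
                        flush_cache_range(vma, (vma)->vm_start,         \
                                                (vma)->vm_end);         \
        } while (0)

#define tlb_end_vma(tlb, vma)                                           \
        do {                                                            \
                if (!(tlb)->fullmm)                                     \
                        flush_tlb_range(vma, (vma)->vm_start,           \
                                                (vma)->vm_end);         \
        } while (0)

Since neither hook touches page table entries, nothing here needs
page_table_lock.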
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/memory.c')
 mm/memory.c | 41
 1 file changed, 12 insertions(+), 29 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 4ea89a2e3a83..622a4ef5409f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -551,10 +551,11 @@ static void zap_pte_range(struct mmu_gather *tlb,
 {
         struct mm_struct *mm = tlb->mm;
         pte_t *pte;
+        spinlock_t *ptl;
         int file_rss = 0;
         int anon_rss = 0;
 
-        pte = pte_offset_map(pmd, addr);
+        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
         do {
                 pte_t ptent = *pte;
                 if (pte_none(ptent))
@@ -621,7 +622,7 @@ static void zap_pte_range(struct mmu_gather *tlb,
         } while (pte++, addr += PAGE_SIZE, addr != end);
 
         add_mm_rss(mm, file_rss, anon_rss);
-        pte_unmap(pte - 1);
+        pte_unmap_unlock(pte - 1, ptl);
 }
 
 static inline void zap_pmd_range(struct mmu_gather *tlb,
@@ -690,7 +691,6 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 /**
  * unmap_vmas - unmap a range of memory covered by a list of vma's
  * @tlbp: address of the caller's struct mmu_gather
- * @mm: the controlling mm_struct
  * @vma: the starting vma
  * @start_addr: virtual address at which to start unmapping
  * @end_addr: virtual address at which to end unmapping
@@ -699,10 +699,10 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  *
  * Returns the end address of the unmapping (restart addr if interrupted).
  *
- * Unmap all pages in the vma list. Called under page_table_lock.
+ * Unmap all pages in the vma list.
  *
- * We aim to not hold page_table_lock for too long (for scheduling latency
- * reasons). So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
+ * We aim to not hold locks for too long (for scheduling latency reasons).
+ * So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
  * return the ending mmu_gather to the caller.
  *
  * Only addresses between `start' and `end' will be unmapped.
@@ -714,7 +714,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp,
                 struct vm_area_struct *vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *details)
@@ -764,19 +764,15 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                         tlb_finish_mmu(*tlbp, tlb_start, start);
 
                         if (need_resched() ||
-                                need_lockbreak(&mm->page_table_lock) ||
                                 (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
                                 if (i_mmap_lock) {
-                                        /* must reset count of rss freed */
-                                        *tlbp = tlb_gather_mmu(mm, fullmm);
+                                        *tlbp = NULL;
                                         goto out;
                                 }
-                                spin_unlock(&mm->page_table_lock);
                                 cond_resched();
-                                spin_lock(&mm->page_table_lock);
                         }
 
-                        *tlbp = tlb_gather_mmu(mm, fullmm);
+                        *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
                         tlb_start_valid = 0;
                         zap_bytes = ZAP_BLOCK_SIZE;
                 }
@@ -800,18 +796,12 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
         unsigned long end = address + size;
         unsigned long nr_accounted = 0;
 
-        if (is_vm_hugetlb_page(vma)) {
-                zap_hugepage_range(vma, address, size);
-                return end;
-        }
-
         lru_add_drain();
         tlb = tlb_gather_mmu(mm, 0);
         update_hiwater_rss(mm);
-        spin_lock(&mm->page_table_lock);
-        end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
-        spin_unlock(&mm->page_table_lock);
-        tlb_finish_mmu(tlb, address, end);
+        end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+        if (tlb)
+                tlb_finish_mmu(tlb, address, end);
         return end;
 }
 
@@ -1434,13 +1424,6 @@ again:
 
         restart_addr = zap_page_range(vma, start_addr,
                         end_addr - start_addr, details);
-
-        /*
-         * We cannot rely on the break test in unmap_vmas:
-         * on the one hand, we don't want to restart our loop
-         * just because that broke out for the page_table_lock;
-         * on the other hand, it does no test when vma is small.
-         */
         need_break = need_resched() ||
                         need_lockbreak(details->i_mmap_lock);
 
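Putting the zap_page_range and unmap_vmas hunks together, the caller ends up
looking roughly like this after the patch (a condensed sketch taken from the
new side of the diff above):

unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *details)
{
        struct mm_struct *mm = vma->vm_mm;
        struct mmu_gather *tlb;
        unsigned long end = address + size;
        unsigned long nr_accounted = 0;

        lru_add_drain();
        tlb = tlb_gather_mmu(mm, 0);
        update_hiwater_rss(mm);
        /* No page_table_lock here: zap_pte_range takes the pte lock
         * itself, and hugetlb vmas are handled inside unmap_vmas. */
        end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
        /* unmap_vmas leaves *tlbp NULL (having already finished the
         * mmu_gather) when it bails out for a contended i_mmap_lock. */
        if (tlb)
                tlb_finish_mmu(tlb, address, end);
        return end;
}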