diff options
author | Hugh Dickins <hugh@veritas.com> | 2005-10-29 21:16:30 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 00:40:41 -0400 |
commit | 508034a32b819a2d40aa7ac0dbc8cd2e044c2de6 (patch) | |
tree | 906a8f0095af24f403b30d649d3ec1ffb4ff2f50 /mm | |
parent | 8f4f8c164cb4af1432cc25eda82928ea4519ba72 (diff) |
[PATCH] mm: unmap_vmas with inner ptlock
Remove the page_table_lock from around the calls to unmap_vmas, and replace
the pte_offset_map in zap_pte_range by pte_offset_map_lock: all callers are
now safe to descend without page_table_lock.
Don't attempt fancy locking for hugepages: just take page_table_lock in
unmap_hugepage_range. That makes zap_hugepage_range, and the hugetlb test in
zap_page_range, redundant, since unmap_vmas calls unmap_hugepage_range anyway.
Nor does unmap_vmas have much use for its mm arg now.
The tlb_start_vma and tlb_end_vma in unmap_page_range are now called without
page_table_lock: if they're implemented at all, they typically come down to
flush_cache_range (usually done outside page_table_lock) and flush_tlb_range
(which we already audited for the mprotect case).
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 12 | ||||
-rw-r--r-- | mm/memory.c | 41 | ||||
-rw-r--r-- | mm/mmap.c | 8 |
3 files changed, 17 insertions, 44 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ea0826ff2663..f29b7dc02c39 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -314,6 +314,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
314 | BUG_ON(start & ~HPAGE_MASK); | 314 | BUG_ON(start & ~HPAGE_MASK); |
315 | BUG_ON(end & ~HPAGE_MASK); | 315 | BUG_ON(end & ~HPAGE_MASK); |
316 | 316 | ||
317 | spin_lock(&mm->page_table_lock); | ||
318 | |||
317 | /* Update high watermark before we lower rss */ | 319 | /* Update high watermark before we lower rss */ |
318 | update_hiwater_rss(mm); | 320 | update_hiwater_rss(mm); |
319 | 321 | ||
@@ -333,17 +335,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
333 | put_page(page); | 335 | put_page(page); |
334 | add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE)); | 336 | add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE)); |
335 | } | 337 | } |
336 | flush_tlb_range(vma, start, end); | ||
337 | } | ||
338 | 338 | ||
339 | void zap_hugepage_range(struct vm_area_struct *vma, | ||
340 | unsigned long start, unsigned long length) | ||
341 | { | ||
342 | struct mm_struct *mm = vma->vm_mm; | ||
343 | |||
344 | spin_lock(&mm->page_table_lock); | ||
345 | unmap_hugepage_range(vma, start, start + length); | ||
346 | spin_unlock(&mm->page_table_lock); | 339 | spin_unlock(&mm->page_table_lock); |
340 | flush_tlb_range(vma, start, end); | ||
347 | } | 341 | } |
348 | 342 | ||
349 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) | 343 | int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) |
diff --git a/mm/memory.c b/mm/memory.c index 4ea89a2e3a83..622a4ef5409f 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -551,10 +551,11 @@ static void zap_pte_range(struct mmu_gather *tlb, | |||
551 | { | 551 | { |
552 | struct mm_struct *mm = tlb->mm; | 552 | struct mm_struct *mm = tlb->mm; |
553 | pte_t *pte; | 553 | pte_t *pte; |
554 | spinlock_t *ptl; | ||
554 | int file_rss = 0; | 555 | int file_rss = 0; |
555 | int anon_rss = 0; | 556 | int anon_rss = 0; |
556 | 557 | ||
557 | pte = pte_offset_map(pmd, addr); | 558 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
558 | do { | 559 | do { |
559 | pte_t ptent = *pte; | 560 | pte_t ptent = *pte; |
560 | if (pte_none(ptent)) | 561 | if (pte_none(ptent)) |
@@ -621,7 +622,7 @@ static void zap_pte_range(struct mmu_gather *tlb, | |||
621 | } while (pte++, addr += PAGE_SIZE, addr != end); | 622 | } while (pte++, addr += PAGE_SIZE, addr != end); |
622 | 623 | ||
623 | add_mm_rss(mm, file_rss, anon_rss); | 624 | add_mm_rss(mm, file_rss, anon_rss); |
624 | pte_unmap(pte - 1); | 625 | pte_unmap_unlock(pte - 1, ptl); |
625 | } | 626 | } |
626 | 627 | ||
627 | static inline void zap_pmd_range(struct mmu_gather *tlb, | 628 | static inline void zap_pmd_range(struct mmu_gather *tlb, |
@@ -690,7 +691,6 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
690 | /** | 691 | /** |
691 | * unmap_vmas - unmap a range of memory covered by a list of vma's | 692 | * unmap_vmas - unmap a range of memory covered by a list of vma's |
692 | * @tlbp: address of the caller's struct mmu_gather | 693 | * @tlbp: address of the caller's struct mmu_gather |
693 | * @mm: the controlling mm_struct | ||
694 | * @vma: the starting vma | 694 | * @vma: the starting vma |
695 | * @start_addr: virtual address at which to start unmapping | 695 | * @start_addr: virtual address at which to start unmapping |
696 | * @end_addr: virtual address at which to end unmapping | 696 | * @end_addr: virtual address at which to end unmapping |
@@ -699,10 +699,10 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
699 | * | 699 | * |
700 | * Returns the end address of the unmapping (restart addr if interrupted). | 700 | * Returns the end address of the unmapping (restart addr if interrupted). |
701 | * | 701 | * |
702 | * Unmap all pages in the vma list. Called under page_table_lock. | 702 | * Unmap all pages in the vma list. |
703 | * | 703 | * |
704 | * We aim to not hold page_table_lock for too long (for scheduling latency | 704 | * We aim to not hold locks for too long (for scheduling latency reasons). |
705 | * reasons). So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to | 705 | * So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to |
706 | * return the ending mmu_gather to the caller. | 706 | * return the ending mmu_gather to the caller. |
707 | * | 707 | * |
708 | * Only addresses between `start' and `end' will be unmapped. | 708 | * Only addresses between `start' and `end' will be unmapped. |
@@ -714,7 +714,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
714 | * ensure that any thus-far unmapped pages are flushed before unmap_vmas() | 714 | * ensure that any thus-far unmapped pages are flushed before unmap_vmas() |
715 | * drops the lock and schedules. | 715 | * drops the lock and schedules. |
716 | */ | 716 | */ |
717 | unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, | 717 | unsigned long unmap_vmas(struct mmu_gather **tlbp, |
718 | struct vm_area_struct *vma, unsigned long start_addr, | 718 | struct vm_area_struct *vma, unsigned long start_addr, |
719 | unsigned long end_addr, unsigned long *nr_accounted, | 719 | unsigned long end_addr, unsigned long *nr_accounted, |
720 | struct zap_details *details) | 720 | struct zap_details *details) |
@@ -764,19 +764,15 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, | |||
764 | tlb_finish_mmu(*tlbp, tlb_start, start); | 764 | tlb_finish_mmu(*tlbp, tlb_start, start); |
765 | 765 | ||
766 | if (need_resched() || | 766 | if (need_resched() || |
767 | need_lockbreak(&mm->page_table_lock) || | ||
768 | (i_mmap_lock && need_lockbreak(i_mmap_lock))) { | 767 | (i_mmap_lock && need_lockbreak(i_mmap_lock))) { |
769 | if (i_mmap_lock) { | 768 | if (i_mmap_lock) { |
770 | /* must reset count of rss freed */ | 769 | *tlbp = NULL; |
771 | *tlbp = tlb_gather_mmu(mm, fullmm); | ||
772 | goto out; | 770 | goto out; |
773 | } | 771 | } |
774 | spin_unlock(&mm->page_table_lock); | ||
775 | cond_resched(); | 772 | cond_resched(); |
776 | spin_lock(&mm->page_table_lock); | ||
777 | } | 773 | } |
778 | 774 | ||
779 | *tlbp = tlb_gather_mmu(mm, fullmm); | 775 | *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); |
780 | tlb_start_valid = 0; | 776 | tlb_start_valid = 0; |
781 | zap_bytes = ZAP_BLOCK_SIZE; | 777 | zap_bytes = ZAP_BLOCK_SIZE; |
782 | } | 778 | } |
@@ -800,18 +796,12 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, | |||
800 | unsigned long end = address + size; | 796 | unsigned long end = address + size; |
801 | unsigned long nr_accounted = 0; | 797 | unsigned long nr_accounted = 0; |
802 | 798 | ||
803 | if (is_vm_hugetlb_page(vma)) { | ||
804 | zap_hugepage_range(vma, address, size); | ||
805 | return end; | ||
806 | } | ||
807 | |||
808 | lru_add_drain(); | 799 | lru_add_drain(); |
809 | tlb = tlb_gather_mmu(mm, 0); | 800 | tlb = tlb_gather_mmu(mm, 0); |
810 | update_hiwater_rss(mm); | 801 | update_hiwater_rss(mm); |
811 | spin_lock(&mm->page_table_lock); | 802 | end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); |
812 | end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details); | 803 | if (tlb) |
813 | spin_unlock(&mm->page_table_lock); | 804 | tlb_finish_mmu(tlb, address, end); |
814 | tlb_finish_mmu(tlb, address, end); | ||
815 | return end; | 805 | return end; |
816 | } | 806 | } |
817 | 807 | ||
@@ -1434,13 +1424,6 @@ again: | |||
1434 | 1424 | ||
1435 | restart_addr = zap_page_range(vma, start_addr, | 1425 | restart_addr = zap_page_range(vma, start_addr, |
1436 | end_addr - start_addr, details); | 1426 | end_addr - start_addr, details); |
1437 | |||
1438 | /* | ||
1439 | * We cannot rely on the break test in unmap_vmas: | ||
1440 | * on the one hand, we don't want to restart our loop | ||
1441 | * just because that broke out for the page_table_lock; | ||
1442 | * on the other hand, it does no test when vma is small. | ||
1443 | */ | ||
1444 | need_break = need_resched() || | 1427 | need_break = need_resched() || |
1445 | need_lockbreak(details->i_mmap_lock); | 1428 | need_lockbreak(details->i_mmap_lock); |
1446 | 1429 | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -1673,9 +1673,7 @@ static void unmap_region(struct mm_struct *mm, | |||
1673 | lru_add_drain(); | 1673 | lru_add_drain(); |
1674 | tlb = tlb_gather_mmu(mm, 0); | 1674 | tlb = tlb_gather_mmu(mm, 0); |
1675 | update_hiwater_rss(mm); | 1675 | update_hiwater_rss(mm); |
1676 | spin_lock(&mm->page_table_lock); | 1676 | unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); |
1677 | unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL); | ||
1678 | spin_unlock(&mm->page_table_lock); | ||
1679 | vm_unacct_memory(nr_accounted); | 1677 | vm_unacct_memory(nr_accounted); |
1680 | free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, | 1678 | free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, |
1681 | next? next->vm_start: 0); | 1679 | next? next->vm_start: 0); |
@@ -1958,9 +1956,7 @@ void exit_mmap(struct mm_struct *mm) | |||
1958 | tlb = tlb_gather_mmu(mm, 1); | 1956 | tlb = tlb_gather_mmu(mm, 1); |
1959 | /* Don't update_hiwater_rss(mm) here, do_exit already did */ | 1957 | /* Don't update_hiwater_rss(mm) here, do_exit already did */ |
1960 | /* Use -1 here to ensure all VMAs in the mm are unmapped */ | 1958 | /* Use -1 here to ensure all VMAs in the mm are unmapped */ |
1961 | spin_lock(&mm->page_table_lock); | 1959 | end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); |
1962 | end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL); | ||
1963 | spin_unlock(&mm->page_table_lock); | ||
1964 | vm_unacct_memory(nr_accounted); | 1960 | vm_unacct_memory(nr_accounted); |
1965 | free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); | 1961 | free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); |
1966 | tlb_finish_mmu(tlb, 0, end); | 1962 | tlb_finish_mmu(tlb, 0, end); |