| author | Hugh Dickins <hugh@veritas.com> | 2005-10-29 21:16:30 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 00:40:41 -0400 |
| commit | 508034a32b819a2d40aa7ac0dbc8cd2e044c2de6 | |
| tree | 906a8f0095af24f403b30d649d3ec1ffb4ff2f50 | |
| parent | 8f4f8c164cb4af1432cc25eda82928ea4519ba72 | |
[PATCH] mm: unmap_vmas with inner ptlock
Remove the page_table_lock from around the calls to unmap_vmas, and replace
the pte_offset_map in zap_pte_range by pte_offset_map_lock: all callers are
now safe to descend without page_table_lock.
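
For readers unfamiliar with the idiom, the per-page-table locking pattern that zap_pte_range moves to looks roughly like the sketch below; the function name and the elided loop body are illustrative, and only the pte_offset_map_lock/pte_unmap_unlock calls mirror the actual change in the mm/memory.c hunks.

    /* Minimal sketch of the pte_offset_map_lock idiom (illustrative only). */
    static void example_zap_ptes(struct mm_struct *mm, pmd_t *pmd,
                                 unsigned long addr, unsigned long end)
    {
            spinlock_t *ptl;
            pte_t *pte;

            /* map the pte page and take its page-table lock in one step */
            pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
            do {
                    pte_t ptent = *pte;

                    if (pte_none(ptent))
                            continue;
                    /* ... clear the pte and release the page here ... */
            } while (pte++, addr += PAGE_SIZE, addr != end);
            /* drop the lock and unmap the pte page */
            pte_unmap_unlock(pte - 1, ptl);
    }
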
Don't attempt fancy locking for hugepages; just take page_table_lock in
unmap_hugepage_range. That makes zap_hugepage_range, and the hugetlb test in
zap_page_range, redundant: unmap_vmas calls unmap_hugepage_range anyway. Nor
does unmap_vmas have much use for its mm arg now.
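
In other words, the locking now lives inside unmap_hugepage_range itself, so a caller such as hugetlbfs truncation can invoke it directly with no lock of its own. A simplified shape, with the huge-pte walk elided (see the mm/hugetlb.c hunks below for the real body):

    void unmap_hugepage_range(struct vm_area_struct *vma,
                              unsigned long start, unsigned long end)
    {
            struct mm_struct *mm = vma->vm_mm;

            spin_lock(&mm->page_table_lock);
            /* ... walk the range, clear huge ptes, adjust rss ... */
            spin_unlock(&mm->page_table_lock);
            flush_tlb_range(vma, start, end);
    }

A caller then simply does unmap_hugepage_range(vma, vma->vm_start + v_offset, vma->vm_end).
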
The tlb_start_vma and tlb_end_vma in unmap_page_range are now called without
page_table_lock: if they're implemented at all, they typically come down to
flush_cache_range (usually done outside page_table_lock) and flush_tlb_range
(which we already audited for the mprotect case).
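
Where an architecture implements these hooks at all, they typically reduce to something like the following representative sketch (an assumed shape for illustration, not any particular architecture's exact definition), which is consistent with the audit described above:

    #define tlb_start_vma(tlb, vma)                                 \
            do {                                                    \
                    if (!(tlb)->fullmm)                             \
                            flush_cache_range(vma, (vma)->vm_start, \
                                              (vma)->vm_end);       \
            } while (0)

    #define tlb_end_vma(tlb, vma)                                   \
            do {                                                    \
                    if (!(tlb)->fullmm)                             \
                            flush_tlb_range(vma, (vma)->vm_start,   \
                                            (vma)->vm_end);         \
            } while (0)
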
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | fs/hugetlbfs/inode.c | 10 |
| -rw-r--r-- | include/linux/hugetlb.h | 2 |
| -rw-r--r-- | include/linux/mm.h | 2 |
| -rw-r--r-- | mm/hugetlb.c | 12 |
| -rw-r--r-- | mm/memory.c | 41 |
| -rw-r--r-- | mm/mmap.c | 8 |

6 files changed, 21 insertions, 54 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3a9b6d179cbd..a826a8add5e3 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -92,7 +92,7 @@ out:
 }
 
 /*
- * Called under down_write(mmap_sem), page_table_lock is not held
+ * Called under down_write(mmap_sem).
  */
 
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -308,7 +308,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 
         vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
                 unsigned long h_vm_pgoff;
-                unsigned long v_length;
                 unsigned long v_offset;
 
                 h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
@@ -319,11 +318,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
                 if (h_vm_pgoff >= h_pgoff)
                         v_offset = 0;
 
-                v_length = vma->vm_end - vma->vm_start;
-
-                zap_hugepage_range(vma,
-                        vma->vm_start + v_offset,
-                        v_length - v_offset);
+                unmap_hugepage_range(vma,
+                                vma->vm_start + v_offset, vma->vm_end);
         }
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d664330d900e..0cea162b08c0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
-void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
 void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 int hugetlb_report_meminfo(char *);
@@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void)
 #define follow_huge_addr(mm, addr, write)  ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)  ({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)  ({ BUG(); 0; })
-#define zap_hugepage_range(vma, start, len)  BUG()
 #define unmap_hugepage_range(vma, start, end)  BUG()
 #define is_hugepage_mem_enough(size)  0
 #define hugetlb_report_meminfo(buf)  0
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d4c3512e7db4..972e2ce8e07c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -682,7 +682,7 @@ struct zap_details {
 
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                 unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
                 struct vm_area_struct *start_vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea0826ff2663..f29b7dc02c39 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -314,6 +314,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
         BUG_ON(start & ~HPAGE_MASK);
         BUG_ON(end & ~HPAGE_MASK);
 
+        spin_lock(&mm->page_table_lock);
+
         /* Update high watermark before we lower rss */
         update_hiwater_rss(mm);
 
@@ -333,17 +335,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
                 put_page(page);
                 add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
         }
-        flush_tlb_range(vma, start, end);
-}
 
-void zap_hugepage_range(struct vm_area_struct *vma,
-                        unsigned long start, unsigned long length)
-{
-        struct mm_struct *mm = vma->vm_mm;
-
-        spin_lock(&mm->page_table_lock);
-        unmap_hugepage_range(vma, start, start + length);
         spin_unlock(&mm->page_table_lock);
+        flush_tlb_range(vma, start, end);
 }
 
 int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
diff --git a/mm/memory.c b/mm/memory.c
index 4ea89a2e3a83..622a4ef5409f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -551,10 +551,11 @@ static void zap_pte_range(struct mmu_gather *tlb,
 {
         struct mm_struct *mm = tlb->mm;
         pte_t *pte;
+        spinlock_t *ptl;
         int file_rss = 0;
         int anon_rss = 0;
 
-        pte = pte_offset_map(pmd, addr);
+        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
         do {
                 pte_t ptent = *pte;
                 if (pte_none(ptent))
@@ -621,7 +622,7 @@ static void zap_pte_range(struct mmu_gather *tlb,
         } while (pte++, addr += PAGE_SIZE, addr != end);
 
         add_mm_rss(mm, file_rss, anon_rss);
-        pte_unmap(pte - 1);
+        pte_unmap_unlock(pte - 1, ptl);
 }
 
 static inline void zap_pmd_range(struct mmu_gather *tlb,
@@ -690,7 +691,6 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 /**
  * unmap_vmas - unmap a range of memory covered by a list of vma's
  * @tlbp: address of the caller's struct mmu_gather
- * @mm: the controlling mm_struct
  * @vma: the starting vma
  * @start_addr: virtual address at which to start unmapping
  * @end_addr: virtual address at which to end unmapping
@@ -699,10 +699,10 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  *
  * Returns the end address of the unmapping (restart addr if interrupted).
  *
- * Unmap all pages in the vma list. Called under page_table_lock.
+ * Unmap all pages in the vma list.
  *
- * We aim to not hold page_table_lock for too long (for scheduling latency
- * reasons). So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
+ * We aim to not hold locks for too long (for scheduling latency reasons).
+ * So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
  * return the ending mmu_gather to the caller.
  *
  * Only addresses between `start' and `end' will be unmapped.
@@ -714,7 +714,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp,
                 struct vm_area_struct *vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *details)
@@ -764,19 +764,15 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                         tlb_finish_mmu(*tlbp, tlb_start, start);
 
                         if (need_resched() ||
-                                need_lockbreak(&mm->page_table_lock) ||
                                 (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
                                 if (i_mmap_lock) {
-                                        /* must reset count of rss freed */
-                                        *tlbp = tlb_gather_mmu(mm, fullmm);
+                                        *tlbp = NULL;
                                         goto out;
                                 }
-                                spin_unlock(&mm->page_table_lock);
                                 cond_resched();
-                                spin_lock(&mm->page_table_lock);
                         }
 
-                        *tlbp = tlb_gather_mmu(mm, fullmm);
+                        *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
                         tlb_start_valid = 0;
                         zap_bytes = ZAP_BLOCK_SIZE;
                 }
@@ -800,18 +796,12 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
         unsigned long end = address + size;
         unsigned long nr_accounted = 0;
 
-        if (is_vm_hugetlb_page(vma)) {
-                zap_hugepage_range(vma, address, size);
-                return end;
-        }
-
         lru_add_drain();
         tlb = tlb_gather_mmu(mm, 0);
         update_hiwater_rss(mm);
-        spin_lock(&mm->page_table_lock);
-        end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
-        spin_unlock(&mm->page_table_lock);
-        tlb_finish_mmu(tlb, address, end);
+        end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+        if (tlb)
+                tlb_finish_mmu(tlb, address, end);
         return end;
 }
 
@@ -1434,13 +1424,6 @@ again:
 
                 restart_addr = zap_page_range(vma, start_addr,
                                         end_addr - start_addr, details);
-
-                /*
-                 * We cannot rely on the break test in unmap_vmas:
-                 * on the one hand, we don't want to restart our loop
-                 * just because that broke out for the page_table_lock;
-                 * on the other hand, it does no test when vma is small.
-                 */
                 need_break = need_resched() ||
                                 need_lockbreak(details->i_mmap_lock);
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1673,9 +1673,7 @@ static void unmap_region(struct mm_struct *mm,
         lru_add_drain();
         tlb = tlb_gather_mmu(mm, 0);
         update_hiwater_rss(mm);
-        spin_lock(&mm->page_table_lock);
-        unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
-        spin_unlock(&mm->page_table_lock);
+        unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
         free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
                 next? next->vm_start: 0);
@@ -1958,9 +1956,7 @@ void exit_mmap(struct mm_struct *mm)
         tlb = tlb_gather_mmu(mm, 1);
         /* Don't update_hiwater_rss(mm) here, do_exit already did */
         /* Use -1 here to ensure all VMAs in the mm are unmapped */
-        spin_lock(&mm->page_table_lock);
-        end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
-        spin_unlock(&mm->page_table_lock);
+        end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
         free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
         tlb_finish_mmu(tlb, 0, end);
