author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>  2013-11-14 17:31:02 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>        2013-11-14 19:32:14 -0500
commit     cb900f41215447433cbc456d1c4294e858a84d7c (patch)
tree       1f3704d9a023a20baa2872d6639a58387ef2d7c2 /mm
parent     c389a250ab4cfa4a3775d9f2c45271618af6d5b2 (diff)
mm, hugetlb: convert hugetlbfs to use split pmd lock
Hugetlb supports multiple page sizes. We use the split lock only at the
PMD level, not at the PUD level.
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c    | 110
-rw-r--r--  mm/mempolicy.c  |   5
-rw-r--r--  mm/migrate.c    |   7
-rw-r--r--  mm/rmap.c       |   2
4 files changed, 74 insertions, 50 deletions
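
The conversions below all revolve around two helpers, huge_pte_lockptr() and huge_pte_lock(), which pick the split PMD lock for PMD-sized hugepages and fall back to mm->page_table_lock for anything larger. They are added in include/linux/hugetlb.h, outside the mm/ diffstat above; the sketch below shows their rough shape for orientation and may differ in detail from the upstream definitions.

/* Rough sketch of the helpers used by the conversions below (added in
 * include/linux/hugetlb.h, not part of this mm/-only view).
 * PMD-sized hugepages take the split lock of the PMD page table page;
 * larger (PUD-sized) hugepages keep using the per-mm page_table_lock. */
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}

static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl;

	ptl = huge_pte_lockptr(h, mm, pte);
	spin_lock(ptl);
	return ptl;
}

With these in place, every call site that used to take &mm->page_table_lock (or &vma->vm_mm->page_table_lock) directly now looks up the lock that covers the pte it is about to touch, which is what the hunks below do.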
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0b7656e804d1..7d57af21f49e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2376,6 +2376,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
+		spinlock_t *src_ptl, *dst_ptl;
 		src_pte = huge_pte_offset(src, addr);
 		if (!src_pte)
 			continue;
@@ -2387,8 +2388,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		if (dst_pte == src_pte)
 			continue;
 
-		spin_lock(&dst->page_table_lock);
-		spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
+		dst_ptl = huge_pte_lock(h, dst, dst_pte);
+		src_ptl = huge_pte_lockptr(h, src, src_pte);
+		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		if (!huge_pte_none(huge_ptep_get(src_pte))) {
 			if (cow)
 				huge_ptep_set_wrprotect(src, addr, src_pte);
@@ -2398,8 +2400,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
-		spin_unlock(&src->page_table_lock);
-		spin_unlock(&dst->page_table_lock);
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
 	}
 	return 0;
 
@@ -2442,6 +2444,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	unsigned long address;
 	pte_t *ptep;
 	pte_t pte;
+	spinlock_t *ptl;
 	struct page *page;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
@@ -2455,25 +2458,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 again:
-	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
 
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep))
-			continue;
+			goto unlock;
 
 		pte = huge_ptep_get(ptep);
 		if (huge_pte_none(pte))
-			continue;
+			goto unlock;
 
 		/*
 		 * HWPoisoned hugepage is already unmapped and dropped reference
 		 */
 		if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
 			huge_pte_clear(mm, address, ptep);
-			continue;
+			goto unlock;
 		}
 
 		page = pte_page(pte);
@@ -2484,7 +2487,7 @@ again:
 		 */
 		if (ref_page) {
 			if (page != ref_page)
-				continue;
+				goto unlock;
 
 			/*
 			 * Mark the VMA as having unmapped its page so that
@@ -2501,13 +2504,18 @@ again:
 
 		page_remove_rmap(page);
 		force_flush = !__tlb_remove_page(tlb, page);
-		if (force_flush)
+		if (force_flush) {
+			spin_unlock(ptl);
 			break;
+		}
 		/* Bail out after unmapping reference page if supplied */
-		if (ref_page)
+		if (ref_page) {
+			spin_unlock(ptl);
 			break;
+		}
+unlock:
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * mmu_gather ran out of room to batch pages, we break out of
 	 * the PTE lock to avoid doing the potential expensive TLB invalidate
@@ -2613,7 +2621,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
-			struct page *pagecache_page)
+			struct page *pagecache_page, spinlock_t *ptl)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
@@ -2647,8 +2655,8 @@ retry_avoidcopy:
 
 	page_cache_get(old_page);
 
-	/* Drop page_table_lock as buddy allocator may be called */
-	spin_unlock(&mm->page_table_lock);
+	/* Drop page table lock as buddy allocator may be called */
+	spin_unlock(ptl);
 	new_page = alloc_huge_page(vma, address, outside_reserve);
 
 	if (IS_ERR(new_page)) {
@@ -2666,13 +2674,13 @@ retry_avoidcopy:
 			BUG_ON(huge_pte_none(pte));
 			if (unmap_ref_private(mm, vma, old_page, address)) {
 				BUG_ON(huge_pte_none(pte));
-				spin_lock(&mm->page_table_lock);
+				spin_lock(ptl);
 				ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 				if (likely(pte_same(huge_ptep_get(ptep), pte)))
 					goto retry_avoidcopy;
 				/*
-				 * race occurs while re-acquiring page_table_lock, and
-				 * our job is done.
+				 * race occurs while re-acquiring page table
+				 * lock, and our job is done.
 				 */
 				return 0;
 			}
@@ -2680,7 +2688,7 @@ retry_avoidcopy:
 		}
 
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		if (err == -ENOMEM)
 			return VM_FAULT_OOM;
 		else
@@ -2695,7 +2703,7 @@ retry_avoidcopy:
 		page_cache_release(new_page);
 		page_cache_release(old_page);
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		return VM_FAULT_OOM;
 	}
 
@@ -2707,10 +2715,10 @@ retry_avoidcopy:
 	mmun_end = mmun_start + huge_page_size(h);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	/*
-	 * Retake the page_table_lock to check for racing updates
+	 * Retake the page table lock to check for racing updates
 	 * before the page tables are altered
 	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		ClearPagePrivate(new_page);
@@ -2724,13 +2732,13 @@ retry_avoidcopy:
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 
 	/* Caller expects lock to be held */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	return 0;
 }
 
@@ -2778,6 +2786,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	struct address_space *mapping;
 	pte_t new_pte;
+	spinlock_t *ptl;
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -2864,7 +2873,8 @@ retry:
 		goto backout_unlocked;
 	}
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
 		goto backout;
@@ -2885,16 +2895,16 @@ retry:
 
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
+		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
 	}
 
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	unlock_page(page);
out:
 	return ret;
 
backout:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
backout_unlocked:
 	unlock_page(page);
 	put_page(page);
@@ -2906,6 +2916,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	pte_t *ptep;
 	pte_t entry;
+	spinlock_t *ptl;
 	int ret;
 	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
@@ -2918,7 +2929,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
-			migration_entry_wait_huge(mm, ptep);
+			migration_entry_wait_huge(vma, mm, ptep);
 			return 0;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
@@ -2974,17 +2985,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (page != pagecache_page)
 		lock_page(page);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
-		goto out_page_table_lock;
+		goto out_ptl;
 
 
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!huge_pte_write(entry)) {
 			ret = hugetlb_cow(mm, vma, address, ptep, entry,
-					pagecache_page);
-			goto out_page_table_lock;
+					pagecache_page, ptl);
+			goto out_ptl;
 		}
 		entry = huge_pte_mkdirty(entry);
 	}
@@ -2993,8 +3005,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 						flags & FAULT_FLAG_WRITE))
 		update_mmu_cache(vma, address, ptep);
 
-out_page_table_lock:
-	spin_unlock(&mm->page_table_lock);
+out_ptl:
+	spin_unlock(ptl);
 
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
@@ -3020,9 +3032,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
 
-	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
+		spinlock_t *ptl = NULL;
 		int absent;
 		struct page *page;
 
@@ -3030,8 +3042,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * Some archs (sparc64, sh*) have multiple pte_ts to
 		 * each hugepage. We have to make sure we get the
 		 * first, for the page indexing below to work.
+		 *
+		 * Note that page table lock is not held when pte is null.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (pte)
+			ptl = huge_pte_lock(h, mm, pte);
 		absent = !pte || huge_pte_none(huge_ptep_get(pte));
 
 		/*
@@ -3043,6 +3059,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		if (absent && (flags & FOLL_DUMP) &&
 		    !hugetlbfs_pagecache_present(h, vma, vaddr)) {
+			if (pte)
+				spin_unlock(ptl);
 			remainder = 0;
 			break;
 		}
@@ -3062,10 +3080,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		     !huge_pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
-			spin_unlock(&mm->page_table_lock);
+			if (pte)
+				spin_unlock(ptl);
 			ret = hugetlb_fault(mm, vma, vaddr,
 				(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
-			spin_lock(&mm->page_table_lock);
 			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
@@ -3096,8 +3114,8 @@ same_page:
 			 */
 			goto same_page;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	*nr_pages = remainder;
 	*position = vaddr;
 
@@ -3118,13 +3136,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	flush_cache_range(vma, address, end);
 
 	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += huge_page_size(h)) {
+		spinlock_t *ptl;
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep)) {
 			pages++;
+			spin_unlock(ptl);
 			continue;
 		}
 		if (!huge_pte_none(huge_ptep_get(ptep))) {
@@ -3134,8 +3154,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 			set_huge_pte_at(mm, address, ptep, pte);
 			pages++;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
 	 * may have cleared our pud entry and done put_page on the page table:
@@ -3298,6 +3318,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	unsigned long saddr;
 	pte_t *spte = NULL;
 	pte_t *pte;
+	spinlock_t *ptl;
 
 	if (!vma_shareable(vma, addr))
 		return (pte_t *)pmd_alloc(mm, pud, addr);
@@ -3320,13 +3341,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!spte)
 		goto out;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
+	spin_lock(ptl);
 	if (pud_none(*pud))
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
 	else
 		put_page(virt_to_page(spte));
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
out:
 	pte = (pte_t *)pmd_alloc(mm, pud, addr);
 	mutex_unlock(&mapping->i_mmap_mutex);
@@ -3340,7 +3362,7 @@ out:
  * indicated by page_count > 1, unmap is achieved by clearing pud and
  * decrementing the ref count. If count == 1, the pte page is not shared.
  *
- * called with vma->vm_mm->page_table_lock held.
+ * called with page table lock held.
 *
 * returns: 1 successfully unmapped a shared pte page
 *	    0 the underlying pte page is not shared, or it is the last user
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4cc19f6ab6c6..c4403cdf3433 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -525,8 +525,9 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 #ifdef CONFIG_HUGETLB_PAGE
 	int nid;
 	struct page *page;
+	spinlock_t *ptl;
 
-	spin_lock(&vma->vm_mm->page_table_lock);
+	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
 	page = pte_page(huge_ptep_get((pte_t *)pmd));
 	nid = page_to_nid(page);
 	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
@@ -536,7 +537,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
 		isolate_huge_page(page, private);
unlock:
-	spin_unlock(&vma->vm_mm->page_table_lock);
+	spin_unlock(ptl);
 #else
 	BUG();
 #endif
diff --git a/mm/migrate.c b/mm/migrate.c
index dfc8300ecbb2..01f45cefa4cd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -130,7 +130,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		ptep = huge_pte_offset(mm, addr);
 		if (!ptep)
 			goto out;
-		ptl = &mm->page_table_lock;
+		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
 	} else {
 		pmd = mm_find_pmd(mm, addr);
 		if (!pmd)
@@ -249,9 +249,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	__migration_entry_wait(mm, ptep, ptl);
 }
 
-void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+			       struct mm_struct *mm, pte_t *pte)
 {
-	spinlock_t *ptl = &(mm)->page_table_lock;
+	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
 	__migration_entry_wait(mm, pte, ptl);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -601,7 +601,7 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
 
 	if (unlikely(PageHuge(page))) {
 		pte = huge_pte_offset(mm, address);
-		ptl = &mm->page_table_lock;
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
 		goto check;
 	}
 