author	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2013-11-14 17:31:02 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-14 19:32:14 -0500
commit	cb900f41215447433cbc456d1c4294e858a84d7c (patch)
tree	1f3704d9a023a20baa2872d6639a58387ef2d7c2 /mm
parent	c389a250ab4cfa4a3775d9f2c45271618af6d5b2 (diff)
mm, hugetlb: convert hugetlbfs to use split pmd lock
Hugetlb supports multiple page sizes. We use split lock only for PMD level, but not for PUD.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/hugetlb.c	110
-rw-r--r--	mm/mempolicy.c	5
-rw-r--r--	mm/migrate.c	7
-rw-r--r--	mm/rmap.c	2
4 files changed, 74 insertions, 50 deletions
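Note: the hunks below rely on the huge_pte_lock()/huge_pte_lockptr() helpers that this series adds to include/linux/hugetlb.h, which falls outside the mm-only diffstat shown here. As a rough sketch of their shape (an approximation, not part of the diff below): PMD-sized hugepages take the split PMD lock of the page-table page, while other huge page sizes keep falling back to mm->page_table_lock.

/*
 * Approximate shape of the helpers used throughout the hunks below.
 * The authoritative definitions live in include/linux/hugetlb.h and
 * are not part of this mm-only diff.
 */
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	/* PMD-sized hugepages can use the split PMD lock... */
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	/* ...larger page sizes still serialize on the per-mm lock. */
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}

static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = huge_pte_lockptr(h, mm, pte);

	spin_lock(ptl);
	return ptl;
}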
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0b7656e804d1..7d57af21f49e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2376,6 +2376,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
+		spinlock_t *src_ptl, *dst_ptl;
 		src_pte = huge_pte_offset(src, addr);
 		if (!src_pte)
 			continue;
@@ -2387,8 +2388,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		if (dst_pte == src_pte)
 			continue;
 
-		spin_lock(&dst->page_table_lock);
-		spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
+		dst_ptl = huge_pte_lock(h, dst, dst_pte);
+		src_ptl = huge_pte_lockptr(h, src, src_pte);
+		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		if (!huge_pte_none(huge_ptep_get(src_pte))) {
 			if (cow)
 				huge_ptep_set_wrprotect(src, addr, src_pte);
@@ -2398,8 +2400,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
-		spin_unlock(&src->page_table_lock);
-		spin_unlock(&dst->page_table_lock);
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
 	}
 	return 0;
 
@@ -2442,6 +2444,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	unsigned long address;
 	pte_t *ptep;
 	pte_t pte;
+	spinlock_t *ptl;
 	struct page *page;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
@@ -2455,25 +2458,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 again:
-	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
 
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep))
-			continue;
+			goto unlock;
 
 		pte = huge_ptep_get(ptep);
 		if (huge_pte_none(pte))
-			continue;
+			goto unlock;
 
 		/*
 		 * HWPoisoned hugepage is already unmapped and dropped reference
 		 */
 		if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
 			huge_pte_clear(mm, address, ptep);
-			continue;
+			goto unlock;
 		}
 
 		page = pte_page(pte);
@@ -2484,7 +2487,7 @@ again:
 		 */
 		if (ref_page) {
 			if (page != ref_page)
-				continue;
+				goto unlock;
 
 			/*
 			 * Mark the VMA as having unmapped its page so that
@@ -2501,13 +2504,18 @@ again:
 
 		page_remove_rmap(page);
 		force_flush = !__tlb_remove_page(tlb, page);
-		if (force_flush)
+		if (force_flush) {
+			spin_unlock(ptl);
 			break;
+		}
 		/* Bail out after unmapping reference page if supplied */
-		if (ref_page)
+		if (ref_page) {
+			spin_unlock(ptl);
 			break;
+		}
+unlock:
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * mmu_gather ran out of room to batch pages, we break out of
 	 * the PTE lock to avoid doing the potential expensive TLB invalidate
@@ -2613,7 +2621,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
-			struct page *pagecache_page)
+			struct page *pagecache_page, spinlock_t *ptl)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
@@ -2647,8 +2655,8 @@ retry_avoidcopy:
 
 	page_cache_get(old_page);
 
-	/* Drop page_table_lock as buddy allocator may be called */
-	spin_unlock(&mm->page_table_lock);
+	/* Drop page table lock as buddy allocator may be called */
+	spin_unlock(ptl);
 	new_page = alloc_huge_page(vma, address, outside_reserve);
 
 	if (IS_ERR(new_page)) {
@@ -2666,13 +2674,13 @@ retry_avoidcopy:
 			BUG_ON(huge_pte_none(pte));
 			if (unmap_ref_private(mm, vma, old_page, address)) {
 				BUG_ON(huge_pte_none(pte));
-				spin_lock(&mm->page_table_lock);
+				spin_lock(ptl);
 				ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 				if (likely(pte_same(huge_ptep_get(ptep), pte)))
 					goto retry_avoidcopy;
 				/*
-				 * race occurs while re-acquiring page_table_lock, and
-				 * our job is done.
+				 * race occurs while re-acquiring page table
+				 * lock, and our job is done.
 				 */
 				return 0;
 			}
@@ -2680,7 +2688,7 @@ retry_avoidcopy:
 		}
 
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		if (err == -ENOMEM)
 			return VM_FAULT_OOM;
 		else
@@ -2695,7 +2703,7 @@ retry_avoidcopy:
 		page_cache_release(new_page);
 		page_cache_release(old_page);
 		/* Caller expects lock to be held */
-		spin_lock(&mm->page_table_lock);
+		spin_lock(ptl);
 		return VM_FAULT_OOM;
 	}
 
@@ -2707,10 +2715,10 @@ retry_avoidcopy:
 	mmun_end = mmun_start + huge_page_size(h);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	/*
-	 * Retake the page_table_lock to check for racing updates
+	 * Retake the page table lock to check for racing updates
 	 * before the page tables are altered
 	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		ClearPagePrivate(new_page);
@@ -2724,13 +2732,13 @@ retry_avoidcopy:
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 
 	/* Caller expects lock to be held */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	return 0;
 }
 
@@ -2778,6 +2786,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	struct address_space *mapping;
 	pte_t new_pte;
+	spinlock_t *ptl;
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -2864,7 +2873,8 @@ retry:
 			goto backout_unlocked;
 		}
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
 		goto backout;
@@ -2885,16 +2895,16 @@ retry:
 
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
+		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
 	}
 
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	unlock_page(page);
 out:
 	return ret;
 
 backout:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
 	put_page(page);
@@ -2906,6 +2916,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	pte_t *ptep;
 	pte_t entry;
+	spinlock_t *ptl;
 	int ret;
 	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
@@ -2918,7 +2929,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
-			migration_entry_wait_huge(mm, ptep);
+			migration_entry_wait_huge(vma, mm, ptep);
 			return 0;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
@@ -2974,17 +2985,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (page != pagecache_page)
 		lock_page(page);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(h, mm, ptep);
+	spin_lock(ptl);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
-		goto out_page_table_lock;
+		goto out_ptl;
 
 
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!huge_pte_write(entry)) {
 			ret = hugetlb_cow(mm, vma, address, ptep, entry,
-					pagecache_page);
-			goto out_page_table_lock;
+					pagecache_page, ptl);
+			goto out_ptl;
 		}
 		entry = huge_pte_mkdirty(entry);
 	}
@@ -2993,8 +3005,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 						flags & FAULT_FLAG_WRITE))
 		update_mmu_cache(vma, address, ptep);
 
-out_page_table_lock:
-	spin_unlock(&mm->page_table_lock);
+out_ptl:
+	spin_unlock(ptl);
 
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
@@ -3020,9 +3032,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
 
-	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
+		spinlock_t *ptl = NULL;
 		int absent;
 		struct page *page;
 
@@ -3030,8 +3042,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * Some archs (sparc64, sh*) have multiple pte_ts to
 		 * each hugepage. We have to make sure we get the
 		 * first, for the page indexing below to work.
+		 *
+		 * Note that page table lock is not held when pte is null.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+		if (pte)
+			ptl = huge_pte_lock(h, mm, pte);
 		absent = !pte || huge_pte_none(huge_ptep_get(pte));
 
 		/*
@@ -3043,6 +3059,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		if (absent && (flags & FOLL_DUMP) &&
 		    !hugetlbfs_pagecache_present(h, vma, vaddr)) {
+			if (pte)
+				spin_unlock(ptl);
 			remainder = 0;
 			break;
 		}
@@ -3062,10 +3080,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		      !huge_pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
-			spin_unlock(&mm->page_table_lock);
+			if (pte)
+				spin_unlock(ptl);
 			ret = hugetlb_fault(mm, vma, vaddr,
 				(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
-			spin_lock(&mm->page_table_lock);
 			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
@@ -3096,8 +3114,8 @@ same_page:
 			 */
 			goto same_page;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	*nr_pages = remainder;
 	*position = vaddr;
 
@@ -3118,13 +3136,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	flush_cache_range(vma, address, end);
 
 	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-	spin_lock(&mm->page_table_lock);
 	for (; address < end; address += huge_page_size(h)) {
+		spinlock_t *ptl;
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
+		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep)) {
 			pages++;
+			spin_unlock(ptl);
 			continue;
 		}
 		if (!huge_pte_none(huge_ptep_get(ptep))) {
@@ -3134,8 +3154,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 			set_huge_pte_at(mm, address, ptep, pte);
 			pages++;
 		}
+		spin_unlock(ptl);
 	}
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
 	 * may have cleared our pud entry and done put_page on the page table:
@@ -3298,6 +3318,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	unsigned long saddr;
 	pte_t *spte = NULL;
 	pte_t *pte;
+	spinlock_t *ptl;
 
 	if (!vma_shareable(vma, addr))
 		return (pte_t *)pmd_alloc(mm, pud, addr);
@@ -3320,13 +3341,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!spte)
 		goto out;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
+	spin_lock(ptl);
 	if (pud_none(*pud))
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
 	else
 		put_page(virt_to_page(spte));
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 out:
 	pte = (pte_t *)pmd_alloc(mm, pud, addr);
 	mutex_unlock(&mapping->i_mmap_mutex);
@@ -3340,7 +3362,7 @@ out:
  * indicated by page_count > 1, unmap is achieved by clearing pud and
  * decrementing the ref count. If count == 1, the pte page is not shared.
  *
- * called with vma->vm_mm->page_table_lock held.
+ * called with page table lock held.
  *
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4cc19f6ab6c6..c4403cdf3433 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -525,8 +525,9 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 #ifdef CONFIG_HUGETLB_PAGE
 	int nid;
 	struct page *page;
+	spinlock_t *ptl;
 
-	spin_lock(&vma->vm_mm->page_table_lock);
+	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
 	page = pte_page(huge_ptep_get((pte_t *)pmd));
 	nid = page_to_nid(page);
 	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
@@ -536,7 +537,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
 		isolate_huge_page(page, private);
 unlock:
-	spin_unlock(&vma->vm_mm->page_table_lock);
+	spin_unlock(ptl);
 #else
 	BUG();
 #endif
diff --git a/mm/migrate.c b/mm/migrate.c
index dfc8300ecbb2..01f45cefa4cd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -130,7 +130,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		ptep = huge_pte_offset(mm, addr);
 		if (!ptep)
 			goto out;
-		ptl = &mm->page_table_lock;
+		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
 	} else {
 		pmd = mm_find_pmd(mm, addr);
 		if (!pmd)
@@ -249,9 +249,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	__migration_entry_wait(mm, ptep, ptl);
 }
 
-void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+				struct mm_struct *mm, pte_t *pte)
 {
-	spinlock_t *ptl = &(mm)->page_table_lock;
+	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
 	__migration_entry_wait(mm, pte, ptl);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index b59d741dcf65..55c8b8dc9ffb 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -601,7 +601,7 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
 
 	if (unlikely(PageHuge(page))) {
 		pte = huge_pte_offset(mm, address);
-		ptl = &mm->page_table_lock;
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
 		goto check;
 	}
 
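Taken together, the hunks apply one conversion pattern: instead of holding mm->page_table_lock across an entire walk, each huge PTE is locked individually through the page-table page it lives in. A minimal before/after sketch of that pattern, using a hypothetical walker function that is not code from this patch:

/* Before: one coarse per-mm lock around the whole walk. */
static void hugetlb_walk_old(struct mm_struct *mm, struct hstate *h,
			     unsigned long start, unsigned long end)
{
	unsigned long addr;
	pte_t *ptep;

	spin_lock(&mm->page_table_lock);
	for (addr = start; addr < end; addr += huge_page_size(h)) {
		ptep = huge_pte_offset(mm, addr);
		if (!ptep)
			continue;
		/* ... operate on *ptep ... */
	}
	spin_unlock(&mm->page_table_lock);
}

/* After: a split lock per huge PTE, held only around the access. */
static void hugetlb_walk_new(struct mm_struct *mm, struct hstate *h,
			     unsigned long start, unsigned long end)
{
	unsigned long addr;
	pte_t *ptep;
	spinlock_t *ptl;

	for (addr = start; addr < end; addr += huge_page_size(h)) {
		ptep = huge_pte_offset(mm, addr);
		if (!ptep)
			continue;
		ptl = huge_pte_lock(h, mm, ptep);
		/* ... operate on *ptep ... */
		spin_unlock(ptl);
	}
}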