aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Kravetz <mike.kravetz@oracle.com>2018-10-05 18:51:33 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-10-05 19:32:04 -0400
commitdff11abe280b47c21b804a8ace318e0638bb9a49 (patch)
treedb5156b3f8602967aba6949c3ef389afd0055ac2
parent017b1660df89f5fb4bfe66c34e35f7d2031100c7 (diff)
hugetlb: take PMD sharing into account when flushing tlb/caches
When fixing an issue with PMD sharing and migration, it was discovered via code inspection that other callers of huge_pmd_unshare potentially have an issue with cache and tlb flushing. Use the routine adjust_range_if_pmd_sharing_possible() to calculate worst case ranges for mmu notifiers. Ensure that this range is flushed if huge_pmd_unshare succeeds and unmaps a PUD_SIZE area. Link: http://lkml.kernel.org/r/20180823205917.16297-3-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Michal Hocko <mhocko@kernel.org> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--mm/hugetlb.c53
1 files changed, 44 insertions, 9 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b903d746e132..5c390f5a5207 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
3326 struct page *page; 3326 struct page *page;
3327 struct hstate *h = hstate_vma(vma); 3327 struct hstate *h = hstate_vma(vma);
3328 unsigned long sz = huge_page_size(h); 3328 unsigned long sz = huge_page_size(h);
3329 const unsigned long mmun_start = start; /* For mmu_notifiers */ 3329 unsigned long mmun_start = start; /* For mmu_notifiers */
3330 const unsigned long mmun_end = end; /* For mmu_notifiers */ 3330 unsigned long mmun_end = end; /* For mmu_notifiers */
3331 3331
3332 WARN_ON(!is_vm_hugetlb_page(vma)); 3332 WARN_ON(!is_vm_hugetlb_page(vma));
3333 BUG_ON(start & ~huge_page_mask(h)); 3333 BUG_ON(start & ~huge_page_mask(h));
@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
3339 */ 3339 */
3340 tlb_remove_check_page_size_change(tlb, sz); 3340 tlb_remove_check_page_size_change(tlb, sz);
3341 tlb_start_vma(tlb, vma); 3341 tlb_start_vma(tlb, vma);
3342
3343 /*
3344 * If sharing possible, alert mmu notifiers of worst case.
3345 */
3346 adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
3342 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); 3347 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
3343 address = start; 3348 address = start;
3344 for (; address < end; address += sz) { 3349 for (; address < end; address += sz) {
@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
3349 ptl = huge_pte_lock(h, mm, ptep); 3354 ptl = huge_pte_lock(h, mm, ptep);
3350 if (huge_pmd_unshare(mm, &address, ptep)) { 3355 if (huge_pmd_unshare(mm, &address, ptep)) {
3351 spin_unlock(ptl); 3356 spin_unlock(ptl);
3357 /*
3358 * We just unmapped a page of PMDs by clearing a PUD.
3359 * The caller's TLB flush range should cover this area.
3360 */
3352 continue; 3361 continue;
3353 } 3362 }
3354 3363
@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
3431{ 3440{
3432 struct mm_struct *mm; 3441 struct mm_struct *mm;
3433 struct mmu_gather tlb; 3442 struct mmu_gather tlb;
3443 unsigned long tlb_start = start;
3444 unsigned long tlb_end = end;
3445
3446 /*
3447 * If shared PMDs were possibly used within this vma range, adjust
3448 * start/end for worst case tlb flushing.
3449 * Note that we can not be sure if PMDs are shared until we try to
3450 * unmap pages. However, we want to make sure TLB flushing covers
3451 * the largest possible range.
3452 */
3453 adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
3434 3454
3435 mm = vma->vm_mm; 3455 mm = vma->vm_mm;
3436 3456
3437 tlb_gather_mmu(&tlb, mm, start, end); 3457 tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
3438 __unmap_hugepage_range(&tlb, vma, start, end, ref_page); 3458 __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
3439 tlb_finish_mmu(&tlb, start, end); 3459 tlb_finish_mmu(&tlb, tlb_start, tlb_end);
3440} 3460}
3441 3461
3442/* 3462/*
@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
4298 pte_t pte; 4318 pte_t pte;
4299 struct hstate *h = hstate_vma(vma); 4319 struct hstate *h = hstate_vma(vma);
4300 unsigned long pages = 0; 4320 unsigned long pages = 0;
4321 unsigned long f_start = start;
4322 unsigned long f_end = end;
4323 bool shared_pmd = false;
4324
4325 /*
4326 * In the case of shared PMDs, the area to flush could be beyond
4327 * start/end. Set f_start/f_end to cover the maximum possible
4328 * range if PMD sharing is possible.
4329 */
4330 adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
4301 4331
4302 BUG_ON(address >= end); 4332 BUG_ON(address >= end);
4303 flush_cache_range(vma, address, end); 4333 flush_cache_range(vma, f_start, f_end);
4304 4334
4305 mmu_notifier_invalidate_range_start(mm, start, end); 4335 mmu_notifier_invalidate_range_start(mm, f_start, f_end);
4306 i_mmap_lock_write(vma->vm_file->f_mapping); 4336 i_mmap_lock_write(vma->vm_file->f_mapping);
4307 for (; address < end; address += huge_page_size(h)) { 4337 for (; address < end; address += huge_page_size(h)) {
4308 spinlock_t *ptl; 4338 spinlock_t *ptl;
@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
4313 if (huge_pmd_unshare(mm, &address, ptep)) { 4343 if (huge_pmd_unshare(mm, &address, ptep)) {
4314 pages++; 4344 pages++;
4315 spin_unlock(ptl); 4345 spin_unlock(ptl);
4346 shared_pmd = true;
4316 continue; 4347 continue;
4317 } 4348 }
4318 pte = huge_ptep_get(ptep); 4349 pte = huge_ptep_get(ptep);
@@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
4348 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare 4379 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
4349 * may have cleared our pud entry and done put_page on the page table: 4380 * may have cleared our pud entry and done put_page on the page table:
4350 * once we release i_mmap_rwsem, another task can do the final put_page 4381 * once we release i_mmap_rwsem, another task can do the final put_page
4351 * and that page table be reused and filled with junk. 4382 * and that page table be reused and filled with junk. If we actually
4383 * did unshare a page of pmds, flush the range corresponding to the pud.
4352 */ 4384 */
4353 flush_hugetlb_tlb_range(vma, start, end); 4385 if (shared_pmd)
4386 flush_hugetlb_tlb_range(vma, f_start, f_end);
4387 else
4388 flush_hugetlb_tlb_range(vma, start, end);
4354 /* 4389 /*
4355 * No need to call mmu_notifier_invalidate_range() we are downgrading 4390 * No need to call mmu_notifier_invalidate_range() we are downgrading
4356 * page table protection not changing it to point to a new page. 4391 * page table protection not changing it to point to a new page.
@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
4358 * See Documentation/vm/mmu_notifier.rst 4393 * See Documentation/vm/mmu_notifier.rst
4359 */ 4394 */
4360 i_mmap_unlock_write(vma->vm_file->f_mapping); 4395 i_mmap_unlock_write(vma->vm_file->f_mapping);
4361 mmu_notifier_invalidate_range_end(mm, start, end); 4396 mmu_notifier_invalidate_range_end(mm, f_start, f_end);
4362 4397
4363 return pages << h->order; 4398 return pages << h->order;
4364} 4399}