diff options
author | Mike Kravetz <mike.kravetz@oracle.com> | 2018-10-05 18:51:33 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2018-10-05 19:32:04 -0400 |
commit | dff11abe280b47c21b804a8ace318e0638bb9a49 (patch) | |
tree | db5156b3f8602967aba6949c3ef389afd0055ac2 | |
parent | 017b1660df89f5fb4bfe66c34e35f7d2031100c7 (diff) |
hugetlb: take PMD sharing into account when flushing tlb/caches
When fixing an issue with PMD sharing and migration, it was discovered via
code inspection that other callers of huge_pmd_unshare potentially have an
issue with cache and tlb flushing.
Use the routine adjust_range_if_pmd_sharing_possible() to calculate worst
case ranges for mmu notifiers. Ensure that this range is flushed if
huge_pmd_unshare succeeds and unmaps a PUD_SIZE area.
Link: http://lkml.kernel.org/r/20180823205917.16297-3-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | mm/hugetlb.c | 53 |
1 files changed, 44 insertions, 9 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b903d746e132..5c390f5a5207 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
3326 | struct page *page; | 3326 | struct page *page; |
3327 | struct hstate *h = hstate_vma(vma); | 3327 | struct hstate *h = hstate_vma(vma); |
3328 | unsigned long sz = huge_page_size(h); | 3328 | unsigned long sz = huge_page_size(h); |
3329 | const unsigned long mmun_start = start; /* For mmu_notifiers */ | 3329 | unsigned long mmun_start = start; /* For mmu_notifiers */ |
3330 | const unsigned long mmun_end = end; /* For mmu_notifiers */ | 3330 | unsigned long mmun_end = end; /* For mmu_notifiers */ |
3331 | 3331 | ||
3332 | WARN_ON(!is_vm_hugetlb_page(vma)); | 3332 | WARN_ON(!is_vm_hugetlb_page(vma)); |
3333 | BUG_ON(start & ~huge_page_mask(h)); | 3333 | BUG_ON(start & ~huge_page_mask(h)); |
@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
3339 | */ | 3339 | */ |
3340 | tlb_remove_check_page_size_change(tlb, sz); | 3340 | tlb_remove_check_page_size_change(tlb, sz); |
3341 | tlb_start_vma(tlb, vma); | 3341 | tlb_start_vma(tlb, vma); |
3342 | |||
3343 | /* | ||
3344 | * If sharing possible, alert mmu notifiers of worst case. | ||
3345 | */ | ||
3346 | adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end); | ||
3342 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | 3347 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
3343 | address = start; | 3348 | address = start; |
3344 | for (; address < end; address += sz) { | 3349 | for (; address < end; address += sz) { |
@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
3349 | ptl = huge_pte_lock(h, mm, ptep); | 3354 | ptl = huge_pte_lock(h, mm, ptep); |
3350 | if (huge_pmd_unshare(mm, &address, ptep)) { | 3355 | if (huge_pmd_unshare(mm, &address, ptep)) { |
3351 | spin_unlock(ptl); | 3356 | spin_unlock(ptl); |
3357 | /* | ||
3358 | * We just unmapped a page of PMDs by clearing a PUD. | ||
3359 | * The caller's TLB flush range should cover this area. | ||
3360 | */ | ||
3352 | continue; | 3361 | continue; |
3353 | } | 3362 | } |
3354 | 3363 | ||
@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
3431 | { | 3440 | { |
3432 | struct mm_struct *mm; | 3441 | struct mm_struct *mm; |
3433 | struct mmu_gather tlb; | 3442 | struct mmu_gather tlb; |
3443 | unsigned long tlb_start = start; | ||
3444 | unsigned long tlb_end = end; | ||
3445 | |||
3446 | /* | ||
3447 | * If shared PMDs were possibly used within this vma range, adjust | ||
3448 | * start/end for worst case tlb flushing. | ||
3449 | * Note that we can not be sure if PMDs are shared until we try to | ||
3450 | * unmap pages. However, we want to make sure TLB flushing covers | ||
3451 | * the largest possible range. | ||
3452 | */ | ||
3453 | adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end); | ||
3434 | 3454 | ||
3435 | mm = vma->vm_mm; | 3455 | mm = vma->vm_mm; |
3436 | 3456 | ||
3437 | tlb_gather_mmu(&tlb, mm, start, end); | 3457 | tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end); |
3438 | __unmap_hugepage_range(&tlb, vma, start, end, ref_page); | 3458 | __unmap_hugepage_range(&tlb, vma, start, end, ref_page); |
3439 | tlb_finish_mmu(&tlb, start, end); | 3459 | tlb_finish_mmu(&tlb, tlb_start, tlb_end); |
3440 | } | 3460 | } |
3441 | 3461 | ||
3442 | /* | 3462 | /* |
@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
4298 | pte_t pte; | 4318 | pte_t pte; |
4299 | struct hstate *h = hstate_vma(vma); | 4319 | struct hstate *h = hstate_vma(vma); |
4300 | unsigned long pages = 0; | 4320 | unsigned long pages = 0; |
4321 | unsigned long f_start = start; | ||
4322 | unsigned long f_end = end; | ||
4323 | bool shared_pmd = false; | ||
4324 | |||
4325 | /* | ||
4326 | * In the case of shared PMDs, the area to flush could be beyond | ||
4327 | * start/end. Set f_start/f_end to cover the maximum possible | ||
4328 | * range if PMD sharing is possible. | ||
4329 | */ | ||
4330 | adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end); | ||
4301 | 4331 | ||
4302 | BUG_ON(address >= end); | 4332 | BUG_ON(address >= end); |
4303 | flush_cache_range(vma, address, end); | 4333 | flush_cache_range(vma, f_start, f_end); |
4304 | 4334 | ||
4305 | mmu_notifier_invalidate_range_start(mm, start, end); | 4335 | mmu_notifier_invalidate_range_start(mm, f_start, f_end); |
4306 | i_mmap_lock_write(vma->vm_file->f_mapping); | 4336 | i_mmap_lock_write(vma->vm_file->f_mapping); |
4307 | for (; address < end; address += huge_page_size(h)) { | 4337 | for (; address < end; address += huge_page_size(h)) { |
4308 | spinlock_t *ptl; | 4338 | spinlock_t *ptl; |
@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
4313 | if (huge_pmd_unshare(mm, &address, ptep)) { | 4343 | if (huge_pmd_unshare(mm, &address, ptep)) { |
4314 | pages++; | 4344 | pages++; |
4315 | spin_unlock(ptl); | 4345 | spin_unlock(ptl); |
4346 | shared_pmd = true; | ||
4316 | continue; | 4347 | continue; |
4317 | } | 4348 | } |
4318 | pte = huge_ptep_get(ptep); | 4349 | pte = huge_ptep_get(ptep); |
@@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
4348 | * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare | 4379 | * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare |
4349 | * may have cleared our pud entry and done put_page on the page table: | 4380 | * may have cleared our pud entry and done put_page on the page table: |
4350 | * once we release i_mmap_rwsem, another task can do the final put_page | 4381 | * once we release i_mmap_rwsem, another task can do the final put_page |
4351 | * and that page table be reused and filled with junk. | 4382 | * and that page table be reused and filled with junk. If we actually |
4383 | * did unshare a page of pmds, flush the range corresponding to the pud. | ||
4352 | */ | 4384 | */ |
4353 | flush_hugetlb_tlb_range(vma, start, end); | 4385 | if (shared_pmd) |
4386 | flush_hugetlb_tlb_range(vma, f_start, f_end); | ||
4387 | else | ||
4388 | flush_hugetlb_tlb_range(vma, start, end); | ||
4354 | /* | 4389 | /* |
4355 | * No need to call mmu_notifier_invalidate_range() we are downgrading | 4390 | * No need to call mmu_notifier_invalidate_range() we are downgrading |
4356 | * page table protection not changing it to point to a new page. | 4391 | * page table protection not changing it to point to a new page. |
@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
4358 | * See Documentation/vm/mmu_notifier.rst | 4393 | * See Documentation/vm/mmu_notifier.rst |
4359 | */ | 4394 | */ |
4360 | i_mmap_unlock_write(vma->vm_file->f_mapping); | 4395 | i_mmap_unlock_write(vma->vm_file->f_mapping); |
4361 | mmu_notifier_invalidate_range_end(mm, start, end); | 4396 | mmu_notifier_invalidate_range_end(mm, f_start, f_end); |
4362 | 4397 | ||
4363 | return pages << h->order; | 4398 | return pages << h->order; |
4364 | } | 4399 | } |