 mm/hugetlb.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 79 insertions(+), 11 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c21775f196b..5c390f5a5207 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	struct page *page;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
-	const unsigned long mmun_start = start;	/* For mmu_notifiers */
-	const unsigned long mmun_end = end;	/* For mmu_notifiers */
+	unsigned long mmun_start = start;	/* For mmu_notifiers */
+	unsigned long mmun_end = end;		/* For mmu_notifiers */
 
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON(start & ~huge_page_mask(h));
@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 */
 	tlb_remove_check_page_size_change(tlb, sz);
 	tlb_start_vma(tlb, vma);
+
+	/*
+	 * If sharing possible, alert mmu notifiers of worst case.
+	 */
+	adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	address = start;
 	for (; address < end; address += sz) {
@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep)) {
 			spin_unlock(ptl);
+			/*
+			 * We just unmapped a page of PMDs by clearing a PUD.
+			 * The caller's TLB flush range should cover this area.
+			 */
 			continue;
 		}
 
@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 {
 	struct mm_struct *mm;
 	struct mmu_gather tlb;
+	unsigned long tlb_start = start;
+	unsigned long tlb_end = end;
+
+	/*
+	 * If shared PMDs were possibly used within this vma range, adjust
+	 * start/end for worst case tlb flushing.
+	 * Note that we can not be sure if PMDs are shared until we try to
+	 * unmap pages.  However, we want to make sure TLB flushing covers
+	 * the largest possible range.
+	 */
+	adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
 
 	mm = vma->vm_mm;
 
-	tlb_gather_mmu(&tlb, mm, start, end);
+	tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
-	tlb_finish_mmu(&tlb, start, end);
+	tlb_finish_mmu(&tlb, tlb_start, tlb_end);
 }
 
 /*
@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long pages = 0;
+	unsigned long f_start = start;
+	unsigned long f_end = end;
+	bool shared_pmd = false;
+
+	/*
+	 * In the case of shared PMDs, the area to flush could be beyond
+	 * start/end.  Set f_start/f_end to cover the maximum possible
+	 * range if PMD sharing is possible.
+	 */
+	adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
 
 	BUG_ON(address >= end);
-	flush_cache_range(vma, address, end);
+	flush_cache_range(vma, f_start, f_end);
 
-	mmu_notifier_invalidate_range_start(mm, start, end);
+	mmu_notifier_invalidate_range_start(mm, f_start, f_end);
 	i_mmap_lock_write(vma->vm_file->f_mapping);
 	for (; address < end; address += huge_page_size(h)) {
 		spinlock_t *ptl;
@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		if (huge_pmd_unshare(mm, &address, ptep)) {
 			pages++;
 			spin_unlock(ptl);
+			shared_pmd = true;
 			continue;
 		}
 		pte = huge_ptep_get(ptep);
@@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
 	 * may have cleared our pud entry and done put_page on the page table:
 	 * once we release i_mmap_rwsem, another task can do the final put_page
-	 * and that page table be reused and filled with junk.
+	 * and that page table be reused and filled with junk.  If we actually
+	 * did unshare a page of pmds, flush the range corresponding to the pud.
 	 */
-	flush_hugetlb_tlb_range(vma, start, end);
+	if (shared_pmd)
+		flush_hugetlb_tlb_range(vma, f_start, f_end);
+	else
+		flush_hugetlb_tlb_range(vma, start, end);
 	/*
 	 * No need to call mmu_notifier_invalidate_range() we are downgrading
 	 * page table protection not changing it to point to a new page.
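
The flush has to widen when shared_pmd is set because huge_pmd_unshare() drops a whole page of PMDs by clearing the covering PUD entry, so the stale translations span the full PUD region rather than just start..end. A minimal userspace sketch of that size relationship, assuming x86-64 geometry (2 MiB hugetlb pages, 512 huge PTEs per PMD page, 1 GiB per PUD entry); the macros and the address below are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

/* Illustrative x86-64 values, not taken from the kernel headers. */
#define HPAGE_SIZE	(1UL << 21)		/* 2 MiB hugetlb page          */
#define PTRS_PER_PMD	512UL			/* huge PTEs in one PMD page   */
#define PUD_SIZE	(1UL << 30)		/* range mapped by a PUD entry */
#define PUD_MASK	(~(PUD_SIZE - 1))

int main(void)
{
	unsigned long address = 0x80200000UL;	/* hypothetical address in the range */

	/*
	 * A shared PMD page maps PTRS_PER_PMD * HPAGE_SIZE = 1 GiB, exactly
	 * the region covered by the PUD entry that huge_pmd_unshare() clears.
	 * So once shared_pmd is set, stale TLB entries may cover the whole
	 * PUD-aligned region around the address, not just start..end.
	 */
	printf("PMD page maps %lu MiB; must flush %#lx..%#lx\n",
	       (PTRS_PER_PMD * HPAGE_SIZE) >> 20,
	       address & PUD_MASK, (address & PUD_MASK) + PUD_SIZE);
	return 0;
}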
@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	 * See Documentation/vm/mmu_notifier.rst
 	 */
 	i_mmap_unlock_write(vma->vm_file->f_mapping);
-	mmu_notifier_invalidate_range_end(mm, start, end);
+	mmu_notifier_invalidate_range_end(mm, f_start, f_end);
 
 	return pages << h->order;
 }
@@ -4545,13 +4580,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
 	/*
 	 * check on proper vm_flags and page table alignment
 	 */
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    vma->vm_start <= base && end <= vma->vm_end)
+	if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
 		return true;
 	return false;
 }
 
 /*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+	unsigned long check_addr = *start;
+
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		return;
+
+	for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+		unsigned long a_start = check_addr & PUD_MASK;
+		unsigned long a_end = a_start + PUD_SIZE;
+
+		/*
+		 * If sharing is possible, adjust start/end if necessary.
+		 */
+		if (range_in_vma(vma, a_start, a_end)) {
+			if (a_start < *start)
+				*start = a_start;
+			if (a_end > *end)
+				*end = a_end;
+		}
+	}
+}
+
+/*
  * Search for a shareable pmd page for hugetlb.  In any case calls pmd_alloc()
  * and returns the corresponding pte.  While this is not necessary for the
  * !shared pmd case because we can allocate the pmd later as well, it makes the
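
For a sense of how adjust_range_if_pmd_sharing_possible() widens a range, here is a minimal userspace sketch of the same loop, assuming x86-64 geometry where a PUD entry covers 1 GiB. The struct vma and range_in_vma() below are simplified stand-ins for the kernel's, and the addresses are made up for illustration:

#include <stdio.h>

/* Assumed x86-64 geometry: one PUD entry maps 1 GiB. */
#define PUD_SHIFT 30UL
#define PUD_SIZE  (1UL << PUD_SHIFT)
#define PUD_MASK  (~(PUD_SIZE - 1))

/* Stand-in for the vma bounds checked by range_in_vma() in the kernel. */
struct vma { unsigned long vm_start, vm_end; };

static int range_in_vma(const struct vma *vma,
			unsigned long start, unsigned long end)
{
	return vma->vm_start <= start && end <= vma->vm_end;
}

/*
 * Mirror of the patch's loop: walk the range one PUD at a time and widen
 * start/end to the enclosing PUD-aligned region whenever that whole region
 * fits inside the vma (i.e. a shared PMD page is possible there).
 */
static void adjust_range(const struct vma *vma,
			 unsigned long *start, unsigned long *end)
{
	unsigned long check_addr;

	for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
		unsigned long a_start = check_addr & PUD_MASK;
		unsigned long a_end = a_start + PUD_SIZE;

		if (range_in_vma(vma, a_start, a_end)) {
			if (a_start < *start)
				*start = a_start;
			if (a_end > *end)
				*end = a_end;
		}
	}
}

int main(void)
{
	/* 4 GiB vma starting at 0x40000000, flushing a 4 MiB slice of it. */
	struct vma vma = { 0x40000000UL, 0x140000000UL };
	unsigned long start = 0x80200000UL, end = 0x80600000UL;

	adjust_range(&vma, &start, &end);
	/* Prints 0x80000000..0xc0000000: the full 1 GiB PUD region. */
	printf("flush %#lx..%#lx\n", start, end);
	return 0;
}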
@@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+}
 #define want_pmd_share()	(0)
 #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
 