Diffstat (limited to 'mm/hugetlb.c')
 -rw-r--r--  mm/hugetlb.c  90
 1 file changed, 79 insertions(+), 11 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c21775f196b..5c390f5a5207 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
         struct page *page;
         struct hstate *h = hstate_vma(vma);
         unsigned long sz = huge_page_size(h);
-        const unsigned long mmun_start = start; /* For mmu_notifiers */
-        const unsigned long mmun_end = end;     /* For mmu_notifiers */
+        unsigned long mmun_start = start;       /* For mmu_notifiers */
+        unsigned long mmun_end = end;           /* For mmu_notifiers */
 
         WARN_ON(!is_vm_hugetlb_page(vma));
         BUG_ON(start & ~huge_page_mask(h));
@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
          */
         tlb_remove_check_page_size_change(tlb, sz);
         tlb_start_vma(tlb, vma);
+
+        /*
+         * If sharing possible, alert mmu notifiers of worst case.
+         */
+        adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
         mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
         address = start;
         for (; address < end; address += sz) {
@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 ptl = huge_pte_lock(h, mm, ptep);
                 if (huge_pmd_unshare(mm, &address, ptep)) {
                         spin_unlock(ptl);
+                        /*
+                         * We just unmapped a page of PMDs by clearing a PUD.
+                         * The caller's TLB flush range should cover this area.
+                         */
                         continue;
                 }
 
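A note on the "page of PMDs" wording in this hunk: with hugetlb PMD sharing, a single page of PMDs backs an entire PUD entry, so huge_pmd_unshare() clearing that PUD drops translations for the whole PUD-sized region, not just the address currently being walked. Below is a minimal sketch of the bounds involved, assuming x86-64 with 4K base pages (one page of PMDs maps 512 * 2MB = 1GB, i.e. PUD_SIZE); the EX_*/ex_* names are invented for this sketch and are not kernel symbols:

        #define EX_PUD_SIZE     (1UL << 30)             /* assumed: 1GB on x86-64/4K */
        #define EX_PUD_MASK     (~(EX_PUD_SIZE - 1))

        /* the region a shared page of PMDs covers around a given address */
        static unsigned long ex_pud_start(unsigned long addr)
        {
                return addr & EX_PUD_MASK;              /* round down to a PUD boundary */
        }

        static unsigned long ex_pud_end(unsigned long addr)
        {
                return ex_pud_start(addr) + EX_PUD_SIZE; /* one full PUD region */
        }

This is why the callers below widen their flush and notifier ranges up front: whether sharing actually happened is only known once huge_pmd_unshare() runs under the page table lock.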
@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 {
         struct mm_struct *mm;
         struct mmu_gather tlb;
+        unsigned long tlb_start = start;
+        unsigned long tlb_end = end;
+
+        /*
+         * If shared PMDs were possibly used within this vma range, adjust
+         * start/end for worst case tlb flushing.
+         * Note that we can not be sure if PMDs are shared until we try to
+         * unmap pages.  However, we want to make sure TLB flushing covers
+         * the largest possible range.
+         */
+        adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
 
         mm = vma->vm_mm;
 
-        tlb_gather_mmu(&tlb, mm, start, end);
+        tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
         __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
-        tlb_finish_mmu(&tlb, start, end);
+        tlb_finish_mmu(&tlb, tlb_start, tlb_end);
 }
 
 /*
@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
         pte_t pte;
         struct hstate *h = hstate_vma(vma);
         unsigned long pages = 0;
+        unsigned long f_start = start;
+        unsigned long f_end = end;
+        bool shared_pmd = false;
+
+        /*
+         * In the case of shared PMDs, the area to flush could be beyond
+         * start/end.  Set f_start/f_end to cover the maximum possible
+         * range if PMD sharing is possible.
+         */
+        adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
 
         BUG_ON(address >= end);
-        flush_cache_range(vma, address, end);
+        flush_cache_range(vma, f_start, f_end);
 
-        mmu_notifier_invalidate_range_start(mm, start, end);
+        mmu_notifier_invalidate_range_start(mm, f_start, f_end);
         i_mmap_lock_write(vma->vm_file->f_mapping);
         for (; address < end; address += huge_page_size(h)) {
                 spinlock_t *ptl;
@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                 if (huge_pmd_unshare(mm, &address, ptep)) {
                         pages++;
                         spin_unlock(ptl);
+                        shared_pmd = true;
                         continue;
                 }
                 pte = huge_ptep_get(ptep);
@@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
          * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
          * may have cleared our pud entry and done put_page on the page table:
          * once we release i_mmap_rwsem, another task can do the final put_page
-         * and that page table be reused and filled with junk.
+         * and that page table be reused and filled with junk.  If we actually
+         * did unshare a page of pmds, flush the range corresponding to the pud.
          */
-        flush_hugetlb_tlb_range(vma, start, end);
+        if (shared_pmd)
+                flush_hugetlb_tlb_range(vma, f_start, f_end);
+        else
+                flush_hugetlb_tlb_range(vma, start, end);
         /*
          * No need to call mmu_notifier_invalidate_range() we are downgrading
          * page table protection not changing it to point to a new page.
@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
          * See Documentation/vm/mmu_notifier.rst
          */
         i_mmap_unlock_write(vma->vm_file->f_mapping);
-        mmu_notifier_invalidate_range_end(mm, start, end);
+        mmu_notifier_invalidate_range_end(mm, f_start, f_end);
 
         return pages << h->order;
 }
@@ -4545,13 +4580,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
         /*
          * check on proper vm_flags and page table alignment
          */
-        if (vma->vm_flags & VM_MAYSHARE &&
-            vma->vm_start <= base && end <= vma->vm_end)
+        if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
                 return true;
         return false;
 }
 
 /*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                                unsigned long *start, unsigned long *end)
+{
+        unsigned long check_addr = *start;
+
+        if (!(vma->vm_flags & VM_MAYSHARE))
+                return;
+
+        for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+                unsigned long a_start = check_addr & PUD_MASK;
+                unsigned long a_end = a_start + PUD_SIZE;
+
+                /*
+                 * If sharing is possible, adjust start/end if necessary.
+                 */
+                if (range_in_vma(vma, a_start, a_end)) {
+                        if (a_start < *start)
+                                *start = a_start;
+                        if (a_end > *end)
+                                *end = a_end;
+                }
+        }
+}
+
+/*
  * Search for a shareable pmd page for hugetlb.  In any case calls pmd_alloc()
  * and returns the corresponding pte.  While this is not necessary for the
  * !shared pmd case because we can allocate the pmd later as well, it makes the
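To see the widening behaviour of adjust_range_if_pmd_sharing_possible() in isolation, here is a stand-alone userspace sketch of the same loop under the x86-64/4K assumption (PUD_SIZE = 1GB). struct range and range_in() are stand-ins for the vma bounds and range_in_vma(); this is illustrative code, not the kernel implementation:

        #include <stdio.h>

        #define PUD_SIZE        (1UL << 30)     /* assumed: 1GB on x86-64/4K */
        #define PUD_MASK        (~(PUD_SIZE - 1))

        struct range { unsigned long start, end; };     /* stand-in for the vma bounds */

        static int range_in(const struct range *vma, unsigned long s, unsigned long e)
        {
                return s >= vma->start && e <= vma->end; /* stand-in for range_in_vma() */
        }

        static void adjust(const struct range *vma, unsigned long *start, unsigned long *end)
        {
                unsigned long addr;

                for (addr = *start; addr < *end; addr += PUD_SIZE) {
                        unsigned long a_start = addr & PUD_MASK;
                        unsigned long a_end = a_start + PUD_SIZE;

                        /* only widen when the whole PUD region lies inside the vma */
                        if (range_in(vma, a_start, a_end)) {
                                if (a_start < *start)
                                        *start = a_start;
                                if (a_end > *end)
                                        *end = a_end;
                        }
                }
        }

        int main(void)
        {
                struct range vma = { 0x40000000UL, 0x100000000UL };     /* vma spans 1GB..4GB */
                unsigned long s = 0x80200000UL, e = 0x80400000UL;       /* one 2MB huge page */

                adjust(&vma, &s, &e);
                printf("flush %#lx-%#lx\n", s, e);      /* prints flush 0x80000000-0xc0000000 */
                return 0;
        }

Unmapping a single 2MB huge page that sits inside a shareable region is thus flushed as the full 1GB PUD region, which is exactly the "worst case" the comments in the earlier hunks refer to.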
@@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
         return 0;
 }
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                                unsigned long *start, unsigned long *end)
+{
+}
 #define want_pmd_share()        (0)
 #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
 