summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/hugetlbfs/inode.c 61
-rw-r--r-- mm/hugetlb.c 21
2 files changed, 38 insertions, 44 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 32920a10100e..a2fcea5f8225 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,17 +383,16 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
383 * truncation is indicated by end of range being LLONG_MAX 383 * truncation is indicated by end of range being LLONG_MAX
384 * In this case, we first scan the range and release found pages. 384 * In this case, we first scan the range and release found pages.
385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv 385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
386 * maps and global counts. Page faults can not race with truncation 386 * maps and global counts.
387 * in this routine. hugetlb_no_page() prevents page faults in the
388 * truncated range. It checks i_size before allocation, and again after
389 * with the page table lock for the page held. The same lock must be
390 * acquired to unmap a page.
391 * hole punch is indicated if end is not LLONG_MAX 387 * hole punch is indicated if end is not LLONG_MAX
392 * In the hole punch case we scan the range and release found pages. 388 * In the hole punch case we scan the range and release found pages.
393 * Only when releasing a page is the associated region/reserv map 389 * Only when releasing a page is the associated region/reserv map
394 * deleted. The region/reserv map for ranges without associated 390 * deleted. The region/reserv map for ranges without associated
395 * pages are not modified. Page faults can race with hole punch. 391 * pages are not modified.
396 * This is indicated if we find a mapped page. 392 *
393 * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
394 * races with page faults.
395 *
397 * Note: If the passed end of range value is beyond the end of file, but 396 * Note: If the passed end of range value is beyond the end of file, but
398 * not LLONG_MAX this routine still performs a hole punch operation. 397 * not LLONG_MAX this routine still performs a hole punch operation.
399 */ 398 */
@@ -423,32 +422,14 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
423 422
424 for (i = 0; i < pagevec_count(&pvec); ++i) { 423 for (i = 0; i < pagevec_count(&pvec); ++i) {
425 struct page *page = pvec.pages[i]; 424 struct page *page = pvec.pages[i];
426 u32 hash;
427 425
428 index = page->index; 426 index = page->index;
429 hash = hugetlb_fault_mutex_hash(h, current->mm,
430 &pseudo_vma,
431 mapping, index, 0);
432 mutex_lock(&hugetlb_fault_mutex_table[hash]);
433
434 /* 427 /*
435 * If page is mapped, it was faulted in after being 428 * A mapped page is impossible as callers should unmap
436 * unmapped in caller. Unmap (again) now after taking 429 * all references before calling. And, i_mmap_rwsem
437 * the fault mutex. The mutex will prevent faults 430 * prevents the creation of additional mappings.
438 * until we finish removing the page.
439 *
440 * This race can only happen in the hole punch case.
441 * Getting here in a truncate operation is a bug.
442 */ 431 */
443 if (unlikely(page_mapped(page))) { 432 VM_BUG_ON(page_mapped(page));
444 BUG_ON(truncate_op);
445
446 i_mmap_lock_write(mapping);
447 hugetlb_vmdelete_list(&mapping->i_mmap,
448 index * pages_per_huge_page(h),
449 (index + 1) * pages_per_huge_page(h));
450 i_mmap_unlock_write(mapping);
451 }
452 433
453 lock_page(page); 434 lock_page(page);
454 /* 435 /*
@@ -470,7 +451,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
470 } 451 }
471 452
472 unlock_page(page); 453 unlock_page(page);
473 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
474 } 454 }
475 huge_pagevec_release(&pvec); 455 huge_pagevec_release(&pvec);
476 cond_resched(); 456 cond_resched();
@@ -482,9 +462,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
482 462
483static void hugetlbfs_evict_inode(struct inode *inode) 463static void hugetlbfs_evict_inode(struct inode *inode)
484{ 464{
465 struct address_space *mapping = inode->i_mapping;
485 struct resv_map *resv_map; 466 struct resv_map *resv_map;
486 467
468 /*
469 * The vfs layer guarantees that there are no other users of this
470 * inode. Therefore, it would be safe to call remove_inode_hugepages
471 * without holding i_mmap_rwsem. We acquire and hold here to be
472 * consistent with other callers. Since there will be no contention
473 * on the semaphore, overhead is negligible.
474 */
475 i_mmap_lock_write(mapping);
487 remove_inode_hugepages(inode, 0, LLONG_MAX); 476 remove_inode_hugepages(inode, 0, LLONG_MAX);
477 i_mmap_unlock_write(mapping);
478
488 resv_map = (struct resv_map *)inode->i_mapping->private_data; 479 resv_map = (struct resv_map *)inode->i_mapping->private_data;
489 /* root inode doesn't have the resv_map, so we should check it */ 480 /* root inode doesn't have the resv_map, so we should check it */
490 if (resv_map) 481 if (resv_map)
@@ -505,8 +496,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
505 i_mmap_lock_write(mapping); 496 i_mmap_lock_write(mapping);
506 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) 497 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
507 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); 498 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
508 i_mmap_unlock_write(mapping);
509 remove_inode_hugepages(inode, offset, LLONG_MAX); 499 remove_inode_hugepages(inode, offset, LLONG_MAX);
500 i_mmap_unlock_write(mapping);
510 return 0; 501 return 0;
511} 502}
512 503
@@ -540,8 +531,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
540 hugetlb_vmdelete_list(&mapping->i_mmap, 531 hugetlb_vmdelete_list(&mapping->i_mmap,
541 hole_start >> PAGE_SHIFT, 532 hole_start >> PAGE_SHIFT,
542 hole_end >> PAGE_SHIFT); 533 hole_end >> PAGE_SHIFT);
543 i_mmap_unlock_write(mapping);
544 remove_inode_hugepages(inode, hole_start, hole_end); 534 remove_inode_hugepages(inode, hole_start, hole_end);
535 i_mmap_unlock_write(mapping);
545 inode_unlock(inode); 536 inode_unlock(inode);
546 } 537 }
547 538
@@ -624,7 +615,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
624 /* addr is the offset within the file (zero based) */ 615 /* addr is the offset within the file (zero based) */
625 addr = index * hpage_size; 616 addr = index * hpage_size;
626 617
627 /* mutex taken here, fault path and hole punch */ 618 /*
619 * fault mutex taken here, protects against fault path
620 * and hole punch. inode_lock previously taken protects
621 * against truncation.
622 */
628 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, 623 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
629 index, addr); 624 index, addr);
630 mutex_lock(&hugetlb_fault_mutex_table[hash]); 625 mutex_lock(&hugetlb_fault_mutex_table[hash]);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 87fd3ab809c6..e37efd5d8318 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3755,16 +3755,16 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
3755 } 3755 }
3756 3756
3757 /* 3757 /*
3758 * Use page lock to guard against racing truncation 3758 * We can not race with truncation due to holding i_mmap_rwsem.
3759 * before we get page_table_lock. 3759 * Check once here for faults beyond end of file.
3760 */ 3760 */
3761 size = i_size_read(mapping->host) >> huge_page_shift(h);
3762 if (idx >= size)
3763 goto out;
3764
3761retry: 3765retry:
3762 page = find_lock_page(mapping, idx); 3766 page = find_lock_page(mapping, idx);
3763 if (!page) { 3767 if (!page) {
3764 size = i_size_read(mapping->host) >> huge_page_shift(h);
3765 if (idx >= size)
3766 goto out;
3767
3768 /* 3768 /*
3769 * Check for page in userfault range 3769 * Check for page in userfault range
3770 */ 3770 */
@@ -3854,9 +3854,6 @@ retry:
3854 } 3854 }
3855 3855
3856 ptl = huge_pte_lock(h, mm, ptep); 3856 ptl = huge_pte_lock(h, mm, ptep);
3857 size = i_size_read(mapping->host) >> huge_page_shift(h);
3858 if (idx >= size)
3859 goto backout;
3860 3857
3861 ret = 0; 3858 ret = 0;
3862 if (!huge_pte_none(huge_ptep_get(ptep))) 3859 if (!huge_pte_none(huge_ptep_get(ptep)))
@@ -3959,8 +3956,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3959 3956
3960 /* 3957 /*
3961 * Acquire i_mmap_rwsem before calling huge_pte_alloc and hold 3958 * Acquire i_mmap_rwsem before calling huge_pte_alloc and hold
3962 * until finished with ptep. This prevents huge_pmd_unshare from 3959 * until finished with ptep. This serves two purposes:
3963 * being called elsewhere and making the ptep no longer valid. 3960 * 1) It prevents huge_pmd_unshare from being called elsewhere
3961 * and making the ptep no longer valid.
3962 * 2) It synchronizes us with file truncation.
3964 * 3963 *
3965 * ptep could have already been assigned via huge_pte_offset. That 3964 * ptep could have already been assigned via huge_pte_offset. That
3966 * is OK, as huge_pte_alloc will return the same value unless 3965 * is OK, as huge_pte_alloc will return the same value unless