aboutsummaryrefslogtreecommitdiffstats
path: root/fs/hugetlbfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r-- fs/hugetlbfs/inode.c 61
1 files changed, 33 insertions, 28 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2fcea5f8225..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
383 * truncation is indicated by end of range being LLONG_MAX 383 * truncation is indicated by end of range being LLONG_MAX
384 * In this case, we first scan the range and release found pages. 384 * In this case, we first scan the range and release found pages.
385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv 385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
386 * maps and global counts. 386 * maps and global counts. Page faults cannot race with truncation
387 * in this routine. hugetlb_no_page() prevents page faults in the
388 * truncated range. It checks i_size before allocation, and again after
389 * with the page table lock for the page held. The same lock must be
390 * acquired to unmap a page.
387 * hole punch is indicated if end is not LLONG_MAX 391 * hole punch is indicated if end is not LLONG_MAX
388 * In the hole punch case we scan the range and release found pages. 392 * In the hole punch case we scan the range and release found pages.
389 * Only when releasing a page is the associated region/reserv map 393 * Only when releasing a page is the associated region/reserv map
390 * deleted. The region/reserv map for ranges without associated 394 * deleted. The region/reserv map for ranges without associated
391 * pages are not modified. 395 * pages are not modified. Page faults can race with hole punch.
392 * 396 * This is indicated if we find a mapped page.
393 * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
394 * races with page faults.
395 *
396 * Note: If the passed end of range value is beyond the end of file, but 397 * Note: If the passed end of range value is beyond the end of file, but
397 * not LLONG_MAX this routine still performs a hole punch operation. 398 * not LLONG_MAX this routine still performs a hole punch operation.
398 */ 399 */
@@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
422 423
423 for (i = 0; i < pagevec_count(&pvec); ++i) { 424 for (i = 0; i < pagevec_count(&pvec); ++i) {
424 struct page *page = pvec.pages[i]; 425 struct page *page = pvec.pages[i];
426 u32 hash;
425 427
426 index = page->index; 428 index = page->index;
429 hash = hugetlb_fault_mutex_hash(h, current->mm,
430 &pseudo_vma,
431 mapping, index, 0);
432 mutex_lock(&hugetlb_fault_mutex_table[hash]);
433
427 /* 434 /*
428 * A mapped page is impossible as callers should unmap 435 * If page is mapped, it was faulted in after being
429 * all references before calling. And, i_mmap_rwsem 436 * unmapped in caller. Unmap (again) now after taking
430 * prevents the creation of additional mappings. 437 * the fault mutex. The mutex will prevent faults
438 * until we finish removing the page.
439 *
440 * This race can only happen in the hole punch case.
441 * Getting here in a truncate operation is a bug.
431 */ 442 */
432 VM_BUG_ON(page_mapped(page)); 443 if (unlikely(page_mapped(page))) {
444 BUG_ON(truncate_op);
445
446 i_mmap_lock_write(mapping);
447 hugetlb_vmdelete_list(&mapping->i_mmap,
448 index * pages_per_huge_page(h),
449 (index + 1) * pages_per_huge_page(h));
450 i_mmap_unlock_write(mapping);
451 }
433 452
434 lock_page(page); 453 lock_page(page);
435 /* 454 /*
@@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
451 } 470 }
452 471
453 unlock_page(page); 472 unlock_page(page);
473 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
454 } 474 }
455 huge_pagevec_release(&pvec); 475 huge_pagevec_release(&pvec);
456 cond_resched(); 476 cond_resched();
@@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
462 482
463 static void hugetlbfs_evict_inode(struct inode *inode) 483 static void hugetlbfs_evict_inode(struct inode *inode)
464{ 484{
465 struct address_space *mapping = inode->i_mapping;
466 struct resv_map *resv_map; 485 struct resv_map *resv_map;
467 486
468 /*
469 * The vfs layer guarantees that there are no other users of this
470 * inode. Therefore, it would be safe to call remove_inode_hugepages
471 * without holding i_mmap_rwsem. We acquire and hold here to be
472 * consistent with other callers. Since there will be no contention
473 * on the semaphore, overhead is negligible.
474 */
475 i_mmap_lock_write(mapping);
476 remove_inode_hugepages(inode, 0, LLONG_MAX); 487 remove_inode_hugepages(inode, 0, LLONG_MAX);
477 i_mmap_unlock_write(mapping);
478
479 resv_map = (struct resv_map *)inode->i_mapping->private_data; 488 resv_map = (struct resv_map *)inode->i_mapping->private_data;
480 /* root inode doesn't have the resv_map, so we should check it */ 489 /* root inode doesn't have the resv_map, so we should check it */
481 if (resv_map) 490 if (resv_map)
@@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
496 i_mmap_lock_write(mapping); 505 i_mmap_lock_write(mapping);
497 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) 506 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
498 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); 507 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
499 remove_inode_hugepages(inode, offset, LLONG_MAX);
500 i_mmap_unlock_write(mapping); 508 i_mmap_unlock_write(mapping);
509 remove_inode_hugepages(inode, offset, LLONG_MAX);
501 return 0; 510 return 0;
502} 511}
503 512
@@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
531 hugetlb_vmdelete_list(&mapping->i_mmap, 540 hugetlb_vmdelete_list(&mapping->i_mmap,
532 hole_start >> PAGE_SHIFT, 541 hole_start >> PAGE_SHIFT,
533 hole_end >> PAGE_SHIFT); 542 hole_end >> PAGE_SHIFT);
534 remove_inode_hugepages(inode, hole_start, hole_end);
535 i_mmap_unlock_write(mapping); 543 i_mmap_unlock_write(mapping);
544 remove_inode_hugepages(inode, hole_start, hole_end);
536 inode_unlock(inode); 545 inode_unlock(inode);
537 } 546 }
538 547
@@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
615 /* addr is the offset within the file (zero based) */ 624 /* addr is the offset within the file (zero based) */
616 addr = index * hpage_size; 625 addr = index * hpage_size;
617 626
618 /* 627 /* fault mutex taken here, protects against fault path and hole punch */
619 * fault mutex taken here, protects against fault path
620 * and hole punch. inode_lock previously taken protects
621 * against truncation.
622 */
623 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, 628 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
624 index, addr); 629 index, addr);
625 mutex_lock(&hugetlb_fault_mutex_table[hash]); 630 mutex_lock(&hugetlb_fault_mutex_table[hash]);