Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r--	fs/hugetlbfs/inode.c	61
1 file changed, 28 insertions, 33 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 32920a10100e..a2fcea5f8225 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,17 +383,16 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
  * truncation is indicated by end of range being LLONG_MAX
  * In this case, we first scan the range and release found pages.
  * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- * maps and global counts.  Page faults can not race with truncation
- * in this routine.  hugetlb_no_page() prevents page faults in the
- * truncated range.  It checks i_size before allocation, and again after
- * with the page table lock for the page held.  The same lock must be
- * acquired to unmap a page.
+ * maps and global counts.
  * hole punch is indicated if end is not LLONG_MAX
  * In the hole punch case we scan the range and release found pages.
  * Only when releasing a page is the associated region/reserv map
  * deleted.  The region/reserv map for ranges without associated
- * pages are not modified.  Page faults can race with hole punch.
- * This is indicated if we find a mapped page.
+ * pages are not modified.
+ *
+ * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
+ * races with page faults.
+ *
  * Note: If the passed end of range value is beyond the end of file, but
  * not LLONG_MAX this routine still performs a hole punch operation.
  */
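The rule added above (hold i_mmap_rwsem in write mode across the whole removal) is what the later hunks implement. As a reading aid, a condensed sketch of the required caller pattern, mirroring hugetlb_vmtruncate() after this patch; sketch_vmtruncate is an illustrative name and the i_size bookkeeping of the real function is elided:

/*
 * Sketch of the caller pattern the comment above demands: the
 * semaphore is held in write mode across both the unmap and the
 * page removal, closing the window in which a fault could re-map
 * a page between the two steps.  Illustration only, not patch code.
 */
static int sketch_vmtruncate(struct inode *inode, loff_t offset)
{
	struct address_space *mapping = inode->i_mapping;
	pgoff_t pgoff = offset >> PAGE_SHIFT;

	i_mmap_lock_write(mapping);		/* blocks page faults */
	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
	remove_inode_hugepages(inode, offset, LLONG_MAX);
	i_mmap_unlock_write(mapping);		/* faults may resume */
	return 0;
}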
@@ -423,32 +422,14 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 		for (i = 0; i < pagevec_count(&pvec); ++i) {
 			struct page *page = pvec.pages[i];
-			u32 hash;
 
 			index = page->index;
-			hash = hugetlb_fault_mutex_hash(h, current->mm,
-							&pseudo_vma,
-							mapping, index, 0);
-			mutex_lock(&hugetlb_fault_mutex_table[hash]);
-
 			/*
-			 * If page is mapped, it was faulted in after being
-			 * unmapped in caller.  Unmap (again) now after taking
-			 * the fault mutex.  The mutex will prevent faults
-			 * until we finish removing the page.
-			 *
-			 * This race can only happen in the hole punch case.
-			 * Getting here in a truncate operation is a bug.
+			 * A mapped page is impossible as callers should unmap
+			 * all references before calling.  And, i_mmap_rwsem
+			 * prevents the creation of additional mappings.
 			 */
-			if (unlikely(page_mapped(page))) {
-				BUG_ON(truncate_op);
-
-				i_mmap_lock_write(mapping);
-				hugetlb_vmdelete_list(&mapping->i_mmap,
-					index * pages_per_huge_page(h),
-					(index + 1) * pages_per_huge_page(h));
-				i_mmap_unlock_write(mapping);
-			}
+			VM_BUG_ON(page_mapped(page));
 
 			lock_page(page);
 			/*
@@ -470,7 +451,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			}
 
 			unlock_page(page);
-			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 		}
 		huge_pagevec_release(&pvec);
 		cond_resched();
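With the per-page fault mutex gone from this loop, i_mmap_rwsem is the only synchronization keeping faults out while pages are removed. A minimal userspace analogy of that reader/writer discipline, using POSIX rwlocks in place of the kernel rwsem (illustration only, not kernel code; build with cc -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t i_mmap_rwsem = PTHREAD_RWLOCK_INITIALIZER;

/* Like the fault path: many faults may proceed concurrently. */
static void *fault_path(void *arg)
{
	(void)arg;
	pthread_rwlock_rdlock(&i_mmap_rwsem);
	puts("fault: may install a page mapping");
	pthread_rwlock_unlock(&i_mmap_rwsem);
	return NULL;
}

/* Like truncate/hole punch: exclusive, so no fault can run while
 * pages are unmapped and removed. */
static void *truncate_path(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&i_mmap_rwsem);
	puts("truncate: unmap and remove pages, no fault can race");
	pthread_rwlock_unlock(&i_mmap_rwsem);
	return NULL;
}

int main(void)
{
	pthread_t f, t;

	pthread_create(&f, NULL, fault_path, NULL);
	pthread_create(&t, NULL, truncate_path, NULL);
	pthread_join(f, NULL);
	pthread_join(t, NULL);
	return 0;
}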
@@ -482,9 +462,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
+	struct address_space *mapping = inode->i_mapping;
 	struct resv_map *resv_map;
 
+	/*
+	 * The vfs layer guarantees that there are no other users of this
+	 * inode.  Therefore, it would be safe to call remove_inode_hugepages
+	 * without holding i_mmap_rwsem.  We acquire and hold here to be
+	 * consistent with other callers.  Since there will be no contention
+	 * on the semaphore, overhead is negligible.
+	 */
+	i_mmap_lock_write(mapping);
 	remove_inode_hugepages(inode, 0, LLONG_MAX);
+	i_mmap_unlock_write(mapping);
+
 	resv_map = (struct resv_map *)inode->i_mapping->private_data;
 	/* root inode doesn't have the resv_map, so we should check it */
 	if (resv_map)
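Since eviction takes the semaphore only to honor the documented rule, one hypothetical way to make that rule self-checking (not part of this patch) is a lockdep assertion at the top of remove_inode_hugepages(); recent kernels spell this lockdep_assert_held_write(), while trees of this vintage used lockdep_assert_held_exclusive():

/*
 * Hypothetical hardening, not in this patch: encode the
 * "callers hold i_mmap_rwsem in write mode" invariant so
 * lockdep flags any caller that forgets it.
 */
static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
				   loff_t lend)
{
	struct address_space *mapping = inode->i_mapping;

	lockdep_assert_held_write(&mapping->i_mmap_rwsem);
	/* ... existing body ... */
}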
@@ -505,8 +496,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	i_mmap_lock_write(mapping);
 	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
 		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
-	i_mmap_unlock_write(mapping);
 	remove_inode_hugepages(inode, offset, LLONG_MAX);
+	i_mmap_unlock_write(mapping);
 	return 0;
 }
 
@@ -540,8 +531,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			hugetlb_vmdelete_list(&mapping->i_mmap,
 						hole_start >> PAGE_SHIFT,
 						hole_end >> PAGE_SHIFT);
-		i_mmap_unlock_write(mapping);
 		remove_inode_hugepages(inode, hole_start, hole_end);
+		i_mmap_unlock_write(mapping);
 		inode_unlock(inode);
 	}
 
@@ -624,7 +615,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		/* addr is the offset within the file (zero based) */
 		addr = index * hpage_size;
 
-		/* mutex taken here, fault path and hole punch */
+		/*
+		 * fault mutex taken here, protects against fault path
+		 * and hole punch.  inode_lock previously taken protects
+		 * against truncation.
+		 */
 		hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
 						index, addr);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
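The new comment describes a layering: inode_lock, taken earlier in hugetlbfs_fallocate(), excludes truncation, while the fault mutex excludes faults and hole punch for one index. A hypothetical condensation of that layering, with allocation and error handling elided; alloc_one_index is an illustrative name, not patch code:

/*
 * Sketch of the per-index allocation step under the locking
 * layers described above.  Caller already holds inode_lock.
 */
static long alloc_one_index(struct inode *inode, struct hstate *h,
			    struct vm_area_struct *pseudo_vma,
			    pgoff_t index, unsigned long addr)
{
	struct address_space *mapping = inode->i_mapping;
	u32 hash;

	hash = hugetlb_fault_mutex_hash(h, current->mm, pseudo_vma,
					mapping, index, addr);
	/* serializes against faults and hole punch for this index */
	mutex_lock(&hugetlb_fault_mutex_table[hash]);
	/* ... find or allocate the huge page at 'index' ... */
	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
	return 0;
}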