Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r--	fs/hugetlbfs/inode.c	61
1 file changed, 33 insertions(+), 28 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2fcea5f8225..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
  * truncation is indicated by end of range being LLONG_MAX
  * In this case, we first scan the range and release found pages.
  * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- * maps and global counts.
+ * maps and global counts.  Page faults can not race with truncation
+ * in this routine.  hugetlb_no_page() prevents page faults in the
+ * truncated range.  It checks i_size before allocation, and again after
+ * with the page table lock for the page held.  The same lock must be
+ * acquired to unmap a page.
  * hole punch is indicated if end is not LLONG_MAX
  * In the hole punch case we scan the range and release found pages.
  * Only when releasing a page is the associated region/reserv map
  * deleted.  The region/reserv map for ranges without associated
- * pages are not modified.
- *
- * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
- * races with page faults.
- *
+ * pages are not modified.  Page faults can race with hole punch.
+ * This is indicated if we find a mapped page.
  * Note: If the passed end of range value is beyond the end of file, but
  * not LLONG_MAX this routine still performs a hole punch operation.
  */
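The rewritten comment above encodes the whole synchronization scheme: truncation excludes faults because hugetlb_no_page() re-checks i_size under the page table lock that truncation must also take to unmap a page, while hole punch tolerates the race and detects it by finding a mapped page. As a rough illustration of that check/re-check pattern, here is a minimal userspace sketch (a pthread mutex stands in for the page table lock; file_size, fault_in() and truncate_to() are hypothetical names, not kernel APIs):

/*
 * Userspace sketch of the i_size check/re-check described above.
 * The unsynchronized file_size update is a simplification; the
 * kernel orders it via i_size_write() and the page table lock.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 4

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static long file_size = NR_PAGES;	/* in pages */
static bool page_present[NR_PAGES];

static bool fault_in(long index)
{
	if (index >= file_size)			/* check before "allocating" */
		return false;

	pthread_mutex_lock(&page_lock);
	if (index >= file_size) {		/* re-check under the lock */
		pthread_mutex_unlock(&page_lock);
		return false;			/* truncation won the race */
	}
	page_present[index] = true;		/* safe: unmap needs page_lock */
	pthread_mutex_unlock(&page_lock);
	return true;
}

static void truncate_to(long new_size)
{
	file_size = new_size;		/* later faults beyond new_size fail */

	pthread_mutex_lock(&page_lock);	/* same lock the fault path holds */
	for (long i = new_size; i < NR_PAGES; i++)
		page_present[i] = false;	/* "unmap" truncated pages */
	pthread_mutex_unlock(&page_lock);
}

int main(void)
{
	fault_in(3);
	truncate_to(2);
	printf("page 3 present after truncate: %d\n", page_present[3]);
	return 0;
}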
@@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 		for (i = 0; i < pagevec_count(&pvec); ++i) {
 			struct page *page = pvec.pages[i];
+			u32 hash;
 
 			index = page->index;
+			hash = hugetlb_fault_mutex_hash(h, current->mm,
+							&pseudo_vma,
+							mapping, index, 0);
+			mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
 			/*
-			 * A mapped page is impossible as callers should unmap
-			 * all references before calling.  And, i_mmap_rwsem
-			 * prevents the creation of additional mappings.
+			 * If page is mapped, it was faulted in after being
+			 * unmapped in caller.  Unmap (again) now after taking
+			 * the fault mutex.  The mutex will prevent faults
+			 * until we finish removing the page.
+			 *
+			 * This race can only happen in the hole punch case.
+			 * Getting here in a truncate operation is a bug.
 			 */
-			VM_BUG_ON(page_mapped(page));
+			if (unlikely(page_mapped(page))) {
+				BUG_ON(truncate_op);
+
+				i_mmap_lock_write(mapping);
+				hugetlb_vmdelete_list(&mapping->i_mmap,
+					index * pages_per_huge_page(h),
+					(index + 1) * pages_per_huge_page(h));
+				i_mmap_unlock_write(mapping);
+			}
 
 			lock_page(page);
 			/*
@@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			}
 
 			unlock_page(page);
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 		}
 		huge_pagevec_release(&pvec);
 		cond_resched();
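These two hunks restore per-page exclusion via hugetlb_fault_mutex_table: each (mapping, index) pair hashes to one mutex, held across the removal, so a racing fault on the same page must wait while faults on other pages proceed in parallel. A minimal userspace sketch of the hashed-mutex-table idea (hypothetical names and hash; the kernel's table is sized from num_fault_mutexes and uses a different hash):

/* Userspace sketch of the hashed-mutex-table pattern restored above. */
#include <pthread.h>
#include <stdint.h>

#define TABLE_SIZE 64			/* power of two, like the kernel */

static pthread_mutex_t fault_mutex_table[TABLE_SIZE];

static void table_init(void)
{
	for (int i = 0; i < TABLE_SIZE; i++)
		pthread_mutex_init(&fault_mutex_table[i], NULL);
}

/* The same (mapping, index) always yields the same mutex. */
static uint32_t fault_mutex_hash(const void *mapping, uint64_t index)
{
	uint64_t key = (uintptr_t)mapping ^ (index * 0x9e3779b97f4a7c15ULL);

	return (uint32_t)(key & (TABLE_SIZE - 1));
}

static void remove_one_page(const void *mapping, uint64_t index)
{
	uint32_t hash = fault_mutex_hash(mapping, index);

	pthread_mutex_lock(&fault_mutex_table[hash]);
	/* A concurrent "fault" on this index blocks here until we finish. */
	pthread_mutex_unlock(&fault_mutex_table[hash]);
}

int main(void)
{
	int mapping;			/* stand-in for an address_space */

	table_init();
	remove_one_page(&mapping, 42);
	return 0;
}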
@@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
-	struct address_space *mapping = inode->i_mapping;
 	struct resv_map *resv_map;
 
-	/*
-	 * The vfs layer guarantees that there are no other users of this
-	 * inode.  Therefore, it would be safe to call remove_inode_hugepages
-	 * without holding i_mmap_rwsem.  We acquire and hold here to be
-	 * consistent with other callers.  Since there will be no contention
-	 * on the semaphore, overhead is negligible.
-	 */
-	i_mmap_lock_write(mapping);
 	remove_inode_hugepages(inode, 0, LLONG_MAX);
-	i_mmap_unlock_write(mapping);
-
 	resv_map = (struct resv_map *)inode->i_mapping->private_data;
 	/* root inode doesn't have the resv_map, so we should check it */
 	if (resv_map)
@@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	i_mmap_lock_write(mapping);
 	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
 		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
-	remove_inode_hugepages(inode, offset, LLONG_MAX);
 	i_mmap_unlock_write(mapping);
+	remove_inode_hugepages(inode, offset, LLONG_MAX);
 	return 0;
 }
 
@@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			hugetlb_vmdelete_list(&mapping->i_mmap,
 						hole_start >> PAGE_SHIFT,
 						hole_end >> PAGE_SHIFT);
-		remove_inode_hugepages(inode, hole_start, hole_end);
 		i_mmap_unlock_write(mapping);
+		remove_inode_hugepages(inode, hole_start, hole_end);
 		inode_unlock(inode);
 	}
 
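In both hugetlb_vmtruncate() and hugetlbfs_punch_hole(), remove_inode_hugepages() now runs after i_mmap_rwsem is dropped. That ordering is forced by the mapped-page path added earlier: remove_inode_hugepages() takes the fault mutex first and may then take i_mmap_rwsem itself, so calling it with the rwsem already held would self-deadlock. The resulting order, condensed from the hugetlb_vmtruncate() hunk above (not a complete function):

	/* The rwsem now covers only the vma-tree walk. */
	i_mmap_lock_write(mapping);
	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
	i_mmap_unlock_write(mapping);
	remove_inode_hugepages(inode, offset, LLONG_MAX);	/* rwsem dropped */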
@@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		/* addr is the offset within the file (zero based) */
 		addr = index * hpage_size;
 
-		/*
-		 * fault mutex taken here, protects against fault path
-		 * and hole punch.  inode_lock previously taken protects
-		 * against truncation.
-		 */
+		/* mutex taken here, fault path and hole punch */
 		hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
 						index, addr);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
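The allocation path in hugetlbfs_fallocate() thus takes the same mutex as the fault and hole punch paths, so all three serialize per page. Note that remove_inode_hugepages() passes current->mm and an address of 0 while fallocate passes mm and addr; for shared mappings (hugetlbfs's pseudo_vma is shared) the hash in this era of the kernel is keyed only on the mapping and index, so the differing arguments still select the same mutex. A runnable userspace illustration of that keying (simplified stand-in hash and hypothetical names; the kernel uses jhash2()):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_FAULT_MUTEXES 64	/* power of two, as in the kernel */

struct vma { int shared; };

static uint32_t fault_mutex_hash(const void *mm, const struct vma *vma,
				 const void *mapping, uint64_t idx,
				 uint64_t address)
{
	uint64_t key[2];

	if (vma->shared) {		/* hugetlbfs pseudo_vma is shared */
		key[0] = (uintptr_t)mapping;
		key[1] = idx;
	} else {
		key[0] = (uintptr_t)mm;
		key[1] = address;
	}
	/* stand-in mix; the kernel hashes the key with jhash2() */
	return (uint32_t)((key[0] * 31 + key[1]) & (NUM_FAULT_MUTEXES - 1));
}

int main(void)
{
	struct vma shared = { .shared = 1 };
	int mapping, mm_a, mm_b;

	/* Different mm/address, same (mapping, index) => same mutex. */
	assert(fault_mutex_hash(&mm_a, &shared, &mapping, 7, 0) ==
	       fault_mutex_hash(&mm_b, &shared, &mapping, 7, 0x1000));
	puts("same hash for a shared mapping regardless of mm/address");
	return 0;
}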