Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r--	fs/hugetlbfs/inode.c	61
1 file changed, 33 insertions, 28 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2fcea5f8225..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
  * truncation is indicated by end of range being LLONG_MAX
  * In this case, we first scan the range and release found pages.
  * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- * maps and global counts.
+ * maps and global counts.  Page faults can not race with truncation
+ * in this routine.  hugetlb_no_page() prevents page faults in the
+ * truncated range.  It checks i_size before allocation, and again after
+ * with the page table lock for the page held.  The same lock must be
+ * acquired to unmap a page.
  * hole punch is indicated if end is not LLONG_MAX
  * In the hole punch case we scan the range and release found pages.
  * Only when releasing a page is the associated region/reserv map
  * deleted.  The region/reserv map for ranges without associated
- * pages are not modified.
- *
- * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
- * races with page faults.
- *
+ * pages are not modified.  Page faults can race with hole punch.
+ * This is indicated if we find a mapped page.
  * Note: If the passed end of range value is beyond the end of file, but
  * not LLONG_MAX this routine still performs a hole punch operation.
  */
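Editor's note: the truncation half of the new comment describes a two-step i_size protocol on the fault side. For reference, the check in hugetlb_no_page() (mm/hugetlb.c) in kernels of this era had roughly the following shape; this is a condensed sketch with error paths elided, not the exact source:

	/* First check: do not fault in pages beyond EOF. */
	size = i_size_read(mapping->host) >> huge_page_shift(h);
	if (idx >= size)
		goto out;

	page = alloc_huge_page(vma, haddr, 0);
	/* ... page initialized and added to the page cache ... */

	/*
	 * Second check, under the page table lock.  Truncation must take
	 * this same lock to unmap the page, so if idx is still below
	 * i_size here, the page cannot be truncated away until the lock
	 * is dropped.
	 */
	ptl = huge_pte_lock(h, mm, ptep);
	size = i_size_read(mapping->host) >> huge_page_shift(h);
	if (idx >= size)
		goto backout;

	set_huge_pte_at(mm, haddr, ptep, make_huge_pte(vma, page, writable));
	spin_unlock(ptl);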
@@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 		for (i = 0; i < pagevec_count(&pvec); ++i) {
 			struct page *page = pvec.pages[i];
+			u32 hash;
 
 			index = page->index;
+			hash = hugetlb_fault_mutex_hash(h, current->mm,
+							&pseudo_vma,
+							mapping, index, 0);
+			mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
 			/*
-			 * A mapped page is impossible as callers should unmap
-			 * all references before calling.  And, i_mmap_rwsem
-			 * prevents the creation of additional mappings.
+			 * If page is mapped, it was faulted in after being
+			 * unmapped in caller.  Unmap (again) now after taking
+			 * the fault mutex.  The mutex will prevent faults
+			 * until we finish removing the page.
+			 *
+			 * This race can only happen in the hole punch case.
+			 * Getting here in a truncate operation is a bug.
 			 */
-			VM_BUG_ON(page_mapped(page));
+			if (unlikely(page_mapped(page))) {
+				BUG_ON(truncate_op);
+
+				i_mmap_lock_write(mapping);
+				hugetlb_vmdelete_list(&mapping->i_mmap,
+					index * pages_per_huge_page(h),
+					(index + 1) * pages_per_huge_page(h));
+				i_mmap_unlock_write(mapping);
+			}
 
 			lock_page(page);
 			/*
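Editor's note: two details in this hunk are easy to miss. First, hugetlb_vmdelete_list() takes its range in base-page units, hence the multiplication by pages_per_huge_page(h). Second, the hash guarantees that a fault and a removal targeting the same (mapping, index) always contend on the same mutex, while work on different pages is spread across a fixed table. A minimal userspace analogue of that hashed-lock pattern; the names and the hash function below are mine, not the kernel's:

	#include <pthread.h>
	#include <stdint.h>

	#define NUM_FAULT_MUTEXES 64	/* power of two, as in the kernel table */

	static pthread_mutex_t fault_mutex_table[NUM_FAULT_MUTEXES] = {
		[0 ... NUM_FAULT_MUTEXES - 1] = PTHREAD_MUTEX_INITIALIZER,
	};

	/* Same (mapping, index) pair always maps to the same slot. */
	static uint32_t fault_mutex_hash(const void *mapping, uint64_t index)
	{
		uint64_t key = (uint64_t)(uintptr_t)mapping ^
			       (index * 0x9e3779b97f4a7c15ULL);

		return (uint32_t)(key >> 32) & (NUM_FAULT_MUTEXES - 1);
	}

	/* Fault and removal paths both bracket their per-page work like this: */
	static void with_page_serialized(const void *mapping, uint64_t index,
					 void (*work)(const void *, uint64_t))
	{
		uint32_t hash = fault_mutex_hash(mapping, index);

		pthread_mutex_lock(&fault_mutex_table[hash]);
		work(mapping, index);
		pthread_mutex_unlock(&fault_mutex_table[hash]);
	}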
@@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			}
 
 			unlock_page(page);
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 		}
 		huge_pagevec_release(&pvec);
 		cond_resched();
@@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
-	struct address_space *mapping = inode->i_mapping;
 	struct resv_map *resv_map;
 
-	/*
-	 * The vfs layer guarantees that there are no other users of this
-	 * inode.  Therefore, it would be safe to call remove_inode_hugepages
-	 * without holding i_mmap_rwsem.  We acquire and hold here to be
-	 * consistent with other callers.  Since there will be no contention
-	 * on the semaphore, overhead is negligible.
-	 */
-	i_mmap_lock_write(mapping);
 	remove_inode_hugepages(inode, 0, LLONG_MAX);
-	i_mmap_unlock_write(mapping);
-
 	resv_map = (struct resv_map *)inode->i_mapping->private_data;
 	/* root inode doesn't have the resv_map, so we should check it */
 	if (resv_map)
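Editor's note: with the fault mutex now taken per page inside remove_inode_hugepages(), eviction needs no locking of its own, since the VFS guarantees there are no other users of the inode at this point. After this hunk the function reduces to roughly the following; the trailing lines are reconstructed from kernels of this era and should be treated as illustrative:

	static void hugetlbfs_evict_inode(struct inode *inode)
	{
		struct resv_map *resv_map;

		remove_inode_hugepages(inode, 0, LLONG_MAX);
		resv_map = (struct resv_map *)inode->i_mapping->private_data;
		/* root inode doesn't have the resv_map, so we should check it */
		if (resv_map)
			resv_map_release(&resv_map->refs);
		clear_inode(inode);
	}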
@@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	i_mmap_lock_write(mapping);
 	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
 		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
-	remove_inode_hugepages(inode, offset, LLONG_MAX);
 	i_mmap_unlock_write(mapping);
+	remove_inode_hugepages(inode, offset, LLONG_MAX);
 	return 0;
 }
 
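Editor's note: moving remove_inode_hugepages() outside the i_mmap_rwsem critical section here and in the next hunk is not cosmetic. The function now takes the fault mutex itself and, for a raced-in mapped page, acquires i_mmap_rwsem inside it (second hunk above); calling it with i_mmap_rwsem already held would invert that order. The resulting ordering, as a comment summary (my reading of the patch, not text from it):

	/*
	 * Lock ordering after this patch, outermost first:
	 *
	 *   inode_lock (truncate and fallocate callers)
	 *     hugetlb_fault_mutex_table[hash]   (per huge page)
	 *       i_mmap_rwsem                    (re-unmap of a raced-in page)
	 *
	 * remove_inode_hugepages() takes the fault mutex itself, so callers
	 * must drop i_mmap_rwsem before calling it.
	 */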
@@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			hugetlb_vmdelete_list(&mapping->i_mmap,
 						hole_start >> PAGE_SHIFT,
 						hole_end >> PAGE_SHIFT);
-		remove_inode_hugepages(inode, hole_start, hole_end);
 		i_mmap_unlock_write(mapping);
+		remove_inode_hugepages(inode, hole_start, hole_end);
 		inode_unlock(inode);
 	}
 
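Editor's note: this path is reached from userspace via fallocate(2) with FALLOC_FL_PUNCH_HOLE on a hugetlbfs file. A small runnable example; the path and sizes are placeholders, and a 2 MB huge page size is assumed:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		const off_t hpage = 2 * 1024 * 1024;
		int fd = open("/dev/hugepages/testfile", O_CREAT | O_RDWR, 0600);

		if (fd < 0) {
			perror("open");
			return EXIT_FAILURE;
		}
		if (ftruncate(fd, 4 * hpage) < 0)	/* size file to 4 huge pages */
			perror("ftruncate");

		/* Punch out the second huge page; KEEP_SIZE is mandatory here. */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      hpage, hpage) < 0)
			perror("fallocate");

		close(fd);
		return 0;
	}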
@@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		/* addr is the offset within the file (zero based) */
 		addr = index * hpage_size;
 
-		/*
-		 * fault mutex taken here, protects against fault path
-		 * and hole punch.  inode_lock previously taken protects
-		 * against truncation.
-		 */
+		/* mutex taken here, fault path and hole punch */
 		hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
 						index, addr);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
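Editor's note: the mutex taken above guards the allocation step of the preallocation loop. For context, in kernels of this era the loop continues roughly as follows; this is a condensed sketch from memory, not the exact source:

		/* See if the page already exists to avoid an alloc/free cycle. */
		page = find_get_page(mapping, index);
		if (page) {
			put_page(page);
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			continue;
		}

		page = alloc_huge_page(&pseudo_vma, addr, avoid_reserve);
		if (IS_ERR(page)) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			error = PTR_ERR(page);
			goto out;
		}
		/* ... zero the page, add it to the page cache, unlock ... */
		mutex_unlock(&hugetlb_fault_mutex_table[hash]);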
