Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r-- | fs/hugetlbfs/inode.c | 65
1 file changed, 32 insertions, 33 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 316adb968b65..de4bdfac0cec 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page)
  * truncation is indicated by end of range being LLONG_MAX
  * In this case, we first scan the range and release found pages.
  * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- * maps and global counts.
+ * maps and global counts. Page faults can not race with truncation
+ * in this routine. hugetlb_no_page() prevents page faults in the
+ * truncated range. It checks i_size before allocation, and again after
+ * with the page table lock for the page held. The same lock must be
+ * acquired to unmap a page.
  * hole punch is indicated if end is not LLONG_MAX
  * In the hole punch case we scan the range and release found pages.
  * Only when releasing a page is the associated region/reserv map
  * deleted. The region/reserv map for ranges without associated
- * pages are not modified.
+ * pages are not modified. Page faults can race with hole punch.
+ * This is indicated if we find a mapped page.
  * Note: If the passed end of range value is beyond the end of file, but
  * not LLONG_MAX this routine still performs a hole punch operation.
  */
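The comment block added in the hunk above distinguishes the two callers of remove_inode_hugepages(): truncation (end of range is LLONG_MAX) and hole punch (any bounded end). As a rough illustration of how those two cases are driven from userspace, here is a minimal sketch; the mount point /dev/hugepages/example and the 2 MB huge page size are assumptions, and error handling is kept to a minimum.

/* Illustrative only: the path and 2 MB huge page size are assumptions. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)

int main(void)
{
        int fd = open("/dev/hugepages/example", O_CREAT | O_RDWR, 0600);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Size the file at four huge pages. */
        if (ftruncate(fd, 4 * HPAGE_SIZE) < 0)
                perror("ftruncate(grow)");

        /*
         * Hole punch: release the second huge page without changing i_size.
         * The kernel sees a bounded end of range, i.e. end != LLONG_MAX.
         */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      HPAGE_SIZE, HPAGE_SIZE) < 0)
                perror("fallocate(punch)");

        /*
         * Truncation: shrink the file; everything past the new i_size is
         * removed and the end of range is LLONG_MAX.
         */
        if (ftruncate(fd, HPAGE_SIZE) < 0)
                perror("ftruncate(shrink)");

        close(fd);
        unlink("/dev/hugepages/example");
        return 0;
}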
@@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
         next = start;
         while (next < end) {
                 /*
-                 * Make sure to never grab more pages that we
-                 * might possibly need.
+                 * Don't grab more pages than the number left in the range.
                  */
                 if (end - next < lookup_nr)
                         lookup_nr = end - next;
 
                 /*
-                 * This pagevec_lookup() may return pages past 'end',
-                 * so we must check for page->index > end.
+                 * When no more pages are found, we are done.
                  */
-                if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {
-                        if (next == start)
-                                break;
-                        next = start;
-                        continue;
-                }
+                if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
+                        break;
 
                 for (i = 0; i < pagevec_count(&pvec); ++i) {
                         struct page *page = pvec.pages[i];
                         u32 hash;
 
+                        /*
+                         * The page (index) could be beyond end. This is
+                         * only possible in the punch hole case as end is
+                         * max page offset in the truncate case.
+                         */
+                        next = page->index;
+                        if (next >= end)
+                                break;
+
                         hash = hugetlb_fault_mutex_hash(h, current->mm,
                                                         &pseudo_vma,
                                                         mapping, next, 0);
                         mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
                         lock_page(page);
-                        if (page->index >= end) {
-                                unlock_page(page);
-                                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-                                next = end;     /* we are done */
-                                break;
-                        }
-
-                        /*
-                         * If page is mapped, it was faulted in after being
-                         * unmapped. Do nothing in this race case. In the
-                         * normal case page is not mapped.
-                         */
-                        if (!page_mapped(page)) {
+                        if (likely(!page_mapped(page))) {
                                 bool rsv_on_error = !PagePrivate(page);
                                 /*
                                  * We must free the huge page and remove
@@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
                                                 hugetlb_fix_reserve_counts(
                                                         inode, rsv_on_error);
                                 }
+                        } else {
+                                /*
+                                 * If page is mapped, it was faulted in after
+                                 * being unmapped. It indicates a race between
+                                 * hole punch and page fault. Do nothing in
+                                 * this case. Getting here in a truncate
+                                 * operation is a bug.
+                                 */
+                                BUG_ON(truncate_op);
                         }
 
-                        if (page->index > next)
-                                next = page->index;
-
-                        ++next;
                         unlock_page(page);
-
                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                 }
+                ++next;
                 huge_pagevec_release(&pvec);
+                cond_resched();
         }
 
         if (truncate_op)
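The two hunks above rework the lookup loop in remove_inode_hugepages(). The key serialization point is the fault mutex: the removal path hashes the page index into hugetlb_fault_mutex_table and holds that mutex while it locks the page and checks page_mapped(), the same mutex a racing fault for that index would take. The standalone sketch below shows only the generic hash-the-key-to-a-mutex-table pattern; the table size, hash function, and helper names are illustrative and are not the kernel's hugetlb_fault_mutex_hash() implementation.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_FAULT_MUTEXES 64            /* illustrative table size */

static pthread_mutex_t fault_mutex_table[NUM_FAULT_MUTEXES];

/* Mix a (mapping, index) pair into a table slot; any reasonable mixer works. */
static uint32_t fault_mutex_hash(const void *mapping, uint64_t index)
{
        uint64_t key = (uint64_t)(uintptr_t)mapping ^
                       (index * 0x9e3779b97f4a7c15ULL);

        return (uint32_t)(key % NUM_FAULT_MUTEXES);
}

/*
 * Removal path: take the per-index mutex, then inspect/free the page.
 * A racing "fault" path for the same index computes the same slot and
 * blocks here until removal is finished.
 */
static void remove_page_at(const void *mapping, uint64_t index)
{
        uint32_t hash = fault_mutex_hash(mapping, index);

        pthread_mutex_lock(&fault_mutex_table[hash]);
        /* ...lock the page, check page_mapped(), free it if unmapped... */
        printf("index %llu serialized on mutex slot %u\n",
               (unsigned long long)index, hash);
        pthread_mutex_unlock(&fault_mutex_table[hash]);
}

int main(void)
{
        static int dummy_mapping;

        for (int i = 0; i < NUM_FAULT_MUTEXES; i++)
                pthread_mutex_init(&fault_mutex_table[i], NULL);

        remove_page_at(&dummy_mapping, 0);
        remove_page_at(&dummy_mapping, 7);
        return 0;
}

A fixed table of mutexes keyed by hash bounds memory use while still letting operations on different indexes proceed concurrently; only same-slot collisions serialize.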
@@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
         if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
                 i_size_write(inode, offset + len);
         inode->i_ctime = CURRENT_TIME;
-        spin_lock(&inode->i_lock);
-        inode->i_private = NULL;
-        spin_unlock(&inode->i_lock);
 out:
         mutex_unlock(&inode->i_mutex);
         return error;
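The race the new else/BUG_ON(truncate_op) branch is concerned with only exists for hole punch: a page fault may legitimately repopulate a punched range while remove_inode_hugepages() is scanning it, whereas hugetlb_no_page() keeps faults out of a truncated range. Below is a rough userspace sketch of the two colliding operations; the file path and huge page size are assumptions, and it only illustrates the competing calls rather than being a deterministic reproducer.

#define _GNU_SOURCE
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)  /* assumed huge page size */
#define NR_PAGES   4UL

static char *map;

/* One thread keeps faulting the huge pages back in. */
static void *faulter(void *arg)
{
        (void)arg;
        for (int i = 0; i < 1000; i++)
                for (unsigned long p = 0; p < NR_PAGES; p++)
                        map[p * HPAGE_SIZE] = 1;
        return NULL;
}

int main(void)
{
        int fd = open("/dev/hugepages/race-example", O_CREAT | O_RDWR, 0600);
        pthread_t t;

        if (fd < 0 || ftruncate(fd, NR_PAGES * HPAGE_SIZE) < 0) {
                perror("setup");
                return 1;
        }
        map = mmap(NULL, NR_PAGES * HPAGE_SIZE, PROT_READ | PROT_WRITE,
                   MAP_SHARED, fd, 0);
        if (map == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        pthread_create(&t, NULL, faulter, NULL);
        /* The other thread repeatedly punches the same range. */
        for (int i = 0; i < 1000; i++)
                fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          0, NR_PAGES * HPAGE_SIZE);
        pthread_join(t, NULL);

        munmap(map, NR_PAGES * HPAGE_SIZE);
        close(fd);
        unlink("/dev/hugepages/race-example");
        return 0;
}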