Diffstat (limited to 'fs')
-rw-r--r--	fs/hugetlbfs/inode.c	144
1 file changed, 75 insertions(+), 69 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9c07d2d754c9..8bbf7f3e2a27 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -324,11 +324,48 @@ static void remove_huge_page(struct page *page)
 	delete_from_page_cache(page);
 }
 
+static void
+hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+{
+	struct vm_area_struct *vma;
+
+	/*
+	 * end == 0 indicates that the entire range after
+	 * start should be unmapped.
+	 */
+	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+		unsigned long v_offset;
+		unsigned long v_end;
+
+		/*
+		 * Can the expression below overflow on 32-bit arches?
+		 * No, because the interval tree returns us only those vmas
+		 * which overlap the truncated area starting at pgoff,
+		 * and no vma on a 32-bit arch can span beyond the 4GB.
+		 */
+		if (vma->vm_pgoff < start)
+			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
+		else
+			v_offset = 0;
+
+		if (!end)
+			v_end = vma->vm_end;
+		else {
+			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
+							+ vma->vm_start;
+			if (v_end > vma->vm_end)
+				v_end = vma->vm_end;
+		}
+
+		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
+									NULL);
+	}
+}
 
 /*
  * remove_inode_hugepages handles two distinct cases: truncation and hole
  * punch. There are subtle differences in operation for each case.
-
+ *
  * truncation is indicated by end of range being LLONG_MAX
  *	In this case, we first scan the range and release found pages.
  *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
@@ -379,6 +416,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 		for (i = 0; i < pagevec_count(&pvec); ++i) {
 			struct page *page = pvec.pages[i];
+			bool rsv_on_error;
 			u32 hash;
 
 			/*
@@ -395,37 +433,43 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 						mapping, next, 0);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
-			lock_page(page);
-			if (likely(!page_mapped(page))) {
-				bool rsv_on_error = !PagePrivate(page);
-				/*
-				 * We must free the huge page and remove
-				 * from page cache (remove_huge_page) BEFORE
-				 * removing the region/reserve map
-				 * (hugetlb_unreserve_pages). In rare out
-				 * of memory conditions, removal of the
-				 * region/reserve map could fail. Before
-				 * free'ing the page, note PagePrivate which
-				 * is used in case of error.
-				 */
-				remove_huge_page(page);
-				freed++;
-				if (!truncate_op) {
-					if (unlikely(hugetlb_unreserve_pages(
-							inode, next,
-							next + 1, 1)))
-						hugetlb_fix_reserve_counts(
-							inode, rsv_on_error);
-				}
-			} else {
-				/*
-				 * If page is mapped, it was faulted in after
-				 * being unmapped. It indicates a race between
-				 * hole punch and page fault. Do nothing in
-				 * this case. Getting here in a truncate
-				 * operation is a bug.
-				 */
+			/*
+			 * If page is mapped, it was faulted in after being
+			 * unmapped in caller. Unmap (again) now after taking
+			 * the fault mutex. The mutex will prevent faults
+			 * until we finish removing the page.
+			 *
+			 * This race can only happen in the hole punch case.
+			 * Getting here in a truncate operation is a bug.
+			 */
+			if (unlikely(page_mapped(page))) {
 				BUG_ON(truncate_op);
+
+				i_mmap_lock_write(mapping);
+				hugetlb_vmdelete_list(&mapping->i_mmap,
+					next * pages_per_huge_page(h),
+					(next + 1) * pages_per_huge_page(h));
+				i_mmap_unlock_write(mapping);
+			}
+
+			lock_page(page);
+			/*
+			 * We must free the huge page and remove from page
+			 * cache (remove_huge_page) BEFORE removing the
+			 * region/reserve map (hugetlb_unreserve_pages). In
+			 * rare out of memory conditions, removal of the
+			 * region/reserve map could fail. Before free'ing
+			 * the page, note PagePrivate which is used in case
+			 * of error.
+			 */
+			rsv_on_error = !PagePrivate(page);
+			remove_huge_page(page);
+			freed++;
+			if (!truncate_op) {
+				if (unlikely(hugetlb_unreserve_pages(inode,
+							next, next + 1, 1)))
+					hugetlb_fix_reserve_counts(inode,
+								rsv_on_error);
 			}
 
 			unlock_page(page);
@@ -452,44 +496,6 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
-static inline void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
-{
-	struct vm_area_struct *vma;
-
-	/*
-	 * end == 0 indicates that the entire range after
-	 * start should be unmapped.
-	 */
-	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
-		unsigned long v_offset;
-		unsigned long v_end;
-
-		/*
-		 * Can the expression below overflow on 32-bit arches?
-		 * No, because the interval tree returns us only those vmas
-		 * which overlap the truncated area starting at pgoff,
-		 * and no vma on a 32-bit arch can span beyond the 4GB.
-		 */
-		if (vma->vm_pgoff < start)
-			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
-		else
-			v_offset = 0;
-
-		if (!end)
-			v_end = vma->vm_end;
-		else {
-			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
-							+ vma->vm_start;
-			if (v_end > vma->vm_end)
-				v_end = vma->vm_end;
-		}
-
-		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
-									NULL);
-	}
-}
-
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	pgoff_t pgoff;
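
The arithmetic in hugetlb_vmdelete_list() above clips a file range, given in page-sized units, to the virtual address range of each mapping returned by the interval tree. Below is a minimal userspace sketch of just that clipping step, not kernel code: struct fake_vma, clip_to_vma(), the 4 KiB PAGE_SHIFT and the example addresses are all hypothetical values chosen only for illustration.

/*
 * Userspace sketch of the v_offset/v_end computation used by
 * hugetlb_vmdelete_list().  All names and values here are made up for
 * illustration; only the clipping arithmetic mirrors the patch above.
 */
#include <stdio.h>

#define PAGE_SHIFT 12UL	/* assume 4 KiB base pages for the example */

struct fake_vma {
	unsigned long vm_start;	/* first virtual address of the mapping */
	unsigned long vm_end;	/* one past the last virtual address */
	unsigned long vm_pgoff;	/* file offset of vm_start, in pages */
};

/* Compute the virtual range of @vma that covers file pages [start, end). */
static void clip_to_vma(const struct fake_vma *vma,
			unsigned long start, unsigned long end,
			unsigned long *v_start, unsigned long *v_end)
{
	unsigned long v_offset;

	/* If the vma begins before @start, skip its leading pages. */
	if (vma->vm_pgoff < start)
		v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
	else
		v_offset = 0;

	/* end == 0 means "everything after start". */
	if (!end) {
		*v_end = vma->vm_end;
	} else {
		*v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT) + vma->vm_start;
		if (*v_end > vma->vm_end)
			*v_end = vma->vm_end;
	}
	*v_start = vma->vm_start + v_offset;
}

int main(void)
{
	/* A mapping of file pages [8, 24) at virtual 0x700000000000. */
	struct fake_vma vma = {
		.vm_start = 0x700000000000UL,
		.vm_end   = 0x700000010000UL,	/* 16 pages later */
		.vm_pgoff = 8,
	};
	unsigned long v_start, v_end;

	/* Punch file pages [10, 12): only 2 pages of this vma are hit. */
	clip_to_vma(&vma, 10, 12, &v_start, &v_end);
	printf("unmap [%#lx, %#lx)\n", v_start, v_end);
	return 0;
}

With these example values (a vma mapping file pages [8, 24)), punching file pages [10, 12) yields an unmap range of two pages starting 0x2000 bytes into the vma: [0x700000002000, 0x700000004000).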