Diffstat (limited to 'fs')
-rw-r--r--   fs/hugetlbfs/inode.c   144
1 file changed, 75 insertions, 69 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9c07d2d754c9..8bbf7f3e2a27 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -324,11 +324,48 @@ static void remove_huge_page(struct page *page)
 	delete_from_page_cache(page);
 }
 
+static void
+hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+{
+	struct vm_area_struct *vma;
+
+	/*
+	 * end == 0 indicates that the entire range after
+	 * start should be unmapped.
+	 */
+	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+		unsigned long v_offset;
+		unsigned long v_end;
+
+		/*
+		 * Can the expression below overflow on 32-bit arches?
+		 * No, because the interval tree returns us only those vmas
+		 * which overlap the truncated area starting at pgoff,
+		 * and no vma on a 32-bit arch can span beyond the 4GB.
+		 */
+		if (vma->vm_pgoff < start)
+			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
+		else
+			v_offset = 0;
+
+		if (!end)
+			v_end = vma->vm_end;
+		else {
+			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
+							+ vma->vm_start;
+			if (v_end > vma->vm_end)
+				v_end = vma->vm_end;
+		}
+
+		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
+									NULL);
+	}
+}
 
 /*
  * remove_inode_hugepages handles two distinct cases: truncation and hole
  * punch.  There are subtle differences in operation for each case.
-
+ *
  * truncation is indicated by end of range being LLONG_MAX
  *	In this case, we first scan the range and release found pages.
  *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
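
Note on the arithmetic in the hunk above: hugetlb_vmdelete_list() converts a file range given in page offsets into the virtual address span of each overlapping VMA before handing it to unmap_hugepage_range(). The stand-alone sketch below reproduces just that v_offset/v_end computation in userspace; struct vma, PAGE_SHIFT = 12 and the numbers in main() are local stand-ins for the kernel definitions, not kernel code.

/*
 * Stand-alone sketch of the v_offset/v_end computation in
 * hugetlb_vmdelete_list(); values and types are illustrative only.
 */
#include <stdio.h>

#define PAGE_SHIFT 12			/* 4 KiB base pages assumed here */

struct vma {				/* stand-in for vm_area_struct */
	unsigned long vm_start;		/* first mapped virtual address */
	unsigned long vm_end;		/* one past the last mapped address */
	unsigned long vm_pgoff;		/* file offset of vm_start, in pages */
};

/*
 * Compute the virtual span of 'vma' that overlaps file pages
 * [start, end); end == 0 means "to the end of the mapping", as in
 * hugetlb_vmdelete_list().
 */
static void vma_overlap(const struct vma *vma, unsigned long start,
			unsigned long end,
			unsigned long *v_start, unsigned long *v_end)
{
	unsigned long v_offset;

	if (vma->vm_pgoff < start)
		v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
	else
		v_offset = 0;

	if (!end)
		*v_end = vma->vm_end;
	else {
		*v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT) + vma->vm_start;
		if (*v_end > vma->vm_end)
			*v_end = vma->vm_end;
	}
	*v_start = vma->vm_start + v_offset;
}

int main(void)
{
	/* 16-page mapping of file pages 8..23 at 0x700000000000 */
	struct vma vma = { 0x700000000000UL, 0x700000010000UL, 8 };
	unsigned long lo, hi;

	vma_overlap(&vma, 12, 20, &lo, &hi);	/* punch file pages 12..19 */
	printf("unmap [%#lx, %#lx)\n", lo, hi);
	return 0;
}
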
@@ -379,6 +416,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 
 	for (i = 0; i < pagevec_count(&pvec); ++i) {
 		struct page *page = pvec.pages[i];
+		bool rsv_on_error;
 		u32 hash;
 
 		/*
@@ -395,37 +433,43 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 						mapping, next, 0);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
-		lock_page(page);
-		if (likely(!page_mapped(page))) {
-			bool rsv_on_error = !PagePrivate(page);
-			/*
-			 * We must free the huge page and remove
-			 * from page cache (remove_huge_page) BEFORE
-			 * removing the region/reserve map
-			 * (hugetlb_unreserve_pages).  In rare out
-			 * of memory conditions, removal of the
-			 * region/reserve map could fail.  Before
-			 * free'ing the page, note PagePrivate which
-			 * is used in case of error.
-			 */
-			remove_huge_page(page);
-			freed++;
-			if (!truncate_op) {
-				if (unlikely(hugetlb_unreserve_pages(
-							inode, next,
-							next + 1, 1)))
-					hugetlb_fix_reserve_counts(
-							inode, rsv_on_error);
-			}
-		} else {
-			/*
-			 * If page is mapped, it was faulted in after
-			 * being unmapped.  It indicates a race between
-			 * hole punch and page fault.  Do nothing in
-			 * this case.  Getting here in a truncate
-			 * operation is a bug.
-			 */
+		/*
+		 * If page is mapped, it was faulted in after being
+		 * unmapped in caller.  Unmap (again) now after taking
+		 * the fault mutex.  The mutex will prevent faults
+		 * until we finish removing the page.
+		 *
+		 * This race can only happen in the hole punch case.
+		 * Getting here in a truncate operation is a bug.
+		 */
+		if (unlikely(page_mapped(page))) {
 			BUG_ON(truncate_op);
+
+			i_mmap_lock_write(mapping);
+			hugetlb_vmdelete_list(&mapping->i_mmap,
+				next * pages_per_huge_page(h),
+				(next + 1) * pages_per_huge_page(h));
+			i_mmap_unlock_write(mapping);
+		}
+
+		lock_page(page);
+		/*
+		 * We must free the huge page and remove from page
+		 * cache (remove_huge_page) BEFORE removing the
+		 * region/reserve map (hugetlb_unreserve_pages).  In
+		 * rare out of memory conditions, removal of the
+		 * region/reserve map could fail.  Before free'ing
+		 * the page, note PagePrivate which is used in case
+		 * of error.
+		 */
+		rsv_on_error = !PagePrivate(page);
+		remove_huge_page(page);
+		freed++;
+		if (!truncate_op) {
+			if (unlikely(hugetlb_unreserve_pages(inode,
+						next, next + 1, 1)))
+				hugetlb_fix_reserve_counts(inode,
+							rsv_on_error);
 		}
 
 		unlock_page(page);
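
Note on the reworked loop above: the page can only be found mapped here if a page fault raced with the hole punch after the caller's unmap and before this thread took hugetlb_fault_mutex_table[hash]; unmapping again while holding that per-index mutex keeps new faults out until removal completes. The userspace sketch below shows the hashed-mutex pattern in general terms; the table size, hash function and pthread locking are illustrative stand-ins for the kernel's hugetlb_fault_mutex_hash()/mutex_lock() pair, not its actual implementation.

/*
 * Sketch of a hashed fault-mutex table: the fault path and the remove
 * path both lock the slot for a given (mapping, index), so once the
 * remove side holds the mutex no new fault can map that index until
 * removal finishes.
 */
#include <pthread.h>
#include <stdint.h>

#define FAULT_MUTEX_SLOTS 64	/* illustrative; the kernel sizes its table differently */

static pthread_mutex_t fault_mutex_table[FAULT_MUTEX_SLOTS] =
	{ [0 ... FAULT_MUTEX_SLOTS - 1] = PTHREAD_MUTEX_INITIALIZER };

/* Stand-in for hugetlb_fault_mutex_hash(): map (mapping, index) to a slot. */
static uint32_t fault_mutex_hash(const void *mapping, unsigned long index)
{
	uint64_t key = (uintptr_t)mapping ^ (index * 0x9e3779b97f4a7c15ULL);

	return (uint32_t)(key % FAULT_MUTEX_SLOTS);
}

/* Fault path: take the slot, then look up or allocate and map the page. */
static void fault_in(const void *mapping, unsigned long index)
{
	uint32_t hash = fault_mutex_hash(mapping, index);

	pthread_mutex_lock(&fault_mutex_table[hash]);
	/* ... install the page for 'index' ... */
	pthread_mutex_unlock(&fault_mutex_table[hash]);
}

/* Remove path: with the slot held, tear down any mapping a racing fault
 * installed earlier, then drop the page; no new fault can sneak in. */
static void remove_index(const void *mapping, unsigned long index)
{
	uint32_t hash = fault_mutex_hash(mapping, index);

	pthread_mutex_lock(&fault_mutex_table[hash]);
	/* ... unmap if mapped, then remove from the page cache ... */
	pthread_mutex_unlock(&fault_mutex_table[hash]);
}

int main(void)
{
	int dummy_mapping;

	fault_in(&dummy_mapping, 12);
	remove_index(&dummy_mapping, 12);
	return 0;
}
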
@@ -452,44 +496,6 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
-static inline void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
-{
-	struct vm_area_struct *vma;
-
-	/*
-	 * end == 0 indicates that the entire range after
-	 * start should be unmapped.
-	 */
-	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
-		unsigned long v_offset;
-		unsigned long v_end;
-
-		/*
-		 * Can the expression below overflow on 32-bit arches?
-		 * No, because the interval tree returns us only those vmas
-		 * which overlap the truncated area starting at pgoff,
-		 * and no vma on a 32-bit arch can span beyond the 4GB.
-		 */
-		if (vma->vm_pgoff < start)
-			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
-		else
-			v_offset = 0;
-
-		if (!end)
-			v_end = vma->vm_end;
-		else {
-			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
-							+ vma->vm_start;
-			if (v_end > vma->vm_end)
-				v_end = vma->vm_end;
-		}
-
-		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
-									NULL);
-	}
-}
-
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	pgoff_t pgoff;
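
For context, the path changed above is driven from userspace by hole punching a hugetlbfs file with fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE); a thread touching the punched range concurrently takes the fault path this patch serializes against. A minimal example follows; the /dev/hugepages mount point, the 2 MB huge page size and the reserved page count are assumptions about the test system, not requirements of the patch.

/*
 * Punch a hole in a mapped hugetlbfs file while the mapping stays
 * alive.  Assumes a hugetlbfs mount at /dev/hugepages, a 2 MB default
 * huge page size and at least 4 free huge pages; adjust as needed.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)
#define NR_PAGES   4UL

int main(void)
{
	size_t len = NR_PAGES * HPAGE_SIZE;
	int fd = open("/dev/hugepages/punch-test", O_CREAT | O_RDWR, 0600);
	char *map;

	if (fd < 0) { perror("open"); return 1; }
	if (ftruncate(fd, len)) { perror("ftruncate"); return 1; }

	map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) { perror("mmap"); return 1; }

	memset(map, 0xaa, len);			/* fault in all huge pages */

	/*
	 * Punch out the middle two pages; the kernel unmaps them, removes
	 * them from the page cache and releases the reservations.  A
	 * concurrent access to the same range from another thread would
	 * take the fault path the patch above serializes against.
	 */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      HPAGE_SIZE, 2 * HPAGE_SIZE))
		perror("fallocate");

	map[HPAGE_SIZE] = 1;			/* fault a punched page back in */

	munmap(map, len);
	close(fd);
	unlink("/dev/hugepages/punch-test");
	return 0;
}
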