-rw-r--r--  fs/dax.c    | 70
-rw-r--r--  mm/memory.c |  2
2 files changed, 31 insertions(+), 41 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -285,6 +285,7 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh,
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                         struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+        struct address_space *mapping = inode->i_mapping;
         sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
         unsigned long vaddr = (unsigned long)vmf->virtual_address;
         void __pmem *addr;
@@ -292,6 +293,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
         pgoff_t size;
         int error;
 
+        i_mmap_lock_read(mapping);
+
         /*
          * Check truncate didn't happen while we were allocating a block.
          * If it did, this block may or may not be still allocated to the
@@ -321,6 +324,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
         error = vm_insert_mixed(vma, vaddr, pfn);
 
  out:
+        i_mmap_unlock_read(mapping);
+
         return error;
 }
 
@@ -382,17 +387,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                          * from a read fault and we've raced with a truncate
                          */
                         error = -EIO;
-                        goto unlock;
+                        goto unlock_page;
                 }
-        } else {
-                i_mmap_lock_write(mapping);
         }
 
         error = get_block(inode, block, &bh, 0);
         if (!error && (bh.b_size < PAGE_SIZE))
                 error = -EIO;           /* fs corruption? */
         if (error)
-                goto unlock;
+                goto unlock_page;
 
         if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
                 if (vmf->flags & FAULT_FLAG_WRITE) {
@@ -403,9 +406,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                         if (!error && (bh.b_size < PAGE_SIZE))
                                 error = -EIO;
                         if (error)
-                                goto unlock;
+                                goto unlock_page;
                 } else {
-                        i_mmap_unlock_write(mapping);
                         return dax_load_hole(mapping, page, vmf);
                 }
         }
@@ -417,15 +419,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 else
                         clear_user_highpage(new_page, vaddr);
                 if (error)
-                        goto unlock;
+                        goto unlock_page;
                 vmf->page = page;
                 if (!page) {
+                        i_mmap_lock_read(mapping);
                         /* Check we didn't race with truncate */
                         size = (i_size_read(inode) + PAGE_SIZE - 1) >>
                                                                 PAGE_SHIFT;
                         if (vmf->pgoff >= size) {
+                                i_mmap_unlock_read(mapping);
                                 error = -EIO;
-                                goto unlock;
+                                goto out;
                         }
                 }
                 return VM_FAULT_LOCKED;
@@ -461,8 +465,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
         }
 
-        if (!page)
-                i_mmap_unlock_write(mapping);
  out:
         if (error == -ENOMEM)
                 return VM_FAULT_OOM | major;
@@ -471,14 +473,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 return VM_FAULT_SIGBUS | major;
         return VM_FAULT_NOPAGE | major;
 
- unlock:
+ unlock_page:
         if (page) {
                 unlock_page(page);
                 page_cache_release(page);
-        } else {
-                i_mmap_unlock_write(mapping);
         }
-
         goto out;
 }
 EXPORT_SYMBOL(__dax_fault);
@@ -556,10 +555,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
         bh.b_size = PMD_SIZE;
-        i_mmap_lock_write(mapping);
         length = get_block(inode, block, &bh, write);
         if (length)
                 return VM_FAULT_SIGBUS;
+        i_mmap_lock_read(mapping);
 
         /*
          * If the filesystem isn't willing to tell us the length of a hole,
@@ -569,36 +568,14 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
                 goto fallback;
 
-        sector = bh.b_blocknr << (blkbits - 9);
-
-        if (buffer_unwritten(&bh) || buffer_new(&bh)) {
-                int i;
-
-                length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
-                                                bh.b_size);
-                if (length < 0) {
-                        result = VM_FAULT_SIGBUS;
-                        goto out;
-                }
-                if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
-                        goto fallback;
-
-                for (i = 0; i < PTRS_PER_PMD; i++)
-                        clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
-                wmb_pmem();
-                count_vm_event(PGMAJFAULT);
-                mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-                result |= VM_FAULT_MAJOR;
-        }
-
         /*
          * If we allocated new storage, make sure no process has any
          * zero pages covering this hole
          */
         if (buffer_new(&bh)) {
-                i_mmap_unlock_write(mapping);
+                i_mmap_unlock_read(mapping);
                 unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-                i_mmap_lock_write(mapping);
+                i_mmap_lock_read(mapping);
         }
 
         /*
@@ -635,6 +612,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 result = VM_FAULT_NOPAGE;
                 spin_unlock(ptl);
         } else {
+                sector = bh.b_blocknr << (blkbits - 9);
                 length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
                                                 bh.b_size);
                 if (length < 0) {
@@ -644,15 +622,25 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
                         goto fallback;
 
+                if (buffer_unwritten(&bh) || buffer_new(&bh)) {
+                        int i;
+                        for (i = 0; i < PTRS_PER_PMD; i++)
+                                clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
+                        wmb_pmem();
+                        count_vm_event(PGMAJFAULT);
+                        mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+                        result |= VM_FAULT_MAJOR;
+                }
+
                 result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
         }
 
  out:
+        i_mmap_unlock_read(mapping);
+
         if (buffer_unwritten(&bh))
                 complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
 
-        i_mmap_unlock_write(mapping);
-
         return result;
 
  fallback:
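
Note: the heart of the fs/dax.c change is that dax_insert_mapping() now takes i_mmap_rwsem for read around its own truncate re-check and PFN insert, instead of the fault handlers holding the lock for write across get_block() and the insert. The following is a condensed sketch of the post-patch shape of that function, written against the 4.3-era DAX helpers; the bdev_direct_access()/clear_pmem() steps are elided and the pfn is assumed to be already known, so this is an illustration rather than the verbatim source.

/*
 * Condensed sketch of dax_insert_mapping() after this patch (illustrative,
 * not the verbatim function): the block-device lookup is omitted and the
 * pfn is passed in directly.
 */
static int dax_insert_mapping_sketch(struct inode *inode,
                struct vm_area_struct *vma, struct vm_fault *vmf,
                unsigned long pfn)
{
        struct address_space *mapping = inode->i_mapping;
        unsigned long vaddr = (unsigned long)vmf->virtual_address;
        pgoff_t size;
        int error = 0;

        i_mmap_lock_read(mapping);      /* before: fault path held i_mmap_lock_write() */

        /* Check truncate didn't happen while we were allocating a block */
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (unlikely(vmf->pgoff >= size)) {
                error = -EIO;
                goto out;
        }

        error = vm_insert_mixed(vma, vaddr, pfn);       /* install the DAX pte */
 out:
        i_mmap_unlock_read(mapping);
        return error;
}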
diff --git a/mm/memory.c b/mm/memory.c
index 9cb27470fee9..deb679c31f2a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2426,6 +2426,8 @@ void unmap_mapping_range(struct address_space *mapping,
         if (details.last_index < details.first_index)
                 details.last_index = ULONG_MAX;
 
+
+        /* DAX uses i_mmap_lock to serialise file truncate vs page fault */
         i_mmap_lock_write(mapping);
         if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
                 unmap_mapping_range_tree(&mapping->i_mmap, &details);
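
Note: as the new comment in unmap_mapping_range() says, DAX has no struct pages to lock, so i_mmap_rwsem is what keeps a racing fault from installing a mapping over blocks that truncate is freeing: fault paths hold it shared, truncate's unmap_mapping_range() holds it exclusive. Below is a minimal user-space model of that discipline, plain pthreads and purely illustrative; the names mirror the kernel calls but nothing here is kernel API.

/* Model of fault-vs-truncate serialisation with a reader/writer lock. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t i_mmap_lock = PTHREAD_RWLOCK_INITIALIZER;
static long i_size = 4096;      /* file size in bytes */
static int  mapped = 0;         /* stands in for an installed pte */

static void fault(long offset)          /* models __dax_fault() */
{
        pthread_rwlock_rdlock(&i_mmap_lock);    /* i_mmap_lock_read() */
        if (offset < i_size)                    /* re-check against truncate */
                mapped = 1;                     /* vm_insert_mixed() */
        pthread_rwlock_unlock(&i_mmap_lock);    /* i_mmap_unlock_read() */
}

static void truncate_to(long newsize)   /* models truncate -> unmap_mapping_range() */
{
        pthread_rwlock_wrlock(&i_mmap_lock);    /* i_mmap_lock_write() */
        i_size = newsize;
        mapped = 0;                             /* unmap_mapping_range_tree() */
        pthread_rwlock_unlock(&i_mmap_lock);    /* i_mmap_unlock_write() */
}

int main(void)
{
        fault(0);                       /* maps while the block is still valid */
        truncate_to(0);                 /* exclusive: tears the mapping down */
        fault(8192);                    /* past EOF: fault refuses to map */
        printf("mapped after truncate: %d\n", mapped);
        return 0;
}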