 fs/dax.c    | 70 ++++++++++++++++++---------------------------------
 mm/memory.c |  2 ++
 2 files changed, 31 insertions(+), 41 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index bcfb14bfc1e4..a86d3cc2b389 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -285,6 +285,7 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh,
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 			struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct address_space *mapping = inode->i_mapping;
 	sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
 	void __pmem *addr;
@@ -292,6 +293,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 	pgoff_t size;
 	int error;
 
+	i_mmap_lock_read(mapping);
+
 	/*
 	 * Check truncate didn't happen while we were allocating a block.
 	 * If it did, this block may or may not be still allocated to the
@@ -321,6 +324,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 	error = vm_insert_mixed(vma, vaddr, pfn);
 
  out:
+	i_mmap_unlock_read(mapping);
+
 	return error;
 }
 
@@ -382,17 +387,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			 * from a read fault and we've raced with a truncate
 			 */
 			error = -EIO;
-			goto unlock;
+			goto unlock_page;
 		}
-	} else {
-		i_mmap_lock_write(mapping);
 	}
 
 	error = get_block(inode, block, &bh, 0);
 	if (!error && (bh.b_size < PAGE_SIZE))
 		error = -EIO;		/* fs corruption? */
 	if (error)
-		goto unlock;
+		goto unlock_page;
 
 	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
 		if (vmf->flags & FAULT_FLAG_WRITE) {
@@ -403,9 +406,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			if (!error && (bh.b_size < PAGE_SIZE))
 				error = -EIO;
 			if (error)
-				goto unlock;
+				goto unlock_page;
 		} else {
-			i_mmap_unlock_write(mapping);
 			return dax_load_hole(mapping, page, vmf);
 		}
 	}
@@ -417,15 +419,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		else
 			clear_user_highpage(new_page, vaddr);
 		if (error)
-			goto unlock;
+			goto unlock_page;
 		vmf->page = page;
 		if (!page) {
+			i_mmap_lock_read(mapping);
 			/* Check we didn't race with truncate */
 			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
 								PAGE_SHIFT;
 			if (vmf->pgoff >= size) {
+				i_mmap_unlock_read(mapping);
 				error = -EIO;
-				goto unlock;
+				goto out;
 			}
 		}
 		return VM_FAULT_LOCKED;
@@ -461,8 +465,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
 	}
 
-	if (!page)
-		i_mmap_unlock_write(mapping);
  out:
 	if (error == -ENOMEM)
 		return VM_FAULT_OOM | major;
@@ -471,14 +473,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		return VM_FAULT_SIGBUS | major;
 	return VM_FAULT_NOPAGE | major;
 
- unlock:
+ unlock_page:
 	if (page) {
 		unlock_page(page);
 		page_cache_release(page);
-	} else {
-		i_mmap_unlock_write(mapping);
 	}
-
 	goto out;
 }
 EXPORT_SYMBOL(__dax_fault);
@@ -556,10 +555,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
 	bh.b_size = PMD_SIZE;
-	i_mmap_lock_write(mapping);
 	length = get_block(inode, block, &bh, write);
 	if (length)
 		return VM_FAULT_SIGBUS;
+	i_mmap_lock_read(mapping);
 
 	/*
 	 * If the filesystem isn't willing to tell us the length of a hole,
@@ -569,36 +568,14 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
 		goto fallback;
 
-	sector = bh.b_blocknr << (blkbits - 9);
-
-	if (buffer_unwritten(&bh) || buffer_new(&bh)) {
-		int i;
-
-		length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
-						bh.b_size);
-		if (length < 0) {
-			result = VM_FAULT_SIGBUS;
-			goto out;
-		}
-		if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
-			goto fallback;
-
-		for (i = 0; i < PTRS_PER_PMD; i++)
-			clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
-		wmb_pmem();
-		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-		result |= VM_FAULT_MAJOR;
-	}
-
 	/*
 	 * If we allocated new storage, make sure no process has any
 	 * zero pages covering this hole
 	 */
 	if (buffer_new(&bh)) {
-		i_mmap_unlock_write(mapping);
+		i_mmap_unlock_read(mapping);
 		unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-		i_mmap_lock_write(mapping);
+		i_mmap_lock_read(mapping);
 	}
 
 	/*
@@ -635,6 +612,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		result = VM_FAULT_NOPAGE;
 		spin_unlock(ptl);
 	} else {
+		sector = bh.b_blocknr << (blkbits - 9);
 		length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
 						bh.b_size);
 		if (length < 0) {
@@ -644,15 +622,25 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
 			goto fallback;
 
+		if (buffer_unwritten(&bh) || buffer_new(&bh)) {
+			int i;
+			for (i = 0; i < PTRS_PER_PMD; i++)
+				clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
+			wmb_pmem();
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			result |= VM_FAULT_MAJOR;
+		}
+
 		result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
 	}
 
  out:
+	i_mmap_unlock_read(mapping);
+
 	if (buffer_unwritten(&bh))
 		complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
 
-	i_mmap_unlock_write(mapping);
-
 	return result;
 
  fallback:
diff --git a/mm/memory.c b/mm/memory.c
index 9cb27470fee9..deb679c31f2a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2426,6 +2426,8 @@ void unmap_mapping_range(struct address_space *mapping,
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
 
+
+	/* DAX uses i_mmap_lock to serialise file truncate vs page fault */
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);