author      Dan Williams <dan.j.williams@intel.com>          2016-01-15 19:56:58 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>   2016-01-15 20:56:32 -0500
commit      cbb38e41a931aaae5b2fef3b1c74d0d6f866b698
tree        a7c58d66c673740b1f072d3eed37779276ed2595 /fs/dax.c
parent      3565fce3a6597e91b8dee3e8e36ebf70f8b7ef9b
dax: provide diagnostics for pmd mapping failures
There is a wide gamut of conditions that can trigger the dax pmd path to
fall back to pte mappings. Ideally we'd have a syscall interface to
determine mapping characteristics after the fact. In the meantime,
provide debug messages.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Suggested-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
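For quick orientation before the diff: the patch adds a single helper plus a wrapper macro, then calls dax_pmd_dbg() on every path that falls back from a pmd mapping to pte mappings. The sketch below condenses that helper from the first hunk; the #include lines are illustrative assumptions about headers fs/dax.c already pulls in, not part of the patch.

/* Condensed from the first hunk below; includes are illustrative only. */
#include <linux/blkdev.h>          /* bdevname(), BDEVNAME_SIZE */
#include <linux/buffer_head.h>     /* struct buffer_head */
#include <linux/printk.h>          /* pr_debug() */
#include <linux/sched.h>           /* current->comm */

static void __dax_dbg(struct buffer_head *bh, unsigned long address,
                const char *reason, const char *fn)
{
        if (bh) {
                char bname[BDEVNAME_SIZE];
                bdevname(bh->b_bdev, bname);
                /* A block mapping exists: report device, buffer state and extent. */
                pr_debug("%s: %s addr: %lx dev %s state %lx start %lld "
                        "length %zd fallback: %s\n", fn, current->comm,
                        address, bname, bh->b_state, (u64)bh->b_blocknr,
                        bh->b_size, reason);
        } else {
                /* No block mapping yet: report only the task, address and reason. */
                pr_debug("%s: %s addr: %lx fallback: %s\n", fn,
                        current->comm, address, reason);
        }
}

/* Each fallback site tags its reason, e.g. dax_pmd_dbg(NULL, address, "cow write"); */
#define dax_pmd_dbg(bh, address, reason)   __dax_dbg(bh, address, reason, "dax_pmd")

Because these are pr_debug()/dev_dbg() calls they are silent by default; on a kernel built with CONFIG_DYNAMIC_DEBUG they can be enabled at runtime, e.g. by writing 'file fs/dax.c +p' to dynamic_debug/control in debugfs.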
Diffstat (limited to 'fs/dax.c')
-rw-r--r--   fs/dax.c   65
1 file changed, 57 insertions(+), 8 deletions(-)
@@ -558,6 +558,24 @@ EXPORT_SYMBOL_GPL(dax_fault);
  */
 #define PG_PMD_COLOUR        ((PMD_SIZE >> PAGE_SHIFT) - 1)
 
+static void __dax_dbg(struct buffer_head *bh, unsigned long address,
+                const char *reason, const char *fn)
+{
+        if (bh) {
+                char bname[BDEVNAME_SIZE];
+                bdevname(bh->b_bdev, bname);
+                pr_debug("%s: %s addr: %lx dev %s state %lx start %lld "
+                        "length %zd fallback: %s\n", fn, current->comm,
+                        address, bname, bh->b_state, (u64)bh->b_blocknr,
+                        bh->b_size, reason);
+        } else {
+                pr_debug("%s: %s addr: %lx fallback: %s\n", fn,
+                        current->comm, address, reason);
+        }
+}
+
+#define dax_pmd_dbg(bh, address, reason)        __dax_dbg(bh, address, reason, "dax_pmd")
+
 int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 pmd_t *pmd, unsigned int flags, get_block_t get_block,
                 dax_iodone_t complete_unwritten)
@@ -581,21 +599,29 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         /* Fall back to PTEs if we're going to COW */
         if (write && !(vma->vm_flags & VM_SHARED)) {
                 split_huge_pmd(vma, pmd, address);
+                dax_pmd_dbg(NULL, address, "cow write");
                 return VM_FAULT_FALLBACK;
         }
         /* If the PMD would extend outside the VMA */
-        if (pmd_addr < vma->vm_start)
+        if (pmd_addr < vma->vm_start) {
+                dax_pmd_dbg(NULL, address, "vma start unaligned");
                 return VM_FAULT_FALLBACK;
-        if ((pmd_addr + PMD_SIZE) > vma->vm_end)
+        }
+        if ((pmd_addr + PMD_SIZE) > vma->vm_end) {
+                dax_pmd_dbg(NULL, address, "vma end unaligned");
                 return VM_FAULT_FALLBACK;
+        }
 
         pgoff = linear_page_index(vma, pmd_addr);
         size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
         if (pgoff >= size)
                 return VM_FAULT_SIGBUS;
         /* If the PMD would cover blocks out of the file */
-        if ((pgoff | PG_PMD_COLOUR) >= size)
+        if ((pgoff | PG_PMD_COLOUR) >= size) {
+                dax_pmd_dbg(NULL, address,
+                                "offset + huge page size > file size");
                 return VM_FAULT_FALLBACK;
+        }
 
         memset(&bh, 0, sizeof(bh));
         block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
@@ -611,8 +637,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
          * just fall back to PTEs. Calling get_block 512 times in a loop
          * would be silly.
          */
-        if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
+        if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
+                dax_pmd_dbg(&bh, address, "allocated block too small");
                 goto fallback;
+        }
 
         /*
          * If we allocated new storage, make sure no process has any
@@ -635,23 +663,33 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 result = VM_FAULT_SIGBUS;
                 goto out;
         }
-        if ((pgoff | PG_PMD_COLOUR) >= size)
+        if ((pgoff | PG_PMD_COLOUR) >= size) {
+                dax_pmd_dbg(&bh, address, "pgoff unaligned");
                 goto fallback;
+        }
 
         if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
                 spinlock_t *ptl;
                 pmd_t entry;
                 struct page *zero_page = get_huge_zero_page();
 
-                if (unlikely(!zero_page))
+                if (unlikely(!zero_page)) {
+                        dax_pmd_dbg(&bh, address, "no zero page");
                         goto fallback;
+                }
 
                 ptl = pmd_lock(vma->vm_mm, pmd);
                 if (!pmd_none(*pmd)) {
                         spin_unlock(ptl);
+                        dax_pmd_dbg(&bh, address, "pmd already present");
                         goto fallback;
                 }
 
+                dev_dbg(part_to_dev(bdev->bd_part),
+                                "%s: %s addr: %lx pfn: <zero> sect: %llx\n",
+                                __func__, current->comm, address,
+                                (unsigned long long) to_sector(&bh, inode));
+
                 entry = mk_pmd(zero_page, vma->vm_page_prot);
                 entry = pmd_mkhuge(entry);
                 set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry);
@@ -668,8 +706,13 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                         result = VM_FAULT_SIGBUS;
                         goto out;
                 }
-                if (length < PMD_SIZE
-                                || (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)) {
+                if (length < PMD_SIZE) {
+                        dax_pmd_dbg(&bh, address, "dax-length too small");
+                        dax_unmap_atomic(bdev, &dax);
+                        goto fallback;
+                }
+                if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) {
+                        dax_pmd_dbg(&bh, address, "pfn unaligned");
                         dax_unmap_atomic(bdev, &dax);
                         goto fallback;
                 }
@@ -680,6 +723,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                  */
                 if (pfn_t_has_page(dax.pfn)) {
                         dax_unmap_atomic(bdev, &dax);
+                        dax_pmd_dbg(&bh, address, "pfn not in memmap");
                         goto fallback;
                 }
 
@@ -692,6 +736,11 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 }
                 dax_unmap_atomic(bdev, &dax);
 
+                dev_dbg(part_to_dev(bdev->bd_part),
+                                "%s: %s addr: %lx pfn: %lx sect: %llx\n",
+                                __func__, current->comm, address,
+                                pfn_t_to_pfn(dax.pfn),
+                                (unsigned long long) dax.sector);
                 result |= vmf_insert_pfn_pmd(vma, address, pmd,
                                 dax.pfn, write);
         }