summaryrefslogtreecommitdiffstats
path: root/drivers/dax
diff options
context:
space:
mode:
authorDave Jiang <dave.jiang@intel.com>2017-02-24 17:56:59 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-24 20:46:54 -0500
commita2d581675d485eb7188f521f36efc114639a3096 (patch)
treeae566f77b965fed344458698fe6bb01280558647 /drivers/dax
parentbd233f538d51c2cae6f0bfc2cf7f0960e1683b8a (diff)
mm,fs,dax: change ->pmd_fault to ->huge_fault
Patch series "1G transparent hugepage support for device dax", v2. The following series implements support for 1G transparent hugepage on x86 for device dax. The bulk of the code was written by Matthew Wilcox a while back supporting transparent 1G hugepage for fs DAX. I have forward ported the relevant bits to 4.10-rc. The current submission has only the necessary code to support device DAX. Comments from Dan Williams: So the motivation and intended user of this functionality mirrors the motivation and users of 1GB page support in hugetlbfs. Given expected capacities of persistent memory devices, an in-memory database may want to reduce TLB pressure beyond what they can already achieve with 2MB mappings of a device-dax file. We have customer feedback to that effect as Willy mentioned in his previous version of these patches [1]. [1]: https://lkml.org/lkml/2016/1/31/52 Comments from Nilesh @ Oracle: There are applications which have a process model; and if you assume 10,000 processes attempting to mmap all the 6TB memory available on a server; we are looking at the following: processes : 10,000 memory : 6TB pte @ 4k page size: 8 bytes / 4K of memory * #processes = 6TB / 4k * 8 * 10000 = 1.5G * 80000 = 120,000GB pmd @ 2M page size: 120,000GB / 512 = ~240GB pud @ 1G page size: 240GB / 512 = ~480MB As you can see with 2M pages, this system will use up an exorbitant amount of DRAM to hold the page tables; but the 1G pages finally bring it down to a reasonable level. Memory sizes will keep increasing; so this number will keep increasing. An argument can be made to convert the applications from process model to thread model, but in the real world that may not always be practical. Hopefully this helps explain the use case where this is valuable. This patch (of 3): In preparation for adding the ability to handle PUD pages, convert vm_operations_struct.pmd_fault to vm_operations_struct.huge_fault. 
The vm_fault structure is extended to include a union of the different page table pointers that may be needed, and three flag bits are reserved to indicate which type of pointer is in the union. [ross.zwisler@linux.intel.com: remove unused function ext4_dax_huge_fault()] Link: http://lkml.kernel.org/r/1485813172-7284-1-git-send-email-ross.zwisler@linux.intel.com [dave.jiang@intel.com: clear PMD or PUD size flags when in fall through path] Link: http://lkml.kernel.org/r/148589842696.5820.16078080610311444794.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/148545058784.17912.6353162518188733642.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com> Signed-off-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Jan Kara <jack@suse.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/dax')
-rw-r--r--drivers/dax/dax.c34
1 files changed, 13 insertions, 21 deletions
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 0261f332bf3e..922ec461dcaa 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -419,7 +419,7 @@ static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
419 return -1; 419 return -1;
420} 420}
421 421
422static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) 422static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
423{ 423{
424 struct device *dev = &dax_dev->dev; 424 struct device *dev = &dax_dev->dev;
425 struct dax_region *dax_region; 425 struct dax_region *dax_region;
@@ -455,23 +455,6 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
455 return VM_FAULT_NOPAGE; 455 return VM_FAULT_NOPAGE;
456} 456}
457 457
458static int dax_dev_fault(struct vm_fault *vmf)
459{
460 struct vm_area_struct *vma = vmf->vma;
461 int rc;
462 struct file *filp = vma->vm_file;
463 struct dax_dev *dax_dev = filp->private_data;
464
465 dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
466 current->comm, (vmf->flags & FAULT_FLAG_WRITE)
467 ? "write" : "read", vma->vm_start, vma->vm_end);
468 rcu_read_lock();
469 rc = __dax_dev_fault(dax_dev, vmf);
470 rcu_read_unlock();
471
472 return rc;
473}
474
475static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) 458static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
476{ 459{
477 unsigned long pmd_addr = vmf->address & PMD_MASK; 460 unsigned long pmd_addr = vmf->address & PMD_MASK;
@@ -510,7 +493,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
510 vmf->flags & FAULT_FLAG_WRITE); 493 vmf->flags & FAULT_FLAG_WRITE);
511} 494}
512 495
513static int dax_dev_pmd_fault(struct vm_fault *vmf) 496static int dax_dev_fault(struct vm_fault *vmf)
514{ 497{
515 int rc; 498 int rc;
516 struct file *filp = vmf->vma->vm_file; 499 struct file *filp = vmf->vma->vm_file;
@@ -522,7 +505,16 @@ static int dax_dev_pmd_fault(struct vm_fault *vmf)
522 vmf->vma->vm_start, vmf->vma->vm_end); 505 vmf->vma->vm_start, vmf->vma->vm_end);
523 506
524 rcu_read_lock(); 507 rcu_read_lock();
525 rc = __dax_dev_pmd_fault(dax_dev, vmf); 508 switch (vmf->flags & FAULT_FLAG_SIZE_MASK) {
509 case FAULT_FLAG_SIZE_PTE:
510 rc = __dax_dev_pte_fault(dax_dev, vmf);
511 break;
512 case FAULT_FLAG_SIZE_PMD:
513 rc = __dax_dev_pmd_fault(dax_dev, vmf);
514 break;
515 default:
516 return VM_FAULT_FALLBACK;
517 }
526 rcu_read_unlock(); 518 rcu_read_unlock();
527 519
528 return rc; 520 return rc;
@@ -530,7 +522,7 @@ static int dax_dev_pmd_fault(struct vm_fault *vmf)
530 522
531static const struct vm_operations_struct dax_dev_vm_ops = { 523static const struct vm_operations_struct dax_dev_vm_ops = {
532 .fault = dax_dev_fault, 524 .fault = dax_dev_fault,
533 .pmd_fault = dax_dev_pmd_fault, 525 .huge_fault = dax_dev_fault,
534}; 526};
535 527
536static int dax_mmap(struct file *filp, struct vm_area_struct *vma) 528static int dax_mmap(struct file *filp, struct vm_area_struct *vma)