author		Dan Williams <dan.j.williams@intel.com>	2016-01-15 19:56:55 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-15 20:56:32 -0500
commit		3565fce3a6597e91b8dee3e8e36ebf70f8b7ef9b (patch)
tree		54f05861c87cb2c2710552b61a46cb5831b06296 /mm
parent		5c7fb56e5e3f7035dd798a8e1adee639f87043e5 (diff)
mm, x86: get_user_pages() for dax mappings
A dax mapping establishes a pte with _PAGE_DEVMAP set when the driver has
established a devm_memremap_pages() mapping, i.e. when the pfn_t return from
->direct_access() has PFN_DEV and PFN_MAP set.  Later, when encountering
_PAGE_DEVMAP during a page table walk we lookup and pin a struct dev_pagemap
instance to keep the result of pfn_to_page() valid until put_page().

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
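[Editor's note: for orientation, the pte-level pinning rule that the mm/gup.c hunks
below implement can be condensed roughly as follows. This is an illustrative sketch
only, not part of the patch; the helper name pin_devmap_pte_page() is invented, and
the pte locking, retry and no_page handling of follow_page_pte() are omitted.]

/*
 * Illustrative sketch (not part of the patch): a pte_devmap() pte has no
 * struct page that vm_normal_page() will return, so the pfn's dev_pagemap
 * must be looked up and held across get_page(); once the page reference is
 * taken, the pgmap reference can be dropped again.
 */
#include <linux/mm.h>
#include <linux/memremap.h>

static struct page *pin_devmap_pte_page(pte_t pte)
{
	struct dev_pagemap *pgmap;
	struct page *page;

	if (!pte_devmap(pte))
		return NULL;

	pgmap = get_dev_pagemap(pte_pfn(pte), NULL);	/* pin the pagemap */
	if (!pgmap)
		return NULL;				/* device memory already torn down */

	page = pte_page(pte);
	get_page(page);			/* page ref keeps pfn_to_page() valid until put_page() */
	put_dev_pagemap(pgmap);		/* safe to drop the pagemap ref now */

	return page;
}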
Diffstat (limited to 'mm')
-rw-r--r--	mm/gup.c	30
-rw-r--r--	mm/huge_memory.c	75
-rw-r--r--	mm/swap.c	1
3 files changed, 89 insertions(+), 17 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index e95b0cb6ed81..aa21c4b865a5 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -4,6 +4,7 @@
 #include <linux/spinlock.h>
 
 #include <linux/mm.h>
+#include <linux/memremap.h>
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
 #include <linux/swap.h>
@@ -62,6 +63,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmd, unsigned int flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct dev_pagemap *pgmap = NULL;
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t *ptep, pte;
@@ -98,7 +100,17 @@ retry:
 	}
 
 	page = vm_normal_page(vma, address, pte);
-	if (unlikely(!page)) {
+	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
+		/*
+		 * Only return device mapping pages in the FOLL_GET case since
+		 * they are only valid while holding the pgmap reference.
+		 */
+		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
+		if (pgmap)
+			page = pte_page(pte);
+		else
+			goto no_page;
+	} else if (unlikely(!page)) {
 		if (flags & FOLL_DUMP) {
 			/* Avoid special (like zero) pages in core dumps */
 			page = ERR_PTR(-EFAULT);
@@ -129,8 +141,15 @@ retry:
 		goto retry;
 	}
 
-	if (flags & FOLL_GET)
+	if (flags & FOLL_GET) {
 		get_page(page);
+
+		/* drop the pgmap reference now that we hold the page */
+		if (pgmap) {
+			put_dev_pagemap(pgmap);
+			pgmap = NULL;
+		}
+	}
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
 		    !pte_dirty(pte) && !PageDirty(page))
@@ -237,6 +256,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	}
 	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
 		return no_page_table(vma, flags);
+	if (pmd_devmap(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		page = follow_devmap_pmd(vma, address, pmd, flags);
+		spin_unlock(ptl);
+		if (page)
+			return page;
+	}
 	if (likely(!pmd_trans_huge(*pmd)))
 		return follow_page_pte(vma, address, pmd, flags);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 82bed2bec3ed..b2db98136af9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -23,6 +23,7 @@
 #include <linux/freezer.h>
 #include <linux/pfn_t.h>
 #include <linux/mman.h>
+#include <linux/memremap.h>
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/migrate.h>
@@ -974,6 +975,63 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 	return VM_FAULT_NOPAGE;
 }
 
+static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd)
+{
+	pmd_t _pmd;
+
+	/*
+	 * We should set the dirty bit only for FOLL_WRITE but for now
+	 * the dirty bit in the pmd is meaningless. And if the dirty
+	 * bit will become meaningful and we'll only set it with
+	 * FOLL_WRITE, an atomic set_bit will be required on the pmd to
+	 * set the young bit, instead of the current set_pmd_at.
+	 */
+	_pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
+	if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
+				pmd, _pmd, 1))
+		update_mmu_cache_pmd(vma, addr, pmd);
+}
+
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, int flags)
+{
+	unsigned long pfn = pmd_pfn(*pmd);
+	struct mm_struct *mm = vma->vm_mm;
+	struct dev_pagemap *pgmap;
+	struct page *page;
+
+	assert_spin_locked(pmd_lockptr(mm, pmd));
+
+	if (flags & FOLL_WRITE && !pmd_write(*pmd))
+		return NULL;
+
+	if (pmd_present(*pmd) && pmd_devmap(*pmd))
+		/* pass */;
+	else
+		return NULL;
+
+	if (flags & FOLL_TOUCH)
+		touch_pmd(vma, addr, pmd);
+
+	/*
+	 * device mapped pages can only be returned if the
+	 * caller will manage the page reference count.
+	 */
+	if (!(flags & FOLL_GET))
+		return ERR_PTR(-EEXIST);
+
+	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
+	pgmap = get_dev_pagemap(pfn, NULL);
+	if (!pgmap)
+		return ERR_PTR(-EFAULT);
+	page = pfn_to_page(pfn);
+	get_page(page);
+	put_dev_pagemap(pgmap);
+
+	return page;
+}
+
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 		  struct vm_area_struct *vma)
@@ -1331,21 +1389,8 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 
 	page = pmd_page(*pmd);
 	VM_BUG_ON_PAGE(!PageHead(page), page);
-	if (flags & FOLL_TOUCH) {
-		pmd_t _pmd;
-		/*
-		 * We should set the dirty bit only for FOLL_WRITE but
-		 * for now the dirty bit in the pmd is meaningless.
-		 * And if the dirty bit will become meaningful and
-		 * we'll only set it with FOLL_WRITE, an atomic
-		 * set_bit will be required on the pmd to set the
-		 * young bit, instead of the current set_pmd_at.
-		 */
-		_pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
-		if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
-					  pmd, _pmd, 1))
-			update_mmu_cache_pmd(vma, addr, pmd);
-	}
+	if (flags & FOLL_TOUCH)
+		touch_pmd(vma, addr, pmd);
 	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
 		/*
 		 * We don't mlock() pte-mapped THPs. This way we can avoid
diff --git a/mm/swap.c b/mm/swap.c
index 674e2c93da4e..09fe5e97714a 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/mm_inline.h>
 #include <linux/percpu_counter.h>
+#include <linux/memremap.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>