path: root/mm/memory.c
author     Jan Kara <jack@suse.cz>                        2016-05-12 12:29:19 -0400
committer  Ross Zwisler <ross.zwisler@linux.intel.com>    2016-05-19 17:27:49 -0400
commit     bc2466e4257369d0ebee2b6265070d323343fa72 (patch)
tree       dc3c050e1b7bde8f0c93b1eb0764750f10331fed /mm/memory.c
parent     ac401cc782429cc8560ce4840b1405d603740917 (diff)
dax: Use radix tree entry lock to protect cow faults
When doing cow faults, we cannot directly fill in the PTE as we do for other faults, because we rely on generic code to do proper accounting of the cowed page. We also have no page to lock to protect against races with truncate, as other faults do, and the protection must extend until the moment the generic code inserts the cowed page into the PTE; by that point the fs-specific i_mmap_sem no longer protects us. So far we relied on i_mmap_lock for this protection, but that is completely special to cow faults. To make fault locking more uniform, use the DAX entry lock instead.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
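In practice the change boils down to a new cleanup rule on the generic side: when the fault handler returns VM_FAULT_DAX_LOCKED, it has handed back a locked DAX radix tree entry through vmf.entry instead of a locked page, and do_cow_fault() must release that entry lock rather than a page lock. A minimal sketch of the new pattern, condensed from the mm/memory.c hunks below (not a complete function):

        /*
         * Cleanup after a cow fault. In the DAX case there is no struct page
         * to unlock; the fault handler holds the radix tree entry lock and
         * signals that with VM_FAULT_DAX_LOCKED, so drop that lock instead.
         */
        if (!(ret & VM_FAULT_DAX_LOCKED)) {
                unlock_page(fault_page);
                put_page(fault_page);
        } else {
                dax_unlock_mapping_entry(vma->vm_file->f_mapping, pgoff);
        }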
Diffstat (limited to 'mm/memory.c')
-rw-r--r--    mm/memory.c    38
1 file changed, 18 insertions, 20 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 93897f23cc11..f09cdb8d48fa 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -63,6 +63,7 @@
 #include <linux/dma-debug.h>
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/dax.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -2785,7 +2786,8 @@ oom:
  */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
                        pgoff_t pgoff, unsigned int flags,
-                       struct page *cow_page, struct page **page)
+                       struct page *cow_page, struct page **page,
+                       void **entry)
 {
        struct vm_fault vmf;
        int ret;
@@ -2800,8 +2802,10 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
        ret = vma->vm_ops->fault(vma, &vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
                return ret;
-       if (!vmf.page)
-               goto out;
+       if (ret & VM_FAULT_DAX_LOCKED) {
+               *entry = vmf.entry;
+               return ret;
+       }
 
        if (unlikely(PageHWPoison(vmf.page))) {
                if (ret & VM_FAULT_LOCKED)
@@ -2815,7 +2819,6 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
        else
                VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
 
- out:
        *page = vmf.page;
        return ret;
 }
@@ -2987,7 +2990,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                pte_unmap_unlock(pte, ptl);
        }
 
-       ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+       ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
                return ret;
 
@@ -3010,6 +3013,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
        struct page *fault_page, *new_page;
+       void *fault_entry;
        struct mem_cgroup *memcg;
        spinlock_t *ptl;
        pte_t *pte;
@@ -3027,26 +3031,24 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                return VM_FAULT_OOM;
        }
 
-       ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
+       ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page,
+                        &fault_entry);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
                goto uncharge_out;
 
-       if (fault_page)
+       if (!(ret & VM_FAULT_DAX_LOCKED))
                copy_user_highpage(new_page, fault_page, address, vma);
        __SetPageUptodate(new_page);
 
        pte = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (unlikely(!pte_same(*pte, orig_pte))) {
                pte_unmap_unlock(pte, ptl);
-               if (fault_page) {
+               if (!(ret & VM_FAULT_DAX_LOCKED)) {
                        unlock_page(fault_page);
                        put_page(fault_page);
                } else {
-                       /*
-                        * The fault handler has no page to lock, so it holds
-                        * i_mmap_lock for read to protect against truncate.
-                        */
-                       i_mmap_unlock_read(vma->vm_file->f_mapping);
+                       dax_unlock_mapping_entry(vma->vm_file->f_mapping,
+                                                pgoff);
                }
                goto uncharge_out;
        }
@@ -3054,15 +3056,11 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        mem_cgroup_commit_charge(new_page, memcg, false, false);
        lru_cache_add_active_or_unevictable(new_page, vma);
        pte_unmap_unlock(pte, ptl);
-       if (fault_page) {
+       if (!(ret & VM_FAULT_DAX_LOCKED)) {
                unlock_page(fault_page);
                put_page(fault_page);
        } else {
-               /*
-                * The fault handler has no page to lock, so it holds
-                * i_mmap_lock for read to protect against truncate.
-                */
-               i_mmap_unlock_read(vma->vm_file->f_mapping);
+               dax_unlock_mapping_entry(vma->vm_file->f_mapping, pgoff);
        }
        return ret;
 uncharge_out:
@@ -3082,7 +3080,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        int dirtied = 0;
        int ret, tmp;
 
-       ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
+       ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
                return ret;
 
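The provider side of this contract (the DAX fault handler and the VM_FAULT_DAX_LOCKED definition) lives in other files of the same patch and is not shown in this diffstat. As an illustrative sketch only, assuming the handler shape implied by the generic code above, a DAX cow fault would be reported roughly like this instead of by taking i_mmap_lock:

        /*
         * Illustrative sketch, not the actual fs/dax.c code: for a cow fault
         * there is no struct page to return locked, so the handler keeps the
         * radix tree entry lock it already holds, passes the locked entry
         * back through vmf->entry, and sets VM_FAULT_DAX_LOCKED so that
         * __do_fault()/do_cow_fault() later call dax_unlock_mapping_entry()
         * instead of unlock_page()/put_page().
         */
        if (vmf->cow_page) {
                /* ... fill vmf->cow_page from the backing block ... */
                vmf->entry = entry;     /* the locked radix tree entry */
                return VM_FAULT_DAX_LOCKED;     /* possibly combined with other VM_FAULT_* bits */
        }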