author		Matthew Wilcox <matthew.r.wilcox@intel.com>	2015-02-16 18:58:50 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-16 20:56:03 -0500
commit		2e4cdab0584fa884e0a81c4f45b93ce875c9fcaa
tree		c20a4224ed2ac8d5e9416c7fcc451cc144025626 /mm/memory.c
parent		283307c7607de2a06d3bfae4cfbf5a566d457090
mm: allow page fault handlers to perform the COW
Currently COW of an XIP file is done by first bringing in a read-only
mapping, then retrying the fault and copying the page. It is much more
efficient to tell the fault handler that a COW is being attempted (by
passing in the pre-allocated page in the vm_fault structure), and allow
the handler to perform the COW operation itself.
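For illustration, a fault handler that supports this might fill the
pre-allocated page along these lines (a minimal sketch:
example_fill_cow_page() and example_read_block() are hypothetical
stand-ins for a filesystem's own routines, not part of this patch;
only the vm_fault fields are real):

/*
 * Hypothetical sketch: copy file data straight into the COW page that
 * the core MM pre-allocated and passed down in vmf->cow_page, instead
 * of inserting a read-only mapping and waiting for a second fault.
 */
static int example_fill_cow_page(struct vm_area_struct *vma,
                                 struct vm_fault *vmf)
{
        void *vaddr = kmap_atomic(vmf->cow_page);
        int error;

        /* example_read_block() stands in for the fs-specific copy */
        error = example_read_block(vma->vm_file, vmf->pgoff, vaddr);
        kunmap_atomic(vaddr);
        return error;
}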
The handler cannot insert the page itself if there is already a read-only
mapping at that address, so allow the handler to return VM_FAULT_LOCKED
and set the fault_page to be NULL. This indicates to the MM code that the
i_mmap_lock is held instead of the page lock.
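Continuing the hypothetical sketch above, the handler would then signal
the no-page case roughly as follows (the i_size re-check mirrors what a
handler could do and is illustrative; only the vmf->page = NULL plus
VM_FAULT_LOCKED convention comes from this patch):

/*
 * Hypothetical sketch: there is no page-cache page to lock, so take
 * i_mmap_lock for read to keep truncate away, set vmf->page to NULL
 * and return VM_FAULT_LOCKED.  The caller releases i_mmap_lock once
 * the new PTE is in place.
 */
static int example_finish_cow_fault(struct vm_area_struct *vma,
                                    struct vm_fault *vmf)
{
        struct address_space *mapping = vma->vm_file->f_mapping;
        pgoff_t size;

        i_mmap_lock_read(mapping);

        /* re-check the file size in case we raced with truncate */
        size = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_CACHE_SIZE);
        if (vmf->pgoff >= size) {
                i_mmap_unlock_read(mapping);
                return VM_FAULT_SIGBUS;
        }

        vmf->page = NULL;
        return VM_FAULT_LOCKED;
}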
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
 mm/memory.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 1b04e13b9993..8068893697bb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1965,6 +1965,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	vmf.pgoff = page->index;
 	vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 	vmf.page = page;
+	vmf.cow_page = NULL;
 
 	ret = vma->vm_ops->page_mkwrite(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
@@ -2639,7 +2640,8 @@ oom:
  * See filemap_fault() and __lock_page_retry().
  */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
-		pgoff_t pgoff, unsigned int flags, struct page **page)
+		pgoff_t pgoff, unsigned int flags,
+		struct page *cow_page, struct page **page)
 {
 	struct vm_fault vmf;
 	int ret;
@@ -2648,10 +2650,13 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
 	vmf.page = NULL;
+	vmf.cow_page = cow_page;
 
 	ret = vma->vm_ops->fault(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
+	if (!vmf.page)
+		goto out;
 
 	if (unlikely(PageHWPoison(vmf.page))) {
 		if (ret & VM_FAULT_LOCKED)
@@ -2665,6 +2670,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
 	else
 		VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
 
+ out:
 	*page = vmf.page;
 	return ret;
 }
@@ -2835,7 +2841,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		pte_unmap_unlock(pte, ptl);
 	}
 
-	ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
@@ -2875,26 +2881,43 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	}
 
-	ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
 
-	copy_user_highpage(new_page, fault_page, address, vma);
+	if (fault_page)
+		copy_user_highpage(new_page, fault_page, address, vma);
 	__SetPageUptodate(new_page);
 
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (unlikely(!pte_same(*pte, orig_pte))) {
 		pte_unmap_unlock(pte, ptl);
-		unlock_page(fault_page);
-		page_cache_release(fault_page);
+		if (fault_page) {
+			unlock_page(fault_page);
+			page_cache_release(fault_page);
+		} else {
+			/*
+			 * The fault handler has no page to lock, so it holds
+			 * i_mmap_lock for read to protect against truncate.
+			 */
+			i_mmap_unlock_read(vma->vm_file->f_mapping);
+		}
 		goto uncharge_out;
 	}
 	do_set_pte(vma, address, new_page, pte, true, true);
 	mem_cgroup_commit_charge(new_page, memcg, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
-	unlock_page(fault_page);
-	page_cache_release(fault_page);
+	if (fault_page) {
+		unlock_page(fault_page);
+		page_cache_release(fault_page);
+	} else {
+		/*
+		 * The fault handler has no page to lock, so it holds
+		 * i_mmap_lock for read to protect against truncate.
+		 */
+		i_mmap_unlock_read(vma->vm_file->f_mapping);
+	}
 	return ret;
 uncharge_out:
 	mem_cgroup_cancel_charge(new_page, memcg);
@@ -2913,7 +2936,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	int dirtied = 0;
 	int ret, tmp;
 
-	ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 