author	Matthew Wilcox <matthew.r.wilcox@intel.com>	2015-02-16 18:59:02 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-16 20:56:03 -0500
commit	f7ca90b160307d63aaedab8bd451c24a182db20f (patch)
tree	687eb94acbc8ebfab6d5e12a57dc336ce21b7c64 /mm
parent	289c6aedac981533331428bc933fff21ae332c9e (diff)
dax,ext2: replace the XIP page fault handler with the DAX page fault handler
Instead of calling aops->get_xip_mem from the fault handler, the
filesystem passes a get_block_t that is used to find the appropriate
blocks.
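
For illustration, a minimal sketch of the new convention as a filesystem might wire it up (the dax_fault()/ext2_get_block() names reflect the API this series introduces; the IS_DAX() check and exact prototypes are assumptions, not a verbatim excerpt from the patch):

	/*
	 * Sketch only: assumes the dax_fault(vma, vmf, get_block) entry point
	 * from this series and ext2's existing ext2_get_block() helper.
	 */
	static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		/* DAX resolves the faulting offset via the filesystem's get_block_t */
		return dax_fault(vma, vmf, ext2_get_block);
	}

	static const struct vm_operations_struct ext2_dax_vm_ops = {
		.fault		= ext2_dax_fault,
	};

	static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
	{
		if (!IS_DAX(file_inode(file)))	/* IS_DAX() assumed from this series */
			return generic_file_mmap(file, vma);

		file_accessed(file);
		vma->vm_ops = &ext2_dax_vm_ops;
		vma->vm_flags |= VM_MIXEDMAP;
		return 0;
	}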
This requires that all architectures implement copy_user_page(). At the
time of writing, mips and arm do not. Patches exist and are in progress.
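
On architectures without data-cache aliasing, copy_user_page() is typically a trivial wrapper around copy_page(); a sketch of that fallback, modeled on the asm-generic pattern (exact header placement assumed):

	/*
	 * Minimal clear_user_page()/copy_user_page() an architecture without
	 * D-cache aliasing could provide (asm-generic style; sketch only).
	 * Architectures with virtually indexed caches must instead handle
	 * aliasing between the kernel and user mappings around the copy.
	 */
	#define clear_user_page(page, vaddr, pg)	clear_page(page)
	#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)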
[akpm@linux-foundation.org: remap_file_pages went away]
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/filemap_xip.c	206
1 file changed, 0 insertions(+), 206 deletions(-)
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 9c869f402c07..59fb387b2238 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -23,212 +23,6 @@
 #include <asm/io.h>
 
 /*
- * We do use our own empty page to avoid interference with other users
- * of ZERO_PAGE(), such as /dev/zero
- */
-static DEFINE_MUTEX(xip_sparse_mutex);
-static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq);
-static struct page *__xip_sparse_page;
-
-/* called under xip_sparse_mutex */
-static struct page *xip_sparse_page(void)
-{
-	if (!__xip_sparse_page) {
-		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
-
-		if (page)
-			__xip_sparse_page = page;
-	}
-	return __xip_sparse_page;
-}
-
-/*
- * __xip_unmap is invoked from xip_unmap and xip_write
- *
- * This function walks all vmas of the address_space and unmaps the
- * __xip_sparse_page when found at pgoff.
- */
-static void __xip_unmap(struct address_space * mapping, unsigned long pgoff)
-{
-	struct vm_area_struct *vma;
-	struct page *page;
-	unsigned count;
-	int locked = 0;
-
-	count = read_seqcount_begin(&xip_sparse_seq);
-
-	page = __xip_sparse_page;
-	if (!page)
-		return;
-
-retry:
-	i_mmap_lock_read(mapping);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-		pte_t *pte, pteval;
-		spinlock_t *ptl;
-		struct mm_struct *mm = vma->vm_mm;
-		unsigned long address = vma->vm_start +
-			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-
-		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		pte = page_check_address(page, mm, address, &ptl, 1);
-		if (pte) {
-			/* Nuke the page table entry. */
-			flush_cache_page(vma, address, pte_pfn(*pte));
-			pteval = ptep_clear_flush(vma, address, pte);
-			page_remove_rmap(page);
-			dec_mm_counter(mm, MM_FILEPAGES);
-			BUG_ON(pte_dirty(pteval));
-			pte_unmap_unlock(pte, ptl);
-			/* must invalidate_page _before_ freeing the page */
-			mmu_notifier_invalidate_page(mm, address);
-			page_cache_release(page);
-		}
-	}
-	i_mmap_unlock_read(mapping);
-
-	if (locked) {
-		mutex_unlock(&xip_sparse_mutex);
-	} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
-		mutex_lock(&xip_sparse_mutex);
-		locked = 1;
-		goto retry;
-	}
-}
-
-/*
- * xip_fault() is invoked via the vma operations vector for a
- * mapped memory region to read in file data during a page fault.
- *
- * This function is derived from filemap_fault, but used for execute in place
- */
-static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct file *file = vma->vm_file;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	pgoff_t size;
-	void *xip_mem;
-	unsigned long xip_pfn;
-	struct page *page;
-	int error;
-
-	/* XXX: are VM_FAULT_ codes OK? */
-again:
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (vmf->pgoff >= size)
-		return VM_FAULT_SIGBUS;
-
-	error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
-						&xip_mem, &xip_pfn);
-	if (likely(!error))
-		goto found;
-	if (error != -ENODATA)
-		return VM_FAULT_OOM;
-
-	/* sparse block */
-	if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
-	    (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) &&
-	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
-		int err;
-
-		/* maybe shared writable, allocate new block */
-		mutex_lock(&xip_sparse_mutex);
-		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
-							&xip_mem, &xip_pfn);
-		mutex_unlock(&xip_sparse_mutex);
-		if (error)
-			return VM_FAULT_SIGBUS;
-		/* unmap sparse mappings at pgoff from all other vmas */
-		__xip_unmap(mapping, vmf->pgoff);
-
-found:
-		/*
-		 * We must recheck i_size under i_mmap_rwsem to prevent races
-		 * with truncation
-		 */
-		i_mmap_lock_read(mapping);
-		size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
-							PAGE_CACHE_SHIFT;
-		if (unlikely(vmf->pgoff >= size)) {
-			i_mmap_unlock_read(mapping);
-			return VM_FAULT_SIGBUS;
-		}
-		err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
-							xip_pfn);
-		i_mmap_unlock_read(mapping);
-		if (err == -ENOMEM)
-			return VM_FAULT_OOM;
-		/*
-		 * err == -EBUSY is fine, we've raced against another thread
-		 * that faulted-in the same page
-		 */
-		if (err != -EBUSY)
-			BUG_ON(err);
-		return VM_FAULT_NOPAGE;
-	} else {
-		int err, ret = VM_FAULT_OOM;
-
-		mutex_lock(&xip_sparse_mutex);
-		write_seqcount_begin(&xip_sparse_seq);
-		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
-							&xip_mem, &xip_pfn);
-		if (unlikely(!error)) {
-			write_seqcount_end(&xip_sparse_seq);
-			mutex_unlock(&xip_sparse_mutex);
-			goto again;
-		}
-		if (error != -ENODATA)
-			goto out;
-
-		/*
-		 * We must recheck i_size under i_mmap_rwsem to prevent races
-		 * with truncation
-		 */
-		i_mmap_lock_read(mapping);
-		size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
-							PAGE_CACHE_SHIFT;
-		if (unlikely(vmf->pgoff >= size)) {
-			ret = VM_FAULT_SIGBUS;
-			goto unlock;
-		}
-		/* not shared and writable, use xip_sparse_page() */
-		page = xip_sparse_page();
-		if (!page)
-			goto unlock;
-		err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
-							page);
-		if (err == -ENOMEM)
-			goto unlock;
-
-		ret = VM_FAULT_NOPAGE;
-unlock:
-		i_mmap_unlock_read(mapping);
-out:
-		write_seqcount_end(&xip_sparse_seq);
-		mutex_unlock(&xip_sparse_mutex);
-
-		return ret;
-	}
-}
-
-static const struct vm_operations_struct xip_file_vm_ops = {
-	.fault	= xip_file_fault,
-	.page_mkwrite	= filemap_page_mkwrite,
-};
-
-int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
-{
-	BUG_ON(!file->f_mapping->a_ops->get_xip_mem);
-
-	file_accessed(file);
-	vma->vm_ops = &xip_file_vm_ops;
-	vma->vm_flags |= VM_MIXEDMAP;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(xip_file_mmap);
-
-/*
  * truncate a page used for execute in place
  * functionality is analog to block_truncate_page but does use get_xip_mem
  * to get the page instead of page cache