aboutsummaryrefslogtreecommitdiffstats
path: root/mm/fremap.c
diff options
context:
space:
mode:
authorNick Piggin <npiggin@suse.de>2007-07-19 04:46:59 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-19 13:04:41 -0400
commit54cb8821de07f2ffcd28c380ce9b93d5784b40d7 (patch)
tree1de676534963d96af42863b20191bc9f80060dea /mm/fremap.c
parentd00806b183152af6d24f46f0c33f14162ca1262a (diff)
mm: merge populate and nopage into fault (fixes nonlinear)
Nonlinear mappings are (AFAIKS) simply a virtual memory concept that encodes the virtual address -> file offset differently from linear mappings. ->populate is a layering violation because the filesystem/pagecache code should need to know anything about the virtual memory mapping. The hitch here is that the ->nopage handler didn't pass down enough information (ie. pgoff). But it is more logical to pass pgoff rather than have the ->nopage function calculate it itself anyway (because that's a similar layering violation). Having the populate handler install the pte itself is likewise a nasty thing to be doing. This patch introduces a new fault handler that replaces ->nopage and ->populate and (later) ->nopfn. Most of the old mechanism is still in place so there is a lot of duplication and nice cleanups that can be removed if everyone switches over. The rationale for doing this in the first place is that nonlinear mappings are subject to the pagefault vs invalidate/truncate race too, and it seemed stupid to duplicate the synchronisation logic rather than just consolidate the two. After this patch, MAP_NONBLOCK no longer sets up ptes for pages present in pagecache. Seems like a fringe functionality anyway. NOPAGE_REFAULT is removed. This should be implemented with ->fault, and no users have hit mainline yet. [akpm@linux-foundation.org: cleanup] [randy.dunlap@oracle.com: doc. fixes for readahead] [akpm@linux-foundation.org: build fix] Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com> Cc: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/fremap.c')
-rw-r--r--mm/fremap.c103
1 file changed, 72 insertions, 31 deletions
diff --git a/mm/fremap.c b/mm/fremap.c
index 4e3f53dd5fd..01e51f01b84 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -126,6 +126,25 @@ out:
126 return err; 126 return err;
127} 127}
128 128
129static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
130 unsigned long addr, unsigned long size, pgoff_t pgoff)
131{
132 int err;
133
134 do {
135 err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
136 if (err)
137 return err;
138
139 size -= PAGE_SIZE;
140 addr += PAGE_SIZE;
141 pgoff++;
142 } while (size);
143
144 return 0;
145
146}
147
129/*** 148/***
130 * sys_remap_file_pages - remap arbitrary pages of a shared backing store 149 * sys_remap_file_pages - remap arbitrary pages of a shared backing store
131 * file within an existing vma. 150 * file within an existing vma.
@@ -183,41 +202,63 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
183 * the single existing vma. vm_private_data is used as a 202 * the single existing vma. vm_private_data is used as a
184 * swapout cursor in a VM_NONLINEAR vma. 203 * swapout cursor in a VM_NONLINEAR vma.
185 */ 204 */
186 if (vma && (vma->vm_flags & VM_SHARED) && 205 if (!vma || !(vma->vm_flags & VM_SHARED))
187 (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) && 206 goto out;
188 vma->vm_ops && vma->vm_ops->populate && 207
189 end > start && start >= vma->vm_start && 208 if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
190 end <= vma->vm_end) { 209 goto out;
191 210
192 /* Must set VM_NONLINEAR before any pages are populated. */ 211 if ((!vma->vm_ops || !vma->vm_ops->populate) &&
193 if (pgoff != linear_page_index(vma, start) && 212 !(vma->vm_flags & VM_CAN_NONLINEAR))
194 !(vma->vm_flags & VM_NONLINEAR)) { 213 goto out;
195 if (!has_write_lock) { 214
196 up_read(&mm->mmap_sem); 215 if (end <= start || start < vma->vm_start || end > vma->vm_end)
197 down_write(&mm->mmap_sem); 216 goto out;
198 has_write_lock = 1; 217
199 goto retry; 218 /* Must set VM_NONLINEAR before any pages are populated. */
219 if (!(vma->vm_flags & VM_NONLINEAR)) {
220 /* Don't need a nonlinear mapping, exit success */
221 if (pgoff == linear_page_index(vma, start)) {
222 err = 0;
223 goto out;
224 }
225
226 if (!has_write_lock) {
227 up_read(&mm->mmap_sem);
228 down_write(&mm->mmap_sem);
229 has_write_lock = 1;
230 goto retry;
231 }
232 mapping = vma->vm_file->f_mapping;
233 spin_lock(&mapping->i_mmap_lock);
234 flush_dcache_mmap_lock(mapping);
235 vma->vm_flags |= VM_NONLINEAR;
236 vma_prio_tree_remove(vma, &mapping->i_mmap);
237 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
238 flush_dcache_mmap_unlock(mapping);
239 spin_unlock(&mapping->i_mmap_lock);
240 }
241
242 if (vma->vm_flags & VM_CAN_NONLINEAR) {
243 err = populate_range(mm, vma, start, size, pgoff);
244 if (!err && !(flags & MAP_NONBLOCK)) {
245 if (unlikely(has_write_lock)) {
246 downgrade_write(&mm->mmap_sem);
247 has_write_lock = 0;
200 } 248 }
201 mapping = vma->vm_file->f_mapping; 249 make_pages_present(start, start+size);
202 spin_lock(&mapping->i_mmap_lock);
203 flush_dcache_mmap_lock(mapping);
204 vma->vm_flags |= VM_NONLINEAR;
205 vma_prio_tree_remove(vma, &mapping->i_mmap);
206 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
207 flush_dcache_mmap_unlock(mapping);
208 spin_unlock(&mapping->i_mmap_lock);
209 } 250 }
251 } else
252 err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot,
253 pgoff, flags & MAP_NONBLOCK);
210 254
211 err = vma->vm_ops->populate(vma, start, size, 255 /*
212 vma->vm_page_prot, 256 * We can't clear VM_NONLINEAR because we'd have to do
213 pgoff, flags & MAP_NONBLOCK); 257 * it after ->populate completes, and that would prevent
258 * downgrading the lock. (Locks can't be upgraded).
259 */
214 260
215 /* 261out:
216 * We can't clear VM_NONLINEAR because we'd have to do
217 * it after ->populate completes, and that would prevent
218 * downgrading the lock. (Locks can't be upgraded).
219 */
220 }
221 if (likely(!has_write_lock)) 262 if (likely(!has_write_lock))
222 up_read(&mm->mmap_sem); 263 up_read(&mm->mmap_sem);
223 else 264 else