aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Piggin <npiggin@suse.de>2008-08-20 17:09:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-08-20 18:40:32 -0400
commit538f8ea6c85232d00bfa5edd9ba85f16c01057c9 (patch)
treec1a4a49c0c7f16b4fe8ec560137084bf556efc50
parent479db0bf408e65baa14d2a9821abfcbc0804b847 (diff)
mm: xip fix fault vs sparse page invalidate race
XIP has a race between sparse pages being inserted into page tables, and sparse pages being zapped when it's time to put a non-sparse page in. What can happen is that a process can be left with a dangling sparse page in a MAP_SHARED mapping, while the rest of the world sees the non-sparse version. I.e., data corruption. Guard these operations with a seqlock, making fault-in-sparse-pages the slowpath, and try-to-unmap-sparse-pages the fastpath. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Jared Hulbert <jaredeh@gmail.com> Acked-by: Carsten Otte <cotte@freenet.de> Cc: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/filemap_xip.c60
1 files changed, 46 insertions, 14 deletions
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 8b710ca13247..5b9ec47ea25a 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,6 +15,8 @@
15#include <linux/rmap.h> 15#include <linux/rmap.h>
16#include <linux/mmu_notifier.h> 16#include <linux/mmu_notifier.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/seqlock.h>
19#include <linux/mutex.h>
18#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
19#include <asm/io.h> 21#include <asm/io.h>
20 22
@@ -22,22 +24,18 @@
22 * We do use our own empty page to avoid interference with other users 24 * We do use our own empty page to avoid interference with other users
23 * of ZERO_PAGE(), such as /dev/zero 25 * of ZERO_PAGE(), such as /dev/zero
24 */ 26 */
27static DEFINE_MUTEX(xip_sparse_mutex);
28static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
25static struct page *__xip_sparse_page; 29static struct page *__xip_sparse_page;
26 30
31/* called under xip_sparse_mutex */
27static struct page *xip_sparse_page(void) 32static struct page *xip_sparse_page(void)
28{ 33{
29 if (!__xip_sparse_page) { 34 if (!__xip_sparse_page) {
30 struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); 35 struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
31 36
32 if (page) { 37 if (page)
33 static DEFINE_SPINLOCK(xip_alloc_lock); 38 __xip_sparse_page = page;
34 spin_lock(&xip_alloc_lock);
35 if (!__xip_sparse_page)
36 __xip_sparse_page = page;
37 else
38 __free_page(page);
39 spin_unlock(&xip_alloc_lock);
40 }
41 } 39 }
42 return __xip_sparse_page; 40 return __xip_sparse_page;
43} 41}
@@ -174,11 +172,16 @@ __xip_unmap (struct address_space * mapping,
174 pte_t pteval; 172 pte_t pteval;
175 spinlock_t *ptl; 173 spinlock_t *ptl;
176 struct page *page; 174 struct page *page;
175 unsigned count;
176 int locked = 0;
177
178 count = read_seqcount_begin(&xip_sparse_seq);
177 179
178 page = __xip_sparse_page; 180 page = __xip_sparse_page;
179 if (!page) 181 if (!page)
180 return; 182 return;
181 183
184retry:
182 spin_lock(&mapping->i_mmap_lock); 185 spin_lock(&mapping->i_mmap_lock);
183 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 186 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
184 mm = vma->vm_mm; 187 mm = vma->vm_mm;
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
198 } 201 }
199 } 202 }
200 spin_unlock(&mapping->i_mmap_lock); 203 spin_unlock(&mapping->i_mmap_lock);
204
205 if (locked) {
206 mutex_unlock(&xip_sparse_mutex);
207 } else if (read_seqcount_retry(&xip_sparse_seq, count)) {
208 mutex_lock(&xip_sparse_mutex);
209 locked = 1;
210 goto retry;
211 }
201} 212}
202 213
203/* 214/*
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
218 int error; 229 int error;
219 230
220 /* XXX: are VM_FAULT_ codes OK? */ 231 /* XXX: are VM_FAULT_ codes OK? */
221 232again:
222 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 233 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
223 if (vmf->pgoff >= size) 234 if (vmf->pgoff >= size)
224 return VM_FAULT_SIGBUS; 235 return VM_FAULT_SIGBUS;
@@ -245,6 +256,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
245 __xip_unmap(mapping, vmf->pgoff); 256 __xip_unmap(mapping, vmf->pgoff);
246 257
247found: 258found:
259 printk("%s insert %lx@%lx\n", current->comm, (unsigned long)vmf->virtual_address, xip_pfn);
248 err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, 260 err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
249 xip_pfn); 261 xip_pfn);
250 if (err == -ENOMEM) 262 if (err == -ENOMEM)
@@ -252,14 +264,34 @@ found:
252 BUG_ON(err); 264 BUG_ON(err);
253 return VM_FAULT_NOPAGE; 265 return VM_FAULT_NOPAGE;
254 } else { 266 } else {
267 int err, ret = VM_FAULT_OOM;
268
269 mutex_lock(&xip_sparse_mutex);
270 write_seqcount_begin(&xip_sparse_seq);
271 error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
272 &xip_mem, &xip_pfn);
273 if (unlikely(!error)) {
274 write_seqcount_end(&xip_sparse_seq);
275 mutex_unlock(&xip_sparse_mutex);
276 goto again;
277 }
278 if (error != -ENODATA)
279 goto out;
255 /* not shared and writable, use xip_sparse_page() */ 280 /* not shared and writable, use xip_sparse_page() */
256 page = xip_sparse_page(); 281 page = xip_sparse_page();
257 if (!page) 282 if (!page)
258 return VM_FAULT_OOM; 283 goto out;
284 err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
285 page);
286 if (err == -ENOMEM)
287 goto out;
259 288
260 page_cache_get(page); 289 ret = VM_FAULT_NOPAGE;
261 vmf->page = page; 290out:
262 return 0; 291 write_seqcount_end(&xip_sparse_seq);
292 mutex_unlock(&xip_sparse_mutex);
293
294 return ret;
263 } 295 }
264} 296}
265 297