diff options
author | Nick Piggin <npiggin@suse.de> | 2008-08-20 17:09:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-20 18:40:32 -0400 |
commit | 538f8ea6c85232d00bfa5edd9ba85f16c01057c9 (patch) | |
tree | c1a4a49c0c7f16b4fe8ec560137084bf556efc50 | |
parent | 479db0bf408e65baa14d2a9821abfcbc0804b847 (diff) |
mm: xip fix fault vs sparse page invalidate race
XIP has a race between sparse pages being inserted into page tables, and
sparse pages being zapped when it's time to put a non-sparse page in.
What can happen is that a process can be left with a dangling sparse page
in a MAP_SHARED mapping, while the rest of the world sees the non-sparse
version. I.e., data corruption.
Guard these operations with a seqlock, making fault-in-sparse-pages the
slowpath, and try-to-unmap-sparse-pages the fastpath.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jared Hulbert <jaredeh@gmail.com>
Acked-by: Carsten Otte <cotte@freenet.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/filemap_xip.c | 60 |
1 files changed, 46 insertions, 14 deletions
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 8b710ca13247..5b9ec47ea25a 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <linux/rmap.h> | 15 | #include <linux/rmap.h> |
16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
17 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
18 | #include <linux/seqlock.h> | ||
19 | #include <linux/mutex.h> | ||
18 | #include <asm/tlbflush.h> | 20 | #include <asm/tlbflush.h> |
19 | #include <asm/io.h> | 21 | #include <asm/io.h> |
20 | 22 | ||
@@ -22,22 +24,18 @@ | |||
22 | * We do use our own empty page to avoid interference with other users | 24 | * We do use our own empty page to avoid interference with other users |
23 | * of ZERO_PAGE(), such as /dev/zero | 25 | * of ZERO_PAGE(), such as /dev/zero |
24 | */ | 26 | */ |
27 | static DEFINE_MUTEX(xip_sparse_mutex); | ||
28 | static seqcount_t xip_sparse_seq = SEQCNT_ZERO; | ||
25 | static struct page *__xip_sparse_page; | 29 | static struct page *__xip_sparse_page; |
26 | 30 | ||
31 | /* called under xip_sparse_mutex */ | ||
27 | static struct page *xip_sparse_page(void) | 32 | static struct page *xip_sparse_page(void) |
28 | { | 33 | { |
29 | if (!__xip_sparse_page) { | 34 | if (!__xip_sparse_page) { |
30 | struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); | 35 | struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); |
31 | 36 | ||
32 | if (page) { | 37 | if (page) |
33 | static DEFINE_SPINLOCK(xip_alloc_lock); | 38 | __xip_sparse_page = page; |
34 | spin_lock(&xip_alloc_lock); | ||
35 | if (!__xip_sparse_page) | ||
36 | __xip_sparse_page = page; | ||
37 | else | ||
38 | __free_page(page); | ||
39 | spin_unlock(&xip_alloc_lock); | ||
40 | } | ||
41 | } | 39 | } |
42 | return __xip_sparse_page; | 40 | return __xip_sparse_page; |
43 | } | 41 | } |
@@ -174,11 +172,16 @@ __xip_unmap (struct address_space * mapping, | |||
174 | pte_t pteval; | 172 | pte_t pteval; |
175 | spinlock_t *ptl; | 173 | spinlock_t *ptl; |
176 | struct page *page; | 174 | struct page *page; |
175 | unsigned count; | ||
176 | int locked = 0; | ||
177 | |||
178 | count = read_seqcount_begin(&xip_sparse_seq); | ||
177 | 179 | ||
178 | page = __xip_sparse_page; | 180 | page = __xip_sparse_page; |
179 | if (!page) | 181 | if (!page) |
180 | return; | 182 | return; |
181 | 183 | ||
184 | retry: | ||
182 | spin_lock(&mapping->i_mmap_lock); | 185 | spin_lock(&mapping->i_mmap_lock); |
183 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 186 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
184 | mm = vma->vm_mm; | 187 | mm = vma->vm_mm; |
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping, | |||
198 | } | 201 | } |
199 | } | 202 | } |
200 | spin_unlock(&mapping->i_mmap_lock); | 203 | spin_unlock(&mapping->i_mmap_lock); |
204 | |||
205 | if (locked) { | ||
206 | mutex_unlock(&xip_sparse_mutex); | ||
207 | } else if (read_seqcount_retry(&xip_sparse_seq, count)) { | ||
208 | mutex_lock(&xip_sparse_mutex); | ||
209 | locked = 1; | ||
210 | goto retry; | ||
211 | } | ||
201 | } | 212 | } |
202 | 213 | ||
203 | /* | 214 | /* |
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
218 | int error; | 229 | int error; |
219 | 230 | ||
220 | /* XXX: are VM_FAULT_ codes OK? */ | 231 | /* XXX: are VM_FAULT_ codes OK? */ |
221 | 232 | again: | |
222 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 233 | size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
223 | if (vmf->pgoff >= size) | 234 | if (vmf->pgoff >= size) |
224 | return VM_FAULT_SIGBUS; | 235 | return VM_FAULT_SIGBUS; |
@@ -245,6 +256,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
245 | __xip_unmap(mapping, vmf->pgoff); | 256 | __xip_unmap(mapping, vmf->pgoff); |
246 | 257 | ||
247 | found: | 258 | found: |
259 | printk("%s insert %lx@%lx\n", current->comm, (unsigned long)vmf->virtual_address, xip_pfn); | ||
248 | err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, | 260 | err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, |
249 | xip_pfn); | 261 | xip_pfn); |
250 | if (err == -ENOMEM) | 262 | if (err == -ENOMEM) |
@@ -252,14 +264,34 @@ found: | |||
252 | BUG_ON(err); | 264 | BUG_ON(err); |
253 | return VM_FAULT_NOPAGE; | 265 | return VM_FAULT_NOPAGE; |
254 | } else { | 266 | } else { |
267 | int err, ret = VM_FAULT_OOM; | ||
268 | |||
269 | mutex_lock(&xip_sparse_mutex); | ||
270 | write_seqcount_begin(&xip_sparse_seq); | ||
271 | error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0, | ||
272 | &xip_mem, &xip_pfn); | ||
273 | if (unlikely(!error)) { | ||
274 | write_seqcount_end(&xip_sparse_seq); | ||
275 | mutex_unlock(&xip_sparse_mutex); | ||
276 | goto again; | ||
277 | } | ||
278 | if (error != -ENODATA) | ||
279 | goto out; | ||
255 | /* not shared and writable, use xip_sparse_page() */ | 280 | /* not shared and writable, use xip_sparse_page() */ |
256 | page = xip_sparse_page(); | 281 | page = xip_sparse_page(); |
257 | if (!page) | 282 | if (!page) |
258 | return VM_FAULT_OOM; | 283 | goto out; |
284 | err = vm_insert_page(vma, (unsigned long)vmf->virtual_address, | ||
285 | page); | ||
286 | if (err == -ENOMEM) | ||
287 | goto out; | ||
259 | 288 | ||
260 | page_cache_get(page); | 289 | ret = VM_FAULT_NOPAGE; |
261 | vmf->page = page; | 290 | out: |
262 | return 0; | 291 | write_seqcount_end(&xip_sparse_seq); |
292 | mutex_unlock(&xip_sparse_mutex); | ||
293 | |||
294 | return ret; | ||
263 | } | 295 | } |
264 | } | 296 | } |
265 | 297 | ||