| field | value | date |
|---|---|---|
| author | Nick Piggin <npiggin@suse.de> | 2008-08-20 17:09:20 -0400 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-20 18:40:32 -0400 |
| commit | 538f8ea6c85232d00bfa5edd9ba85f16c01057c9 (patch) | |
| tree | c1a4a49c0c7f16b4fe8ec560137084bf556efc50 | |
| parent | 479db0bf408e65baa14d2a9821abfcbc0804b847 (diff) | |
mm: xip fix fault vs sparse page invalidate race
XIP has a race between sparse pages being inserted into page tables, and
sparse pages being zapped when it's time to put a non-sparse page in.
What can happen is that a process can be left with a dangling sparse page
in a MAP_SHARED mapping, while the rest of the world sees the non-sparse
version. I.e., data corruption.
Guard these operations with a seqlock, making fault-in-sparse-pages the
slowpath, and try-to-unmap-sparse-pages the fastpath.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jared Hulbert <jaredeh@gmail.com>
Acked-by: Carsten Otte <cotte@freenet.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
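
The fix pairs a seqcount with a mutex: the zap side (__xip_unmap()) samples the sequence count, walks the page tables locklessly, and only redoes the walk under the mutex if a concurrent fault bumped the count; the fault side always takes the mutex and bumps the count around its page-table insertion. Below is a minimal, self-contained userspace sketch of that pattern, assuming C11 atomics and pthreads; every name in it (sparse_seq, sparse_mutex, sparse_pte, fault_in_sparse, unmap_sparse) is a hypothetical stand-in, and seq_cst atomics stand in for the kernel's seqcount_t primitives rather than reproducing them.

```c
/*
 * Userspace sketch of the seqcount + mutex scheme in this patch.
 * Hypothetical names throughout; not the kernel API.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t sparse_mutex = PTHREAD_MUTEX_INITIALIZER;
static atomic_uint sparse_seq;     /* even: idle, odd: fault in progress */
static atomic_int sparse_pte;      /* stands in for a sparse-page PTE */

static unsigned read_begin(void)
{
	unsigned s;

	/* Wait out any in-progress writer (odd count), then sample. */
	while ((s = atomic_load(&sparse_seq)) & 1)
		;
	return s;
}

static int read_retry(unsigned s)
{
	/* Nonzero if a writer ran since read_begin() sampled s. */
	return atomic_load(&sparse_seq) != s;
}

/* Slow path, like the sparse branch of xip_file_fault(). */
static void fault_in_sparse(void)
{
	pthread_mutex_lock(&sparse_mutex);
	atomic_fetch_add(&sparse_seq, 1);   /* like write_seqcount_begin() */
	atomic_store(&sparse_pte, 1);       /* insert sparse-page PTE */
	atomic_fetch_add(&sparse_seq, 1);   /* like write_seqcount_end() */
	pthread_mutex_unlock(&sparse_mutex);
}

/* Fast path, like __xip_unmap(): lockless unless a race is detected. */
static void unmap_sparse(void)
{
	unsigned seq = read_begin();
	int locked = 0;

retry:
	atomic_store(&sparse_pte, 0);       /* zap sparse-page PTEs */
	if (locked) {
		pthread_mutex_unlock(&sparse_mutex);
	} else if (read_retry(seq)) {
		/* A fault raced with the zap: redo it under the mutex. */
		pthread_mutex_lock(&sparse_mutex);
		locked = 1;
		goto retry;
	}
}

static void *faults(void *arg)
{
	for (int i = 0; i < 100000; i++)
		fault_in_sparse();
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, faults, NULL);
	for (int i = 0; i < 100000; i++)
		unmap_sparse();
	pthread_join(t, NULL);
	unmap_sparse();                     /* final zap: must stick */
	printf("dangling sparse pte: %d\n", atomic_load(&sparse_pte));
	return 0;
}
```

The sketch glosses over one detail of the real patch: under the mutex, the fault path re-calls get_xip_mem() and jumps back to the again: label if real data has appeared in the meantime, so it never installs the sparse page over a freshly allocated block. The design choice is that try-to-unmap stays lockless in the common case; only a detected race pays the cost of the mutex.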
-rw-r--r--  mm/filemap_xip.c  60
1 file changed, 46 insertions(+), 14 deletions(-)
```diff
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 8b710ca13247..5b9ec47ea25a 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,6 +15,8 @@
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
@@ -22,22 +24,18 @@
  * We do use our own empty page to avoid interference with other users
  * of ZERO_PAGE(), such as /dev/zero
  */
+static DEFINE_MUTEX(xip_sparse_mutex);
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
 static struct page *__xip_sparse_page;
 
+/* called under xip_sparse_mutex */
 static struct page *xip_sparse_page(void)
 {
 	if (!__xip_sparse_page) {
 		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
 
-		if (page) {
-			static DEFINE_SPINLOCK(xip_alloc_lock);
-			spin_lock(&xip_alloc_lock);
-			if (!__xip_sparse_page)
-				__xip_sparse_page = page;
-			else
-				__free_page(page);
-			spin_unlock(&xip_alloc_lock);
-		}
+		if (page)
+			__xip_sparse_page = page;
 	}
 	return __xip_sparse_page;
 }
@@ -174,11 +172,16 @@ __xip_unmap (struct address_space * mapping,
 	pte_t pteval;
 	spinlock_t *ptl;
 	struct page *page;
+	unsigned count;
+	int locked = 0;
+
+	count = read_seqcount_begin(&xip_sparse_seq);
 
 	page = __xip_sparse_page;
 	if (!page)
 		return;
 
+retry:
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
 		}
 	}
 	spin_unlock(&mapping->i_mmap_lock);
+
+	if (locked) {
+		mutex_unlock(&xip_sparse_mutex);
+	} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
+		mutex_lock(&xip_sparse_mutex);
+		locked = 1;
+		goto retry;
+	}
 }
 
 /*
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 
 	/* XXX: are VM_FAULT_ codes OK? */
-
+again:
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (vmf->pgoff >= size)
 		return VM_FAULT_SIGBUS;
@@ -245,6 +256,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		__xip_unmap(mapping, vmf->pgoff);
 
 found:
+		printk("%s insert %lx@%lx\n", current->comm, (unsigned long)vmf->virtual_address, xip_pfn);
 		err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
 							xip_pfn);
 		if (err == -ENOMEM)
@@ -252,14 +264,34 @@ found:
 		BUG_ON(err);
 		return VM_FAULT_NOPAGE;
 	} else {
+		int err, ret = VM_FAULT_OOM;
+
+		mutex_lock(&xip_sparse_mutex);
+		write_seqcount_begin(&xip_sparse_seq);
+		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
+							&xip_mem, &xip_pfn);
+		if (unlikely(!error)) {
+			write_seqcount_end(&xip_sparse_seq);
+			mutex_unlock(&xip_sparse_mutex);
+			goto again;
+		}
+		if (error != -ENODATA)
+			goto out;
 		/* not shared and writable, use xip_sparse_page() */
 		page = xip_sparse_page();
 		if (!page)
-			return VM_FAULT_OOM;
+			goto out;
+		err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+							page);
+		if (err == -ENOMEM)
+			goto out;
 
-		page_cache_get(page);
-		vmf->page = page;
-		return 0;
+		ret = VM_FAULT_NOPAGE;
+out:
+		write_seqcount_end(&xip_sparse_seq);
+		mutex_unlock(&xip_sparse_mutex);
+
+		return ret;
 	}
 }
 
```
