summary | refs | log | tree | commit | diff | stats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
author: Hugh Dickins <hughd@google.com> 2015-08-17 20:34:27 -0400
committer: Al Viro <viro@zeniv.linux.org.uk> 2015-08-21 02:33:07 -0400
commit: 6f179af88f60b32c2855e7f3e16ea8e336a7043f (patch)
tree: 7cf9a4aedd37c273a4318924080fdd154e601f66 /mm/swapfile.c
parent: 061f98e959ea025a5d87c3e089d59ec2ec5f5d6d (diff)
mm: fix potential data race in SyS_swapon
While running KernelThreadSanitizer (ktsan) on upstream kernel with
trinity, we got a few reports from SyS_swapon, here is one of them:

Read of size 8 by thread T307 (K7621):
 [< inlined >] SyS_swapon+0x3c0/0x1850 SYSC_swapon mm/swapfile.c:2395
 [<ffffffff812242c0>] SyS_swapon+0x3c0/0x1850 mm/swapfile.c:2345
 [<ffffffff81e97c8a>] ia32_do_call+0x1b/0x25

Looks like the swap_lock should be taken when iterating through the
swap_info array on lines 2392 - 2401: q->swap_file may be reset to
NULL by another thread before it is dereferenced for f_mapping.

But why is that iteration needed at all? Doesn't the claim_swapfile()
which follows do all that is needed to check for a duplicate entry -
FMODE_EXCL on a bdev, testing IS_SWAPFILE under i_mutex on a regfile?

Well, not quite: bd_may_claim() allows the same "holder" to claim the
bdev again, so we do need to use a different holder than "sys_swapon";
and we should not replace appropriate -EBUSY by inappropriate -EINVAL.

Index i was reused in a cpu loop further down: renamed cpu there.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- mm/swapfile.c 25
1 files changed, 7 insertions, 18 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 41e4581af7c5..aebc2dd6e649 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2143,11 +2143,10 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
2143 if (S_ISBLK(inode->i_mode)) { 2143 if (S_ISBLK(inode->i_mode)) {
2144 p->bdev = bdgrab(I_BDEV(inode)); 2144 p->bdev = bdgrab(I_BDEV(inode));
2145 error = blkdev_get(p->bdev, 2145 error = blkdev_get(p->bdev,
2146 FMODE_READ | FMODE_WRITE | FMODE_EXCL, 2146 FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
2147 sys_swapon);
2148 if (error < 0) { 2147 if (error < 0) {
2149 p->bdev = NULL; 2148 p->bdev = NULL;
2150 return -EINVAL; 2149 return error;
2151 } 2150 }
2152 p->old_block_size = block_size(p->bdev); 2151 p->old_block_size = block_size(p->bdev);
2153 error = set_blocksize(p->bdev, PAGE_SIZE); 2152 error = set_blocksize(p->bdev, PAGE_SIZE);
@@ -2348,7 +2347,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2348 struct filename *name; 2347 struct filename *name;
2349 struct file *swap_file = NULL; 2348 struct file *swap_file = NULL;
2350 struct address_space *mapping; 2349 struct address_space *mapping;
2351 int i;
2352 int prio; 2350 int prio;
2353 int error; 2351 int error;
2354 union swap_header *swap_header; 2352 union swap_header *swap_header;
@@ -2388,19 +2386,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2388 2386
2389 p->swap_file = swap_file; 2387 p->swap_file = swap_file;
2390 mapping = swap_file->f_mapping; 2388 mapping = swap_file->f_mapping;
2391
2392 for (i = 0; i < nr_swapfiles; i++) {
2393 struct swap_info_struct *q = swap_info[i];
2394
2395 if (q == p || !q->swap_file)
2396 continue;
2397 if (mapping == q->swap_file->f_mapping) {
2398 error = -EBUSY;
2399 goto bad_swap;
2400 }
2401 }
2402
2403 inode = mapping->host; 2389 inode = mapping->host;
2390
2404 /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */ 2391 /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
2405 error = claim_swapfile(p, inode); 2392 error = claim_swapfile(p, inode);
2406 if (unlikely(error)) 2393 if (unlikely(error))
@@ -2433,6 +2420,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2433 goto bad_swap; 2420 goto bad_swap;
2434 } 2421 }
2435 if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2422 if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
2423 int cpu;
2424
2436 p->flags |= SWP_SOLIDSTATE; 2425 p->flags |= SWP_SOLIDSTATE;
2437 /* 2426 /*
2438 * select a random position to start with to help wear leveling 2427 * select a random position to start with to help wear leveling
@@ -2451,9 +2440,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2451 error = -ENOMEM; 2440 error = -ENOMEM;
2452 goto bad_swap; 2441 goto bad_swap;
2453 } 2442 }
2454 for_each_possible_cpu(i) { 2443 for_each_possible_cpu(cpu) {
2455 struct percpu_cluster *cluster; 2444 struct percpu_cluster *cluster;
2456 cluster = per_cpu_ptr(p->percpu_cluster, i); 2445 cluster = per_cpu_ptr(p->percpu_cluster, cpu);
2457 cluster_set_null(&cluster->index); 2446 cluster_set_null(&cluster->index);
2458 } 2447 }
2459 } 2448 }