author    | Hugh Dickins <hughd@google.com>   | 2015-08-17 20:34:27 -0400
committer | Al Viro <viro@zeniv.linux.org.uk> | 2015-08-21 02:33:07 -0400
commit    | 6f179af88f60b32c2855e7f3e16ea8e336a7043f (patch)
tree      | 7cf9a4aedd37c273a4318924080fdd154e601f66 /mm/swapfile.c
parent    | 061f98e959ea025a5d87c3e089d59ec2ec5f5d6d (diff)
mm: fix potential data race in SyS_swapon
While running KernelThreadSanitizer (ktsan) on an upstream kernel with
trinity, we got a few reports from SyS_swapon; here is one of them:
Read of size 8 by thread T307 (K7621):
[< inlined >] SyS_swapon+0x3c0/0x1850 SYSC_swapon mm/swapfile.c:2395
[<ffffffff812242c0>] SyS_swapon+0x3c0/0x1850 mm/swapfile.c:2345
[<ffffffff81e97c8a>] ia32_do_call+0x1b/0x25
Looks like the swap_lock should be taken when iterating through the
swap_info array on lines 2392 - 2401: q->swap_file may be reset to
NULL by another thread before it is dereferenced for f_mapping.
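To make the race concrete, here is a minimal userspace model (pthreads;
the types and names only mimic the kernel ones, this is not the kernel
code): one thread does the same unlocked check-then-dereference the
removed loop did, while another clears the pointer under the lock, which
is exactly the window ktsan flagged between testing q->swap_file and
reading ->f_mapping.

#include <pthread.h>
#include <stdio.h>

struct file { void *f_mapping; };
struct swap_info_struct { struct file *swap_file; };

static pthread_mutex_t swap_lock = PTHREAD_MUTEX_INITIALIZER;
static struct swap_info_struct *swap_info[1];

/* Models the removed SYSC_swapon loop: reads swap_file with no swap_lock. */
static void *swapon_scan(void *arg)
{
	struct swap_info_struct *q = swap_info[0];

	if (q->swap_file) {
		/* window: the other thread may set swap_file to NULL here */
		printf("mapping = %p\n", q->swap_file->f_mapping);
	}
	return NULL;
}

/* Models swapoff resetting the entry, which it does under the lock. */
static void *swapoff_clear(void *arg)
{
	pthread_mutex_lock(&swap_lock);
	swap_info[0]->swap_file = NULL;
	pthread_mutex_unlock(&swap_lock);
	return NULL;
}

int main(void)
{
	static struct file f;
	static struct swap_info_struct si = { &f };
	pthread_t a, b;

	swap_info[0] = &si;
	pthread_create(&a, NULL, swapon_scan, NULL);
	pthread_create(&b, NULL, swapoff_clear, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}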
But why is that iteration needed at all? Doesn't the claim_swapfile()
which follows do all that is needed to check for a duplicate entry -
FMODE_EXCL on a bdev, testing IS_SWAPFILE under i_mutex on a regfile?
Well, not quite: bd_may_claim() allows the same "holder" to claim the
bdev again, so we do need to use a different holder than "sys_swapon";
and we should not replace appropriate -EBUSY by inappropriate -EINVAL.
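The holder rule can be sketched in a few lines of userspace C (a model of
the bd_may_claim() semantics only, not the kernel implementation; all names
below are illustrative): with a single shared holder like the old
"sys_swapon" cookie, a second swapon of the same bdev is allowed to
re-claim it, whereas a per-swapon holder (the swap_info_struct p now passed
to blkdev_get) makes the second claim fail, and that -EBUSY is what
SYSC_swapon now returns unchanged instead of -EINVAL.

#include <stdio.h>

struct block_device { void *bd_holder; };

/* Sketch of the bd_may_claim() rule: a free bdev, or one already held
 * by this same holder, may be claimed again. */
static int may_claim(struct block_device *bdev, void *holder)
{
	return bdev->bd_holder == NULL || bdev->bd_holder == holder;
}

static int claim(struct block_device *bdev, void *holder)
{
	if (!may_claim(bdev, holder))
		return -16;		/* -EBUSY */
	bdev->bd_holder = holder;
	return 0;
}

int main(void)
{
	struct block_device bdev = { NULL };
	static int p1, p2;	/* stand-ins for two swap_info_structs */
	static int shared;	/* stand-in for the old shared "sys_swapon" holder */
	int r1, r2;

	/* Old scheme: both swapons pass the same holder, so the second
	 * exclusive claim still succeeds and the duplicate slips through. */
	r1 = claim(&bdev, &shared);
	r2 = claim(&bdev, &shared);
	printf("shared holder:     %d %d\n", r1, r2);

	/* New scheme: each swapon passes its own p, so the second claim
	 * fails with -EBUSY, which is now propagated to the caller. */
	bdev.bd_holder = NULL;
	r1 = claim(&bdev, &p1);
	r2 = claim(&bdev, &p2);
	printf("per-swapon holder: %d %d\n", r1, r2);
	return 0;
}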
Index i was reused in a cpu loop further down: renamed cpu there.
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 25
1 file changed, 7 insertions, 18 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 41e4581af7c5..aebc2dd6e649 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2143,11 +2143,10 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 	if (S_ISBLK(inode->i_mode)) {
 		p->bdev = bdgrab(I_BDEV(inode));
 		error = blkdev_get(p->bdev,
-				   FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				   sys_swapon);
+				   FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
 		if (error < 0) {
 			p->bdev = NULL;
-			return -EINVAL;
+			return error;
 		}
 		p->old_block_size = block_size(p->bdev);
 		error = set_blocksize(p->bdev, PAGE_SIZE);
@@ -2348,7 +2347,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	struct filename *name;
 	struct file *swap_file = NULL;
 	struct address_space *mapping;
-	int i;
 	int prio;
 	int error;
 	union swap_header *swap_header;
@@ -2388,19 +2386,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	p->swap_file = swap_file;
 	mapping = swap_file->f_mapping;
-
-	for (i = 0; i < nr_swapfiles; i++) {
-		struct swap_info_struct *q = swap_info[i];
-
-		if (q == p || !q->swap_file)
-			continue;
-		if (mapping == q->swap_file->f_mapping) {
-			error = -EBUSY;
-			goto bad_swap;
-		}
-	}
-
 	inode = mapping->host;
+
 	/* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
 	error = claim_swapfile(p, inode);
 	if (unlikely(error))
@@ -2433,6 +2420,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap;
 	}
 	if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+		int cpu;
+
 		p->flags |= SWP_SOLIDSTATE;
 		/*
 		 * select a random position to start with to help wear leveling
@@ -2451,9 +2440,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 			error = -ENOMEM;
 			goto bad_swap;
 		}
-		for_each_possible_cpu(i) {
+		for_each_possible_cpu(cpu) {
 			struct percpu_cluster *cluster;
-			cluster = per_cpu_ptr(p->percpu_cluster, i);
+			cluster = per_cpu_ptr(p->percpu_cluster, cpu);
 			cluster_set_null(&cluster->index);
 		}
 	}