diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-01 16:17:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-01 16:17:28 -0400 |
commit | 75723957673bfa10c98b735259f891cc79cf0450 (patch) | |
tree | 7382ea8e4bbda6ddf8d6f29f708aa531e262cd36 /fs | |
parent | e2cd68f7cd07cc898581bd736ebdd6f2c2323c2e (diff) |
Fix possible splice() mmap_sem deadlock
Nick Piggin points out that splice isn't being good about the mmap
semaphore: while two readers can nest inside each others, it does leave
a possible deadlock if a writer (ie a new mmap()) comes in during that
nesting.
Original "just move the locking" patch by Nick, replaced by one by me
based on an optimistic pagefault_disable(). And then Jens tested and
updated that patch.
Reported-by: Nick Piggin <npiggin@suse.de>
Tested-by: Jens Axboe <jens.axboe@oracle.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/splice.c | 46 |
1 files changed, 34 insertions, 12 deletions
diff --git a/fs/splice.c b/fs/splice.c index c010a72ca2d2..e95a36228863 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1224,6 +1224,33 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1224 | } | 1224 | } |
1225 | 1225 | ||
1226 | /* | 1226 | /* |
1227 | * Do a copy-from-user while holding the mmap_semaphore for reading, in a | ||
1228 | * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem | ||
1229 | * for writing) and page faulting on the user memory pointed to by src. | ||
1230 | * This assumes that we will very rarely hit the partial != 0 path, or this | ||
1231 | * will not be a win. | ||
1232 | */ | ||
1233 | static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n) | ||
1234 | { | ||
1235 | int partial; | ||
1236 | |||
1237 | pagefault_disable(); | ||
1238 | partial = __copy_from_user_inatomic(dst, src, n); | ||
1239 | pagefault_enable(); | ||
1240 | |||
1241 | /* | ||
1242 | * Didn't copy everything, drop the mmap_sem and do a faulting copy | ||
1243 | */ | ||
1244 | if (unlikely(partial)) { | ||
1245 | up_read(¤t->mm->mmap_sem); | ||
1246 | partial = copy_from_user(dst, src, n); | ||
1247 | down_read(¤t->mm->mmap_sem); | ||
1248 | } | ||
1249 | |||
1250 | return partial; | ||
1251 | } | ||
1252 | |||
1253 | /* | ||
1227 | * Map an iov into an array of pages and offset/length tupples. With the | 1254 | * Map an iov into an array of pages and offset/length tupples. With the |
1228 | * partial_page structure, we can map several non-contiguous ranges into | 1255 | * partial_page structure, we can map several non-contiguous ranges into |
1229 | * our ones pages[] map instead of splitting that operation into pieces. | 1256 | * our ones pages[] map instead of splitting that operation into pieces. |
@@ -1236,31 +1263,26 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1236 | { | 1263 | { |
1237 | int buffers = 0, error = 0; | 1264 | int buffers = 0, error = 0; |
1238 | 1265 | ||
1239 | /* | ||
1240 | * It's ok to take the mmap_sem for reading, even | ||
1241 | * across a "get_user()". | ||
1242 | */ | ||
1243 | down_read(¤t->mm->mmap_sem); | 1266 | down_read(¤t->mm->mmap_sem); |
1244 | 1267 | ||
1245 | while (nr_vecs) { | 1268 | while (nr_vecs) { |
1246 | unsigned long off, npages; | 1269 | unsigned long off, npages; |
1270 | struct iovec entry; | ||
1247 | void __user *base; | 1271 | void __user *base; |
1248 | size_t len; | 1272 | size_t len; |
1249 | int i; | 1273 | int i; |
1250 | 1274 | ||
1251 | /* | 1275 | error = -EFAULT; |
1252 | * Get user address base and length for this iovec. | 1276 | if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry))) |
1253 | */ | ||
1254 | error = get_user(base, &iov->iov_base); | ||
1255 | if (unlikely(error)) | ||
1256 | break; | ||
1257 | error = get_user(len, &iov->iov_len); | ||
1258 | if (unlikely(error)) | ||
1259 | break; | 1277 | break; |
1260 | 1278 | ||
1279 | base = entry.iov_base; | ||
1280 | len = entry.iov_len; | ||
1281 | |||
1261 | /* | 1282 | /* |
1262 | * Sanity check this iovec. 0 read succeeds. | 1283 | * Sanity check this iovec. 0 read succeeds. |
1263 | */ | 1284 | */ |
1285 | error = 0; | ||
1264 | if (unlikely(!len)) | 1286 | if (unlikely(!len)) |
1265 | break; | 1287 | break; |
1266 | error = -EFAULT; | 1288 | error = -EFAULT; |