aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-01 16:17:28 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-01 16:17:28 -0400
commit75723957673bfa10c98b735259f891cc79cf0450 (patch)
tree7382ea8e4bbda6ddf8d6f29f708aa531e262cd36
parente2cd68f7cd07cc898581bd736ebdd6f2c2323c2e (diff)
Fix possible splice() mmap_sem deadlock
Nick Piggin points out that splice isn't being good about the mmap semaphore: while two readers can nest inside each others, it does leave a possible deadlock if a writer (ie a new mmap()) comes in during that nesting. Original "just move the locking" patch by Nick, replaced by one by me based on an optimistic pagefault_disable(). And then Jens tested and updated that patch. Reported-by: Nick Piggin <npiggin@suse.de> Tested-by: Jens Axboe <jens.axboe@oracle.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/splice.c46
1 files changed, 34 insertions, 12 deletions
diff --git a/fs/splice.c b/fs/splice.c
index c010a72ca2d2..e95a36228863 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1224,6 +1224,33 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1224} 1224}
1225 1225
1226/* 1226/*
1227 * Do a copy-from-user while holding the mmap_semaphore for reading, in a
1228 * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
1229 * for writing) and page faulting on the user memory pointed to by src.
1230 * This assumes that we will very rarely hit the partial != 0 path, or this
1231 * will not be a win.
1232 */
1233static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
1234{
1235 int partial;
1236
1237 pagefault_disable();
1238 partial = __copy_from_user_inatomic(dst, src, n);
1239 pagefault_enable();
1240
1241 /*
1242 * Didn't copy everything, drop the mmap_sem and do a faulting copy
1243 */
1244 if (unlikely(partial)) {
1245 up_read(&current->mm->mmap_sem);
1246 partial = copy_from_user(dst, src, n);
1247 down_read(&current->mm->mmap_sem);
1248 }
1249
1250 return partial;
1251}
1252
1253/*
1227 * Map an iov into an array of pages and offset/length tupples. With the 1254 * Map an iov into an array of pages and offset/length tupples. With the
1228 * partial_page structure, we can map several non-contiguous ranges into 1255 * partial_page structure, we can map several non-contiguous ranges into
1229 * our ones pages[] map instead of splitting that operation into pieces. 1256 * our ones pages[] map instead of splitting that operation into pieces.
@@ -1236,31 +1263,26 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1236{ 1263{
1237 int buffers = 0, error = 0; 1264 int buffers = 0, error = 0;
1238 1265
1239 /*
1240 * It's ok to take the mmap_sem for reading, even
1241 * across a "get_user()".
1242 */
1243 down_read(&current->mm->mmap_sem); 1266 down_read(&current->mm->mmap_sem);
1244 1267
1245 while (nr_vecs) { 1268 while (nr_vecs) {
1246 unsigned long off, npages; 1269 unsigned long off, npages;
1270 struct iovec entry;
1247 void __user *base; 1271 void __user *base;
1248 size_t len; 1272 size_t len;
1249 int i; 1273 int i;
1250 1274
1251 /* 1275 error = -EFAULT;
1252 * Get user address base and length for this iovec. 1276 if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry)))
1253 */
1254 error = get_user(base, &iov->iov_base);
1255 if (unlikely(error))
1256 break;
1257 error = get_user(len, &iov->iov_len);
1258 if (unlikely(error))
1259 break; 1277 break;
1260 1278
1279 base = entry.iov_base;
1280 len = entry.iov_len;
1281
1261 /* 1282 /*
1262 * Sanity check this iovec. 0 read succeeds. 1283 * Sanity check this iovec. 0 read succeeds.
1263 */ 1284 */
1285 error = 0;
1264 if (unlikely(!len)) 1286 if (unlikely(!len))
1265 break; 1287 break;
1266 error = -EFAULT; 1288 error = -EFAULT;