aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJens Axboe <axboe@suse.de>2006-07-10 05:00:01 -0400
committerJens Axboe <axboe@suse.de>2006-07-10 05:00:01 -0400
commitaadd06e5c56b9ff5117ec77e59eada43dc46e2fc (patch)
tree16da42148eab5ebcfc821fcedd0541f35a2c318b /fs
parentb3cf257623fabd8f1ee6700a6d328cc1c5da5a1d (diff)
[PATCH] splice: fix problems with sys_tee()
Several issues noticed/fixed: - We cannot reliably block in link_pipe() while holding both input and output mutexes. So do preparatory checks before locking down both mutexes and doing the link. - The ipipe->nrbufs vs i check was bad, because we could have dropped the ipipe lock in-between. This causes us to potentially look at unknown buffers if we were racing with someone else reading this pipe. Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'fs')
-rw-r--r--fs/splice.c238
1 files changed, 133 insertions, 105 deletions
diff --git a/fs/splice.c b/fs/splice.c
index 05fd2787be98..684bca3d3a10 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1307,6 +1307,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
1307} 1307}
1308 1308
1309/* 1309/*
1310 * Make sure there's data to read. Wait for input if we can, otherwise
1311 * return an appropriate error.
1312 */
1313static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1314{
1315 int ret;
1316
1317 /*
1318 * Check ->nrbufs without the inode lock first. This function
1319 * is speculative anyways, so missing one is ok.
1320 */
1321 if (pipe->nrbufs)
1322 return 0;
1323
1324 ret = 0;
1325 mutex_lock(&pipe->inode->i_mutex);
1326
1327 while (!pipe->nrbufs) {
1328 if (signal_pending(current)) {
1329 ret = -ERESTARTSYS;
1330 break;
1331 }
1332 if (!pipe->writers)
1333 break;
1334 if (!pipe->waiting_writers) {
1335 if (flags & SPLICE_F_NONBLOCK) {
1336 ret = -EAGAIN;
1337 break;
1338 }
1339 }
1340 pipe_wait(pipe);
1341 }
1342
1343 mutex_unlock(&pipe->inode->i_mutex);
1344 return ret;
1345}
1346
1347/*
1348 * Make sure there's writeable room. Wait for room if we can, otherwise
1349 * return an appropriate error.
1350 */
1351static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1352{
1353 int ret;
1354
1355 /*
1356 * Check ->nrbufs without the inode lock first. This function
1357 * is speculative anyways, so missing one is ok.
1358 */
1359 if (pipe->nrbufs < PIPE_BUFFERS)
1360 return 0;
1361
1362 ret = 0;
1363 mutex_lock(&pipe->inode->i_mutex);
1364
1365 while (pipe->nrbufs >= PIPE_BUFFERS) {
1366 if (!pipe->readers) {
1367 send_sig(SIGPIPE, current, 0);
1368 ret = -EPIPE;
1369 break;
1370 }
1371 if (flags & SPLICE_F_NONBLOCK) {
1372 ret = -EAGAIN;
1373 break;
1374 }
1375 if (signal_pending(current)) {
1376 ret = -ERESTARTSYS;
1377 break;
1378 }
1379 pipe->waiting_writers++;
1380 pipe_wait(pipe);
1381 pipe->waiting_writers--;
1382 }
1383
1384 mutex_unlock(&pipe->inode->i_mutex);
1385 return ret;
1386}
1387
1388/*
1310 * Link contents of ipipe to opipe. 1389 * Link contents of ipipe to opipe.
1311 */ 1390 */
1312static int link_pipe(struct pipe_inode_info *ipipe, 1391static int link_pipe(struct pipe_inode_info *ipipe,
@@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1314 size_t len, unsigned int flags) 1393 size_t len, unsigned int flags)
1315{ 1394{
1316 struct pipe_buffer *ibuf, *obuf; 1395 struct pipe_buffer *ibuf, *obuf;
1317 int ret, do_wakeup, i, ipipe_first; 1396 int ret = 0, i = 0, nbuf;
1318
1319 ret = do_wakeup = ipipe_first = 0;
1320 1397
1321 /* 1398 /*
1322 * Potential ABBA deadlock, work around it by ordering lock 1399 * Potential ABBA deadlock, work around it by ordering lock
@@ -1324,126 +1401,62 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1324 * could deadlock (one doing tee from A -> B, the other from B -> A). 1401 * could deadlock (one doing tee from A -> B, the other from B -> A).
1325 */ 1402 */
1326 if (ipipe->inode < opipe->inode) { 1403 if (ipipe->inode < opipe->inode) {
1327 ipipe_first = 1; 1404 mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
1328 mutex_lock(&ipipe->inode->i_mutex); 1405 mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
1329 mutex_lock(&opipe->inode->i_mutex);
1330 } else { 1406 } else {
1331 mutex_lock(&opipe->inode->i_mutex); 1407 mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
1332 mutex_lock(&ipipe->inode->i_mutex); 1408 mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
1333 } 1409 }
1334 1410
1335 for (i = 0;; i++) { 1411 do {
1336 if (!opipe->readers) { 1412 if (!opipe->readers) {
1337 send_sig(SIGPIPE, current, 0); 1413 send_sig(SIGPIPE, current, 0);
1338 if (!ret) 1414 if (!ret)
1339 ret = -EPIPE; 1415 ret = -EPIPE;
1340 break; 1416 break;
1341 } 1417 }
1342 if (ipipe->nrbufs - i) {
1343 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1344 1418
1345 /* 1419 /*
1346 * If we have room, fill this buffer 1420 * If we have iterated all input buffers or ran out of
1347 */ 1421 * output room, break.
1348 if (opipe->nrbufs < PIPE_BUFFERS) { 1422 */
1349 int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); 1423 if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
1350 1424 break;
1351 /*
1352 * Get a reference to this pipe buffer,
1353 * so we can copy the contents over.
1354 */
1355 ibuf->ops->get(ipipe, ibuf);
1356
1357 obuf = opipe->bufs + nbuf;
1358 *obuf = *ibuf;
1359
1360 /*
1361 * Don't inherit the gift flag, we need to
1362 * prevent multiple steals of this page.
1363 */
1364 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1365
1366 if (obuf->len > len)
1367 obuf->len = len;
1368
1369 opipe->nrbufs++;
1370 do_wakeup = 1;
1371 ret += obuf->len;
1372 len -= obuf->len;
1373
1374 if (!len)
1375 break;
1376 if (opipe->nrbufs < PIPE_BUFFERS)
1377 continue;
1378 }
1379
1380 /*
1381 * We have input available, but no output room.
1382 * If we already copied data, return that. If we
1383 * need to drop the opipe lock, it must be ordered
1384 * last to avoid deadlocks.
1385 */
1386 if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1387 if (!ret)
1388 ret = -EAGAIN;
1389 break;
1390 }
1391 if (signal_pending(current)) {
1392 if (!ret)
1393 ret = -ERESTARTSYS;
1394 break;
1395 }
1396 if (do_wakeup) {
1397 smp_mb();
1398 if (waitqueue_active(&opipe->wait))
1399 wake_up_interruptible(&opipe->wait);
1400 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1401 do_wakeup = 0;
1402 }
1403 1425
1404 opipe->waiting_writers++; 1426 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1405 pipe_wait(opipe); 1427 nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1406 opipe->waiting_writers--;
1407 continue;
1408 }
1409 1428
1410 /* 1429 /*
1411 * No input buffers, do the usual checks for available 1430 * Get a reference to this pipe buffer,
1412 * writers and blocking and wait if necessary 1431 * so we can copy the contents over.
1413 */ 1432 */
1414 if (!ipipe->writers) 1433 ibuf->ops->get(ipipe, ibuf);
1415 break; 1434
1416 if (!ipipe->waiting_writers) { 1435 obuf = opipe->bufs + nbuf;
1417 if (ret) 1436 *obuf = *ibuf;
1418 break; 1437
1419 }
1420 /* 1438 /*
1421 * pipe_wait() drops the ipipe mutex. To avoid deadlocks 1439 * Don't inherit the gift flag, we need to
1422 * with another process, we can only safely do that if 1440 * prevent multiple steals of this page.
1423 * the ipipe lock is ordered last.
1424 */ 1441 */
1425 if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { 1442 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1426 if (!ret)
1427 ret = -EAGAIN;
1428 break;
1429 }
1430 if (signal_pending(current)) {
1431 if (!ret)
1432 ret = -ERESTARTSYS;
1433 break;
1434 }
1435 1443
1436 if (waitqueue_active(&ipipe->wait)) 1444 if (obuf->len > len)
1437 wake_up_interruptible_sync(&ipipe->wait); 1445 obuf->len = len;
1438 kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
1439 1446
1440 pipe_wait(ipipe); 1447 opipe->nrbufs++;
1441 } 1448 ret += obuf->len;
1449 len -= obuf->len;
1450 i++;
1451 } while (len);
1442 1452
1443 mutex_unlock(&ipipe->inode->i_mutex); 1453 mutex_unlock(&ipipe->inode->i_mutex);
1444 mutex_unlock(&opipe->inode->i_mutex); 1454 mutex_unlock(&opipe->inode->i_mutex);
1445 1455
1446 if (do_wakeup) { 1456 /*
1457 * If we put data in the output pipe, wakeup any potential readers.
1458 */
1459 if (ret > 0) {
1447 smp_mb(); 1460 smp_mb();
1448 if (waitqueue_active(&opipe->wait)) 1461 if (waitqueue_active(&opipe->wait))
1449 wake_up_interruptible(&opipe->wait); 1462 wake_up_interruptible(&opipe->wait);
@@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len,
1464{ 1477{
1465 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; 1478 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
1466 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; 1479 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
1480 int ret = -EINVAL;
1467 1481
1468 /* 1482 /*
1469 * Link ipipe to the two output pipes, consuming as we go along. 1483 * Duplicate the contents of ipipe to opipe without actually
1484 * copying the data.
1470 */ 1485 */
1471 if (ipipe && opipe) 1486 if (ipipe && opipe && ipipe != opipe) {
1472 return link_pipe(ipipe, opipe, len, flags); 1487 /*
1488 * Keep going, unless we encounter an error. The ipipe/opipe
1489 * ordering doesn't really matter.
1490 */
1491 ret = link_ipipe_prep(ipipe, flags);
1492 if (!ret) {
1493 ret = link_opipe_prep(opipe, flags);
1494 if (!ret) {
1495 ret = link_pipe(ipipe, opipe, len, flags);
1496 if (!ret && (flags & SPLICE_F_NONBLOCK))
1497 ret = -EAGAIN;
1498 }
1499 }
1500 }
1473 1501
1474 return -EINVAL; 1502 return ret;
1475} 1503}
1476 1504
1477asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) 1505asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)