diff options
| author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-12 11:14:48 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-12 11:14:48 -0400 |
| commit | 826adfe49a508f15943ce9140a3fc7f6c7661509 (patch) | |
| tree | 134742fb3a90cf37a7c3449b6e14867159099f87 | |
| parent | 7c3dec0679c66ce177726802adbe2f403942fc27 (diff) | |
| parent | aadd06e5c56b9ff5117ec77e59eada43dc46e2fc (diff) | |
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
[PATCH] splice: fix problems with sys_tee()
| -rw-r--r-- | fs/splice.c | 238 |
1 files changed, 133 insertions, 105 deletions
diff --git a/fs/splice.c b/fs/splice.c
index 05fd2787be98..684bca3d3a10 100644
--- a/fs/splice.c
+++ b/fs/splice.c
| @@ -1307,6 +1307,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, | |||
| 1307 | } | 1307 | } |
| 1308 | 1308 | ||
| 1309 | /* | 1309 | /* |
| 1310 | * Make sure there's data to read. Wait for input if we can, otherwise | ||
| 1311 | * return an appropriate error. | ||
| 1312 | */ | ||
| 1313 | static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | ||
| 1314 | { | ||
| 1315 | int ret; | ||
| 1316 | |||
| 1317 | /* | ||
| 1318 | * Check ->nrbufs without the inode lock first. This function | ||
| 1319 | * is speculative anyways, so missing one is ok. | ||
| 1320 | */ | ||
| 1321 | if (pipe->nrbufs) | ||
| 1322 | return 0; | ||
| 1323 | |||
| 1324 | ret = 0; | ||
| 1325 | mutex_lock(&pipe->inode->i_mutex); | ||
| 1326 | |||
| 1327 | while (!pipe->nrbufs) { | ||
| 1328 | if (signal_pending(current)) { | ||
| 1329 | ret = -ERESTARTSYS; | ||
| 1330 | break; | ||
| 1331 | } | ||
| 1332 | if (!pipe->writers) | ||
| 1333 | break; | ||
| 1334 | if (!pipe->waiting_writers) { | ||
| 1335 | if (flags & SPLICE_F_NONBLOCK) { | ||
| 1336 | ret = -EAGAIN; | ||
| 1337 | break; | ||
| 1338 | } | ||
| 1339 | } | ||
| 1340 | pipe_wait(pipe); | ||
| 1341 | } | ||
| 1342 | |||
| 1343 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 1344 | return ret; | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | /* | ||
| 1348 | * Make sure there's writeable room. Wait for room if we can, otherwise | ||
| 1349 | * return an appropriate error. | ||
| 1350 | */ | ||
| 1351 | static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | ||
| 1352 | { | ||
| 1353 | int ret; | ||
| 1354 | |||
| 1355 | /* | ||
| 1356 | * Check ->nrbufs without the inode lock first. This function | ||
| 1357 | * is speculative anyways, so missing one is ok. | ||
| 1358 | */ | ||
| 1359 | if (pipe->nrbufs < PIPE_BUFFERS) | ||
| 1360 | return 0; | ||
| 1361 | |||
| 1362 | ret = 0; | ||
| 1363 | mutex_lock(&pipe->inode->i_mutex); | ||
| 1364 | |||
| 1365 | while (pipe->nrbufs >= PIPE_BUFFERS) { | ||
| 1366 | if (!pipe->readers) { | ||
| 1367 | send_sig(SIGPIPE, current, 0); | ||
| 1368 | ret = -EPIPE; | ||
| 1369 | break; | ||
| 1370 | } | ||
| 1371 | if (flags & SPLICE_F_NONBLOCK) { | ||
| 1372 | ret = -EAGAIN; | ||
| 1373 | break; | ||
| 1374 | } | ||
| 1375 | if (signal_pending(current)) { | ||
| 1376 | ret = -ERESTARTSYS; | ||
| 1377 | break; | ||
| 1378 | } | ||
| 1379 | pipe->waiting_writers++; | ||
| 1380 | pipe_wait(pipe); | ||
| 1381 | pipe->waiting_writers--; | ||
| 1382 | } | ||
| 1383 | |||
| 1384 | mutex_unlock(&pipe->inode->i_mutex); | ||
| 1385 | return ret; | ||
| 1386 | } | ||
| 1387 | |||
| 1388 | /* | ||
| 1310 | * Link contents of ipipe to opipe. | 1389 | * Link contents of ipipe to opipe. |
| 1311 | */ | 1390 | */ |
| 1312 | static int link_pipe(struct pipe_inode_info *ipipe, | 1391 | static int link_pipe(struct pipe_inode_info *ipipe, |
| @@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
| 1314 | size_t len, unsigned int flags) | 1393 | size_t len, unsigned int flags) |
| 1315 | { | 1394 | { |
| 1316 | struct pipe_buffer *ibuf, *obuf; | 1395 | struct pipe_buffer *ibuf, *obuf; |
| 1317 | int ret, do_wakeup, i, ipipe_first; | 1396 | int ret = 0, i = 0, nbuf; |
| 1318 | |||
| 1319 | ret = do_wakeup = ipipe_first = 0; | ||
| 1320 | 1397 | ||
| 1321 | /* | 1398 | /* |
| 1322 | * Potential ABBA deadlock, work around it by ordering lock | 1399 | * Potential ABBA deadlock, work around it by ordering lock |
| @@ -1324,126 +1401,62 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
| 1324 | * could deadlock (one doing tee from A -> B, the other from B -> A). | 1401 | * could deadlock (one doing tee from A -> B, the other from B -> A). |
| 1325 | */ | 1402 | */ |
| 1326 | if (ipipe->inode < opipe->inode) { | 1403 | if (ipipe->inode < opipe->inode) { |
| 1327 | ipipe_first = 1; | 1404 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); |
| 1328 | mutex_lock(&ipipe->inode->i_mutex); | 1405 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); |
| 1329 | mutex_lock(&opipe->inode->i_mutex); | ||
| 1330 | } else { | 1406 | } else { |
| 1331 | mutex_lock(&opipe->inode->i_mutex); | 1407 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); |
| 1332 | mutex_lock(&ipipe->inode->i_mutex); | 1408 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); |
| 1333 | } | 1409 | } |
| 1334 | 1410 | ||
| 1335 | for (i = 0;; i++) { | 1411 | do { |
| 1336 | if (!opipe->readers) { | 1412 | if (!opipe->readers) { |
| 1337 | send_sig(SIGPIPE, current, 0); | 1413 | send_sig(SIGPIPE, current, 0); |
| 1338 | if (!ret) | 1414 | if (!ret) |
| 1339 | ret = -EPIPE; | 1415 | ret = -EPIPE; |
| 1340 | break; | 1416 | break; |
| 1341 | } | 1417 | } |
| 1342 | if (ipipe->nrbufs - i) { | ||
| 1343 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); | ||
| 1344 | 1418 | ||
| 1345 | /* | 1419 | /* |
| 1346 | * If we have room, fill this buffer | 1420 | * If we have iterated all input buffers or ran out of |
| 1347 | */ | 1421 | * output room, break. |
| 1348 | if (opipe->nrbufs < PIPE_BUFFERS) { | 1422 | */ |
| 1349 | int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); | 1423 | if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) |
| 1350 | 1424 | break; | |
| 1351 | /* | ||
| 1352 | * Get a reference to this pipe buffer, | ||
| 1353 | * so we can copy the contents over. | ||
| 1354 | */ | ||
| 1355 | ibuf->ops->get(ipipe, ibuf); | ||
| 1356 | |||
| 1357 | obuf = opipe->bufs + nbuf; | ||
| 1358 | *obuf = *ibuf; | ||
| 1359 | |||
| 1360 | /* | ||
| 1361 | * Don't inherit the gift flag, we need to | ||
| 1362 | * prevent multiple steals of this page. | ||
| 1363 | */ | ||
| 1364 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; | ||
| 1365 | |||
| 1366 | if (obuf->len > len) | ||
| 1367 | obuf->len = len; | ||
| 1368 | |||
| 1369 | opipe->nrbufs++; | ||
| 1370 | do_wakeup = 1; | ||
| 1371 | ret += obuf->len; | ||
| 1372 | len -= obuf->len; | ||
| 1373 | |||
| 1374 | if (!len) | ||
| 1375 | break; | ||
| 1376 | if (opipe->nrbufs < PIPE_BUFFERS) | ||
| 1377 | continue; | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | /* | ||
| 1381 | * We have input available, but no output room. | ||
| 1382 | * If we already copied data, return that. If we | ||
| 1383 | * need to drop the opipe lock, it must be ordered | ||
| 1384 | * last to avoid deadlocks. | ||
| 1385 | */ | ||
| 1386 | if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { | ||
| 1387 | if (!ret) | ||
| 1388 | ret = -EAGAIN; | ||
| 1389 | break; | ||
| 1390 | } | ||
| 1391 | if (signal_pending(current)) { | ||
| 1392 | if (!ret) | ||
| 1393 | ret = -ERESTARTSYS; | ||
| 1394 | break; | ||
| 1395 | } | ||
| 1396 | if (do_wakeup) { | ||
| 1397 | smp_mb(); | ||
| 1398 | if (waitqueue_active(&opipe->wait)) | ||
| 1399 | wake_up_interruptible(&opipe->wait); | ||
| 1400 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
| 1401 | do_wakeup = 0; | ||
| 1402 | } | ||
| 1403 | 1425 | ||
| 1404 | opipe->waiting_writers++; | 1426 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); |
| 1405 | pipe_wait(opipe); | 1427 | nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); |
| 1406 | opipe->waiting_writers--; | ||
| 1407 | continue; | ||
| 1408 | } | ||
| 1409 | 1428 | ||
| 1410 | /* | 1429 | /* |
| 1411 | * No input buffers, do the usual checks for available | 1430 | * Get a reference to this pipe buffer, |
| 1412 | * writers and blocking and wait if necessary | 1431 | * so we can copy the contents over. |
| 1413 | */ | 1432 | */ |
| 1414 | if (!ipipe->writers) | 1433 | ibuf->ops->get(ipipe, ibuf); |
| 1415 | break; | 1434 | |
| 1416 | if (!ipipe->waiting_writers) { | 1435 | obuf = opipe->bufs + nbuf; |
| 1417 | if (ret) | 1436 | *obuf = *ibuf; |
| 1418 | break; | 1437 | |
| 1419 | } | ||
| 1420 | /* | 1438 | /* |
| 1421 | * pipe_wait() drops the ipipe mutex. To avoid deadlocks | 1439 | * Don't inherit the gift flag, we need to |
| 1422 | * with another process, we can only safely do that if | 1440 | * prevent multiple steals of this page. |
| 1423 | * the ipipe lock is ordered last. | ||
| 1424 | */ | 1441 | */ |
| 1425 | if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { | 1442 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; |
| 1426 | if (!ret) | ||
| 1427 | ret = -EAGAIN; | ||
| 1428 | break; | ||
| 1429 | } | ||
| 1430 | if (signal_pending(current)) { | ||
| 1431 | if (!ret) | ||
| 1432 | ret = -ERESTARTSYS; | ||
| 1433 | break; | ||
| 1434 | } | ||
| 1435 | 1443 | ||
| 1436 | if (waitqueue_active(&ipipe->wait)) | 1444 | if (obuf->len > len) |
| 1437 | wake_up_interruptible_sync(&ipipe->wait); | 1445 | obuf->len = len; |
| 1438 | kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); | ||
| 1439 | 1446 | ||
| 1440 | pipe_wait(ipipe); | 1447 | opipe->nrbufs++; |
| 1441 | } | 1448 | ret += obuf->len; |
| 1449 | len -= obuf->len; | ||
| 1450 | i++; | ||
| 1451 | } while (len); | ||
| 1442 | 1452 | ||
| 1443 | mutex_unlock(&ipipe->inode->i_mutex); | 1453 | mutex_unlock(&ipipe->inode->i_mutex); |
| 1444 | mutex_unlock(&opipe->inode->i_mutex); | 1454 | mutex_unlock(&opipe->inode->i_mutex); |
| 1445 | 1455 | ||
| 1446 | if (do_wakeup) { | 1456 | /* |
| 1457 | * If we put data in the output pipe, wakeup any potential readers. | ||
| 1458 | */ | ||
| 1459 | if (ret > 0) { | ||
| 1447 | smp_mb(); | 1460 | smp_mb(); |
| 1448 | if (waitqueue_active(&opipe->wait)) | 1461 | if (waitqueue_active(&opipe->wait)) |
| 1449 | wake_up_interruptible(&opipe->wait); | 1462 | wake_up_interruptible(&opipe->wait); |
| @@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len, | |||
| 1464 | { | 1477 | { |
| 1465 | struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; | 1478 | struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; |
| 1466 | struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; | 1479 | struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; |
| 1480 | int ret = -EINVAL; | ||
| 1467 | 1481 | ||
| 1468 | /* | 1482 | /* |
| 1469 | * Link ipipe to the two output pipes, consuming as we go along. | 1483 | * Duplicate the contents of ipipe to opipe without actually |
| 1484 | * copying the data. | ||
| 1470 | */ | 1485 | */ |
| 1471 | if (ipipe && opipe) | 1486 | if (ipipe && opipe && ipipe != opipe) { |
| 1472 | return link_pipe(ipipe, opipe, len, flags); | 1487 | /* |
| 1488 | * Keep going, unless we encounter an error. The ipipe/opipe | ||
| 1489 | * ordering doesn't really matter. | ||
| 1490 | */ | ||
| 1491 | ret = link_ipipe_prep(ipipe, flags); | ||
| 1492 | if (!ret) { | ||
| 1493 | ret = link_opipe_prep(opipe, flags); | ||
| 1494 | if (!ret) { | ||
| 1495 | ret = link_pipe(ipipe, opipe, len, flags); | ||
| 1496 | if (!ret && (flags & SPLICE_F_NONBLOCK)) | ||
| 1497 | ret = -EAGAIN; | ||
| 1498 | } | ||
| 1499 | } | ||
| 1500 | } | ||
| 1473 | 1501 | ||
| 1474 | return -EINVAL; | 1502 | return ret; |
| 1475 | } | 1503 | } |
| 1476 | 1504 | ||
| 1477 | asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) | 1505 | asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) |
