aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorZach Brown <zach.brown@oracle.com>2005-09-30 14:58:55 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-30 15:41:17 -0400
commit897f15fb587fd2772b9e7ff6ec0265057f3c3975 (patch)
treed975ce5f131b8f42915cf264122cd265661651e0 /fs
parent998765e5588b197737d457e16f72832d8036190f (diff)
[PATCH] aio: remove unlocked task_list test and resulting race
Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated. Signed-off-by: Zach Brown <zach.brown@oracle.com> Signed-off-by: Benjamin LaHaise <bcrl@linux.intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c79
1 files changed, 33 insertions, 46 deletions
diff --git a/fs/aio.c b/fs/aio.c
index b8f296999c04..9edc0e4a1219 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -741,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
741 ret = retry(iocb); 741 ret = retry(iocb);
742 current->io_wait = NULL; 742 current->io_wait = NULL;
743 743
744 if (-EIOCBRETRY != ret) { 744 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
745 if (-EIOCBQUEUED != ret) { 745 BUG_ON(!list_empty(&iocb->ki_wait.task_list));
746 BUG_ON(!list_empty(&iocb->ki_wait.task_list)); 746 aio_complete(iocb, ret, 0);
747 aio_complete(iocb, ret, 0);
748 /* must not access the iocb after this */
749 }
750 } else {
751 /*
752 * Issue an additional retry to avoid waiting forever if
753 * no waits were queued (e.g. in case of a short read).
754 */
755 if (list_empty(&iocb->ki_wait.task_list))
756 kiocbSetKicked(iocb);
757 } 747 }
758out: 748out:
759 spin_lock_irq(&ctx->ctx_lock); 749 spin_lock_irq(&ctx->ctx_lock);
@@ -1327,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx)
1327} 1317}
1328 1318
1329/* 1319/*
1330 * Default retry method for aio_read (also used for first time submit) 1320 * aio_p{read,write} are the default ki_retry methods for
1331 * Responsible for updating iocb state as retries progress 1321 * IO_CMD_P{READ,WRITE}. They maintain kiocb retry state around potentially
1322 * multiple calls to f_op->aio_read(). They loop around partial progress
1323 * instead of returning -EIOCBRETRY because they don't have the means to call
1324 * kick_iocb().
1332 */ 1325 */
1333static ssize_t aio_pread(struct kiocb *iocb) 1326static ssize_t aio_pread(struct kiocb *iocb)
1334{ 1327{
@@ -1337,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb)
1337 struct inode *inode = mapping->host; 1330 struct inode *inode = mapping->host;
1338 ssize_t ret = 0; 1331 ssize_t ret = 0;
1339 1332
1340 ret = file->f_op->aio_read(iocb, iocb->ki_buf, 1333 do {
1341 iocb->ki_left, iocb->ki_pos); 1334 ret = file->f_op->aio_read(iocb, iocb->ki_buf,
1335 iocb->ki_left, iocb->ki_pos);
1336 /*
1337 * Can't just depend on iocb->ki_left to determine
1338 * whether we are done. This may have been a short read.
1339 */
1340 if (ret > 0) {
1341 iocb->ki_buf += ret;
1342 iocb->ki_left -= ret;
1343 }
1342 1344
1343 /*
1344 * Can't just depend on iocb->ki_left to determine
1345 * whether we are done. This may have been a short read.
1346 */
1347 if (ret > 0) {
1348 iocb->ki_buf += ret;
1349 iocb->ki_left -= ret;
1350 /* 1345 /*
1351 * For pipes and sockets we return once we have 1346 * For pipes and sockets we return once we have some data; for
1352 * some data; for regular files we retry till we 1347 * regular files we retry till we complete the entire read or
1353 * complete the entire read or find that we can't 1348 * find that we can't read any more data (e.g short reads).
1354 * read any more data (e.g short reads).
1355 */ 1349 */
1356 if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)) 1350 } while (ret > 0 &&
1357 ret = -EIOCBRETRY; 1351 !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
1358 }
1359 1352
1360 /* This means we must have transferred all that we could */ 1353 /* This means we must have transferred all that we could */
1361 /* No need to retry anymore */ 1354 /* No need to retry anymore */
@@ -1365,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb)
1365 return ret; 1358 return ret;
1366} 1359}
1367 1360
1368/* 1361/* see aio_pread() */
1369 * Default retry method for aio_write (also used for first time submit)
1370 * Responsible for updating iocb state as retries progress
1371 */
1372static ssize_t aio_pwrite(struct kiocb *iocb) 1362static ssize_t aio_pwrite(struct kiocb *iocb)
1373{ 1363{
1374 struct file *file = iocb->ki_filp; 1364 struct file *file = iocb->ki_filp;
1375 ssize_t ret = 0; 1365 ssize_t ret = 0;
1376 1366
1377 ret = file->f_op->aio_write(iocb, iocb->ki_buf, 1367 do {
1378 iocb->ki_left, iocb->ki_pos); 1368 ret = file->f_op->aio_write(iocb, iocb->ki_buf,
1379 1369 iocb->ki_left, iocb->ki_pos);
1380 if (ret > 0) { 1370 if (ret > 0) {
1381 iocb->ki_buf += ret; 1371 iocb->ki_buf += ret;
1382 iocb->ki_left -= ret; 1372 iocb->ki_left -= ret;
1383 1373 }
1384 ret = -EIOCBRETRY; 1374 } while (ret > 0);
1385 }
1386 1375
1387 /* This means we must have transferred all that we could */
1388 /* No need to retry anymore */
1389 if ((ret == 0) || (iocb->ki_left == 0)) 1376 if ((ret == 0) || (iocb->ki_left == 0))
1390 ret = iocb->ki_nbytes - iocb->ki_left; 1377 ret = iocb->ki_nbytes - iocb->ki_left;
1391 1378