diff options
author | Zach Brown <zach.brown@oracle.com> | 2005-09-30 14:58:55 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-30 15:41:17 -0400 |
commit | 897f15fb587fd2772b9e7ff6ec0265057f3c3975 (patch) | |
tree | d975ce5f131b8f42915cf264122cd265661651e0 | |
parent | 998765e5588b197737d457e16f72832d8036190f (diff) |
[PATCH] aio: remove unlocked task_list test and resulting race
Only one of the run or kick path is supposed to put an iocb on the run
list. If both of them do it then one of them can end up referencing a
freed iocb. The kick path could delete the task_list item from the wait
queue before getting the ctx_lock and putting the iocb on the run list.
The run path was testing the task_list item outside the lock so that it
could catch ki_retry methods that return -EIOCBRETRY *without* putting the
iocb on a wait queue and promising to call kick_iocb. This unlocked check
could then race with the kick path to cause both to try and put the iocb on
the run list.
The patch stops the run path from testing task_list by requiring that any
ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be
called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY
users, are updated.
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Benjamin LaHaise <bcrl@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/aio.c | 79 | ||||
-rw-r--r-- | include/linux/aio.h | 34 |
2 files changed, 67 insertions, 46 deletions
@@ -741,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) | |||
741 | ret = retry(iocb); | 741 | ret = retry(iocb); |
742 | current->io_wait = NULL; | 742 | current->io_wait = NULL; |
743 | 743 | ||
744 | if (-EIOCBRETRY != ret) { | 744 | if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { |
745 | if (-EIOCBQUEUED != ret) { | 745 | BUG_ON(!list_empty(&iocb->ki_wait.task_list)); |
746 | BUG_ON(!list_empty(&iocb->ki_wait.task_list)); | 746 | aio_complete(iocb, ret, 0); |
747 | aio_complete(iocb, ret, 0); | ||
748 | /* must not access the iocb after this */ | ||
749 | } | ||
750 | } else { | ||
751 | /* | ||
752 | * Issue an additional retry to avoid waiting forever if | ||
753 | * no waits were queued (e.g. in case of a short read). | ||
754 | */ | ||
755 | if (list_empty(&iocb->ki_wait.task_list)) | ||
756 | kiocbSetKicked(iocb); | ||
757 | } | 747 | } |
758 | out: | 748 | out: |
759 | spin_lock_irq(&ctx->ctx_lock); | 749 | spin_lock_irq(&ctx->ctx_lock); |
@@ -1327,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx) | |||
1327 | } | 1317 | } |
1328 | 1318 | ||
1329 | /* | 1319 | /* |
1330 | * Default retry method for aio_read (also used for first time submit) | 1320 | * aio_p{read,write} are the default ki_retry methods for |
1331 | * Responsible for updating iocb state as retries progress | 1321 | * IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially |
1322 | * multiple calls to f_op->aio_read(). They loop around partial progress | ||
1323 | * instead of returning -EIOCBRETRY because they don't have the means to call | ||
1324 | * kick_iocb(). | ||
1332 | */ | 1325 | */ |
1333 | static ssize_t aio_pread(struct kiocb *iocb) | 1326 | static ssize_t aio_pread(struct kiocb *iocb) |
1334 | { | 1327 | { |
@@ -1337,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb) | |||
1337 | struct inode *inode = mapping->host; | 1330 | struct inode *inode = mapping->host; |
1338 | ssize_t ret = 0; | 1331 | ssize_t ret = 0; |
1339 | 1332 | ||
1340 | ret = file->f_op->aio_read(iocb, iocb->ki_buf, | 1333 | do { |
1341 | iocb->ki_left, iocb->ki_pos); | 1334 | ret = file->f_op->aio_read(iocb, iocb->ki_buf, |
1335 | iocb->ki_left, iocb->ki_pos); | ||
1336 | /* | ||
1337 | * Can't just depend on iocb->ki_left to determine | ||
1338 | * whether we are done. This may have been a short read. | ||
1339 | */ | ||
1340 | if (ret > 0) { | ||
1341 | iocb->ki_buf += ret; | ||
1342 | iocb->ki_left -= ret; | ||
1343 | } | ||
1342 | 1344 | ||
1343 | /* | ||
1344 | * Can't just depend on iocb->ki_left to determine | ||
1345 | * whether we are done. This may have been a short read. | ||
1346 | */ | ||
1347 | if (ret > 0) { | ||
1348 | iocb->ki_buf += ret; | ||
1349 | iocb->ki_left -= ret; | ||
1350 | /* | 1345 | /* |
1351 | * For pipes and sockets we return once we have | 1346 | * For pipes and sockets we return once we have some data; for |
1352 | * some data; for regular files we retry till we | 1347 | * regular files we retry till we complete the entire read or |
1353 | * complete the entire read or find that we can't | 1348 | * find that we can't read any more data (e.g short reads). |
1354 | * read any more data (e.g short reads). | ||
1355 | */ | 1349 | */ |
1356 | if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)) | 1350 | } while (ret > 0 && |
1357 | ret = -EIOCBRETRY; | 1351 | !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)); |
1358 | } | ||
1359 | 1352 | ||
1360 | /* This means we must have transferred all that we could */ | 1353 | /* This means we must have transferred all that we could */ |
1361 | /* No need to retry anymore */ | 1354 | /* No need to retry anymore */ |
@@ -1365,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb) | |||
1365 | return ret; | 1358 | return ret; |
1366 | } | 1359 | } |
1367 | 1360 | ||
1368 | /* | 1361 | /* see aio_pread() */ |
1369 | * Default retry method for aio_write (also used for first time submit) | ||
1370 | * Responsible for updating iocb state as retries progress | ||
1371 | */ | ||
1372 | static ssize_t aio_pwrite(struct kiocb *iocb) | 1362 | static ssize_t aio_pwrite(struct kiocb *iocb) |
1373 | { | 1363 | { |
1374 | struct file *file = iocb->ki_filp; | 1364 | struct file *file = iocb->ki_filp; |
1375 | ssize_t ret = 0; | 1365 | ssize_t ret = 0; |
1376 | 1366 | ||
1377 | ret = file->f_op->aio_write(iocb, iocb->ki_buf, | 1367 | do { |
1378 | iocb->ki_left, iocb->ki_pos); | 1368 | ret = file->f_op->aio_write(iocb, iocb->ki_buf, |
1379 | 1369 | iocb->ki_left, iocb->ki_pos); | |
1380 | if (ret > 0) { | 1370 | if (ret > 0) { |
1381 | iocb->ki_buf += ret; | 1371 | iocb->ki_buf += ret; |
1382 | iocb->ki_left -= ret; | 1372 | iocb->ki_left -= ret; |
1383 | 1373 | } | |
1384 | ret = -EIOCBRETRY; | 1374 | } while (ret > 0); |
1385 | } | ||
1386 | 1375 | ||
1387 | /* This means we must have transferred all that we could */ | ||
1388 | /* No need to retry anymore */ | ||
1389 | if ((ret == 0) || (iocb->ki_left == 0)) | 1376 | if ((ret == 0) || (iocb->ki_left == 0)) |
1390 | ret = iocb->ki_nbytes - iocb->ki_left; | 1377 | ret = iocb->ki_nbytes - iocb->ki_left; |
1391 | 1378 | ||
diff --git a/include/linux/aio.h b/include/linux/aio.h index a4d5af907f90..60def658b246 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h | |||
@@ -43,6 +43,40 @@ struct kioctx; | |||
43 | #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) | 43 | #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) |
44 | #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) | 44 | #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) |
45 | 45 | ||
46 | /* is there a better place to document function pointer methods? */ | ||
47 | /** | ||
48 | * ki_retry - iocb forward progress callback | ||
49 | * @kiocb: The kiocb struct to advance by performing an operation. | ||
50 | * | ||
51 | * This callback is called when the AIO core wants a given AIO operation | ||
52 | * to make forward progress. The kiocb argument describes the operation | ||
53 | * that is to be performed. As the operation proceeds, perhaps partially, | ||
54 | * ki_retry is expected to update the kiocb with progress made. Typically | ||
55 | * ki_retry is set in the AIO core and it itself calls file_operations | ||
56 | * helpers. | ||
57 | * | ||
58 | * ki_retry's return value determines when the AIO operation is completed | ||
59 | * and an event is generated in the AIO event ring. Except the special | ||
60 | * return values described below, the value that is returned from ki_retry | ||
61 | * is transferred directly into the completion ring as the operation's | ||
62 | * resulting status. Once this has happened ki_retry *MUST NOT* reference | ||
63 | * the kiocb pointer again. | ||
64 | * | ||
65 | * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete() | ||
66 | * will be called on the kiocb pointer in the future. The AIO core will | ||
67 | * not ask the method again -- ki_retry must ensure forward progress. | ||
68 | * aio_complete() must be called once and only once in the future, multiple | ||
69 | * calls may result in undefined behaviour. | ||
70 | * | ||
71 | * If ki_retry returns -EIOCBRETRY it has made a promise that kick_iocb() | ||
72 | * will be called on the kiocb pointer in the future. This may happen | ||
73 | * through generic helpers that associate kiocb->ki_wait with a wait | ||
74 | * queue head that ki_retry uses via current->io_wait. It can also happen | ||
75 | * with custom tracking and manual calls to kick_iocb(), though that is | ||
76 | * discouraged. In either case, kick_iocb() must be called once and only | ||
77 | * once. ki_retry must ensure forward progress, the AIO core will wait | ||
78 | * indefinitely for kick_iocb() to be called. | ||
79 | */ | ||
46 | struct kiocb { | 80 | struct kiocb { |
47 | struct list_head ki_run_list; | 81 | struct list_head ki_run_list; |
48 | long ki_flags; | 82 | long ki_flags; |