aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorZach Brown <zach.brown@oracle.com>2005-09-30 14:58:55 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-30 15:41:17 -0400
commit897f15fb587fd2772b9e7ff6ec0265057f3c3975 (patch)
treed975ce5f131b8f42915cf264122cd265661651e0 /fs
parent998765e5588b197737d457e16f72832d8036190f (diff)
[PATCH] aio: remove unlocked task_list test and resulting race
Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it than one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated. Signed-off-by: Zach Brown <zach.brown@oracle.com> Signed-off-by: Benjamin LaHaise <bcrl@linux.intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c79
1 files changed, 33 insertions, 46 deletions
diff --git a/fs/aio.c b/fs/aio.c
index b8f296999c04..9edc0e4a1219 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -741,19 +741,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
741 ret = retry(iocb); 741 ret = retry(iocb);
742 current->io_wait = NULL; 742 current->io_wait = NULL;
743 743
744 if (-EIOCBRETRY != ret) { 744 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
745 if (-EIOCBQUEUED != ret) { 745 BUG_ON(!list_empty(&iocb->ki_wait.task_list));
746 BUG_ON(!list_empty(&iocb->ki_wait.task_list)); 746 aio_complete(iocb, ret, 0);
747 aio_complete(iocb, ret, 0);
748 /* must not access the iocb after this */
749 }
750 } else {
751 /*
752 * Issue an additional retry to avoid waiting forever if
753 * no waits were queued (e.g. in case of a short read).
754 */
755 if (list_empty(&iocb->ki_wait.task_list))
756 kiocbSetKicked(iocb);
757 } 747 }
758out: 748out:
759 spin_lock_irq(&ctx->ctx_lock); 749 spin_lock_irq(&ctx->ctx_lock);
@@ -1327,8 +1317,11 @@ asmlinkage long sys_io_destroy(aio_context_t ctx)
1327} 1317}
1328 1318
1329/* 1319/*
1330 * Default retry method for aio_read (also used for first time submit) 1320 * aio_p{read,write} are the default ki_retry methods for
1331 * Responsible for updating iocb state as retries progress 1321 * IO_CMD_P{READ,WRITE}. They maintain kiocb retry state around potentially
1322 * multiple calls to f_op->aio_read(). They loop around partial progress
1323 * instead of returning -EIOCBRETRY because they don't have the means to call
1324 * kick_iocb().
1332 */ 1325 */
1333static ssize_t aio_pread(struct kiocb *iocb) 1326static ssize_t aio_pread(struct kiocb *iocb)
1334{ 1327{
@@ -1337,25 +1330,25 @@ static ssize_t aio_pread(struct kiocb *iocb)
1337 struct inode *inode = mapping->host; 1330 struct inode *inode = mapping->host;
1338 ssize_t ret = 0; 1331 ssize_t ret = 0;
1339 1332
1340 ret = file->f_op->aio_read(iocb, iocb->ki_buf, 1333 do {
1341 iocb->ki_left, iocb->ki_pos); 1334 ret = file->f_op->aio_read(iocb, iocb->ki_buf,
1335 iocb->ki_left, iocb->ki_pos);
1336 /*
1337 * Can't just depend on iocb->ki_left to determine
1338 * whether we are done. This may have been a short read.
1339 */
1340 if (ret > 0) {
1341 iocb->ki_buf += ret;
1342 iocb->ki_left -= ret;
1343 }
1342 1344
1343 /*
1344 * Can't just depend on iocb->ki_left to determine
1345 * whether we are done. This may have been a short read.
1346 */
1347 if (ret > 0) {
1348 iocb->ki_buf += ret;
1349 iocb->ki_left -= ret;
1350 /* 1345 /*
1351 * For pipes and sockets we return once we have 1346 * For pipes and sockets we return once we have some data; for
1352 * some data; for regular files we retry till we 1347 * regular files we retry till we complete the entire read or
1353 * complete the entire read or find that we can't 1348 * find that we can't read any more data (e.g short reads).
1354 * read any more data (e.g short reads).
1355 */ 1349 */
1356 if (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)) 1350 } while (ret > 0 &&
1357 ret = -EIOCBRETRY; 1351 !S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
1358 }
1359 1352
1360 /* This means we must have transferred all that we could */ 1353 /* This means we must have transferred all that we could */
1361 /* No need to retry anymore */ 1354 /* No need to retry anymore */
@@ -1365,27 +1358,21 @@ static ssize_t aio_pread(struct kiocb *iocb)
1365 return ret; 1358 return ret;
1366} 1359}
1367 1360
1368/* 1361/* see aio_pread() */
1369 * Default retry method for aio_write (also used for first time submit)
1370 * Responsible for updating iocb state as retries progress
1371 */
1372static ssize_t aio_pwrite(struct kiocb *iocb) 1362static ssize_t aio_pwrite(struct kiocb *iocb)
1373{ 1363{
1374 struct file *file = iocb->ki_filp; 1364 struct file *file = iocb->ki_filp;
1375 ssize_t ret = 0; 1365 ssize_t ret = 0;
1376 1366
1377 ret = file->f_op->aio_write(iocb, iocb->ki_buf, 1367 do {
1378 iocb->ki_left, iocb->ki_pos); 1368 ret = file->f_op->aio_write(iocb, iocb->ki_buf,
1379 1369 iocb->ki_left, iocb->ki_pos);
1380 if (ret > 0) { 1370 if (ret > 0) {
1381 iocb->ki_buf += ret; 1371 iocb->ki_buf += ret;
1382 iocb->ki_left -= ret; 1372 iocb->ki_left -= ret;
1383 1373 }
1384 ret = -EIOCBRETRY; 1374 } while (ret > 0);
1385 }
1386 1375
1387 /* This means we must have transferred all that we could */
1388 /* No need to retry anymore */
1389 if ((ret == 0) || (iocb->ki_left == 0)) 1376 if ((ret == 0) || (iocb->ki_left == 0))
1390 ret = iocb->ki_nbytes - iocb->ki_left; 1377 ret = iocb->ki_nbytes - iocb->ki_left;
1391 1378