aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChen, Kenneth W <kenneth.w.chen@intel.com>2006-03-25 06:08:16 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-25 11:23:00 -0500
commit174e27c607cfa3ebb92934d28c0fdfcf5ce6c3af (patch)
tree1bc8f1804dc0917f1a86b2d32c701883932d81fe
parent0e6b3e5e97e2e8a25bcfc528dad94edf5220dfeb (diff)
[PATCH] direct-io: bug fix in dio handling write error
There is a bug in direct-io in propagating a write error up to the higher I/O layer. When performing an async O_DIRECT write to a block device, if a device error occurs (such as a media error or the disk being pulled), the error code is only propagated from the device driver to the DIO layer. The error code stops at finished_one_bio(). The async write, however, is supposed to have a corresponding AIO event with the appropriate return code (in this case -EIO). An application which waits on the async write event will hang forever, since that AIO event is lost forever (if the application did not use the timeout option in the io_getevents call; regardless, an AIO event is lost). The discovery of the above bug led to the discovery of another potential race window with dio->result. The fundamental problem is that dio->result is overloaded with a dual use: as an indicator of the fallback path for a partial dio write, and as an error indicator used in the I/O completion path. In the event of a device error, setting dio->result to -EIO clashes with the value used to track a partial write that activates the fallback path. It was also pointed out that it is impossible to use dio->result to track a partial write and at the same time track an error returned from the device driver, because direct_io_worker can only determine whether it is a partial write at the end of I/O submission, and in the middle of those I/O submissions a return code could be coming back from the driver, thus messing up all the subsequent logic. The proposed fix is to separate the error code returned by the I/O completion path from the partial I/O submission tracking. A new variable is added to the dio structure specifically to track an I/O error returned in the completion path. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Acked-by: Zach Brown <zach.brown@oracle.com> Acked-by: Suparna Bhattacharya <suparna@in.ibm.com> Cc: Badari Pulavarty <pbadari@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/direct-io.c8
1 files changed, 7 insertions, 1 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 27f3e787faca..235ed8d1f11e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -129,6 +129,7 @@ struct dio {
129 /* AIO related stuff */ 129 /* AIO related stuff */
130 struct kiocb *iocb; /* kiocb */ 130 struct kiocb *iocb; /* kiocb */
131 int is_async; /* is IO async ? */ 131 int is_async; /* is IO async ? */
132 int io_error; /* IO error in completion path */
132 ssize_t result; /* IO result */ 133 ssize_t result; /* IO result */
133}; 134};
134 135
@@ -250,6 +251,10 @@ static void finished_one_bio(struct dio *dio)
250 ((offset + transferred) > dio->i_size)) 251 ((offset + transferred) > dio->i_size))
251 transferred = dio->i_size - offset; 252 transferred = dio->i_size - offset;
252 253
254 /* check for error in completion path */
255 if (dio->io_error)
256 transferred = dio->io_error;
257
253 dio_complete(dio, offset, transferred); 258 dio_complete(dio, offset, transferred);
254 259
255 /* Complete AIO later if falling back to buffered i/o */ 260 /* Complete AIO later if falling back to buffered i/o */
@@ -406,7 +411,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
406 int page_no; 411 int page_no;
407 412
408 if (!uptodate) 413 if (!uptodate)
409 dio->result = -EIO; 414 dio->io_error = -EIO;
410 415
411 if (dio->is_async && dio->rw == READ) { 416 if (dio->is_async && dio->rw == READ) {
412 bio_check_pages_dirty(bio); /* transfers ownership */ 417 bio_check_pages_dirty(bio); /* transfers ownership */
@@ -971,6 +976,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
971 dio->next_block_for_io = -1; 976 dio->next_block_for_io = -1;
972 977
973 dio->page_errors = 0; 978 dio->page_errors = 0;
979 dio->io_error = 0;
974 dio->result = 0; 980 dio->result = 0;
975 dio->iocb = iocb; 981 dio->iocb = iocb;
976 dio->i_size = i_size_read(inode); 982 dio->i_size = i_size_read(inode);