diff options
author | Ryan Ding <ryan.ding@oracle.com> | 2016-03-25 17:21:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-25 19:37:42 -0400 |
commit | e63890f38ade9497b5609ddeb7f52df0fe55ea15 (patch) | |
tree | 6374b96dfa45477fc53cb6033cddc5621d5936c7 /fs/ocfs2/file.c | |
parent | f1f973ffce96a47c2b3f142e91eccef5bf22f699 (diff) |
ocfs2: fix ip_unaligned_aio deadlock with dio work queue
In the current implementation of unaligned aio+dio, the lock order behaves as
follows:
in user process context:
-> call io_submit()
-> get i_mutex
<== window1
-> get ip_unaligned_aio
-> submit direct io to block device
-> release i_mutex
-> io_submit() return
in dio work queue context (the work queue is created in __blockdev_direct_IO):
-> release ip_unaligned_aio
<== window2
-> get i_mutex
-> clear unwritten flag & change i_size
-> release i_mutex
There is a limit on the number of threads in the dio work queue: 256 by
default. If all 256 threads are in the 'window2' stage above, and there
is a user process in the 'window1' stage, the system will deadlock:
the user process holds i_mutex while waiting for the ip_unaligned_aio
lock, and the direct bio that holds the ip_unaligned_aio mutex is
waiting for a dio work queue thread to be scheduled, but all the dio
work queue threads are waiting for the i_mutex lock in 'window2'.
This case has only been observed in a test that sends a large number (more
than 256) of aios in a single io_submit() call.
My design is to remove the ip_unaligned_aio lock and turn an unaligned aio
into a sync io instead. Just like the ip_unaligned_aio lock did, this
serializes unaligned aio dio.
[akpm@linux-foundation.org: remove OCFS2_IOCB_UNALIGNED_IO, per Junxiao Bi]
Signed-off-by: Ryan Ding <ryan.ding@oracle.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <joseph.qi@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r-- | fs/ocfs2/file.c | 27 |
1 files changed, 9 insertions, 18 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 1ab182321b18..c18ab45f8d21 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2178,7 +2178,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2178 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2178 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2179 | int full_coherency = !(osb->s_mount_opt & | 2179 | int full_coherency = !(osb->s_mount_opt & |
2180 | OCFS2_MOUNT_COHERENCY_BUFFERED); | 2180 | OCFS2_MOUNT_COHERENCY_BUFFERED); |
2181 | int unaligned_dio = 0; | 2181 | void *saved_ki_complete = NULL; |
2182 | int append_write = ((iocb->ki_pos + count) >= | 2182 | int append_write = ((iocb->ki_pos + count) >= |
2183 | i_size_read(inode) ? 1 : 0); | 2183 | i_size_read(inode) ? 1 : 0); |
2184 | 2184 | ||
@@ -2241,17 +2241,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2241 | goto out; | 2241 | goto out; |
2242 | } | 2242 | } |
2243 | 2243 | ||
2244 | if (direct_io && !is_sync_kiocb(iocb)) | 2244 | if (direct_io && !is_sync_kiocb(iocb) && |
2245 | unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos); | 2245 | ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) { |
2246 | |||
2247 | if (unaligned_dio) { | ||
2248 | /* | 2246 | /* |
2249 | * Wait on previous unaligned aio to complete before | 2247 | * Make it a sync io if it's an unaligned aio. |
2250 | * proceeding. | ||
2251 | */ | 2248 | */ |
2252 | mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio); | 2249 | saved_ki_complete = xchg(&iocb->ki_complete, NULL); |
2253 | /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */ | ||
2254 | ocfs2_iocb_set_unaligned_aio(iocb); | ||
2255 | } | 2250 | } |
2256 | 2251 | ||
2257 | /* communicate with ocfs2_dio_end_io */ | 2252 | /* communicate with ocfs2_dio_end_io */ |
@@ -2272,11 +2267,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2272 | */ | 2267 | */ |
2273 | if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { | 2268 | if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { |
2274 | rw_level = -1; | 2269 | rw_level = -1; |
2275 | unaligned_dio = 0; | ||
2276 | } | 2270 | } |
2277 | 2271 | ||
2278 | if (unlikely(written <= 0)) | 2272 | if (unlikely(written <= 0)) |
2279 | goto no_sync; | 2273 | goto out; |
2280 | 2274 | ||
2281 | if (((file->f_flags & O_DSYNC) && !direct_io) || | 2275 | if (((file->f_flags & O_DSYNC) && !direct_io) || |
2282 | IS_SYNC(inode)) { | 2276 | IS_SYNC(inode)) { |
@@ -2298,13 +2292,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2298 | iocb->ki_pos - 1); | 2292 | iocb->ki_pos - 1); |
2299 | } | 2293 | } |
2300 | 2294 | ||
2301 | no_sync: | ||
2302 | if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) { | ||
2303 | ocfs2_iocb_clear_unaligned_aio(iocb); | ||
2304 | mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); | ||
2305 | } | ||
2306 | |||
2307 | out: | 2295 | out: |
2296 | if (saved_ki_complete) | ||
2297 | xchg(&iocb->ki_complete, saved_ki_complete); | ||
2298 | |||
2308 | if (rw_level != -1) | 2299 | if (rw_level != -1) |
2309 | ocfs2_rw_unlock(inode, rw_level); | 2300 | ocfs2_rw_unlock(inode, rw_level); |
2310 | 2301 | ||