aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2011-06-24 14:29:46 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 20:47:48 -0400
commitdf2d6f26586f12a24f3ae5df4e236dc5c08d6eb4 (patch)
tree68c6ec96177f766d3b9ab0a48408271ef2af4d89 /fs
parent562c72aa57c36b178eacc3500a0215651eca9429 (diff)
fs: always maintain i_dio_count
Maintain i_dio_count for all filesystems, not just those using DIO_LOCKING. This allows these filesystems to also protect truncate against direct I/O requests by using common code. Right now the only non-DIO_LOCKING filesystem that appears to do so is XFS, which uses an opencoded variant of the i_dio_count scheme. Behaviour doesn't change for filesystems never calling inode_dio_wait. For ext4 behaviour changes when using the dioread_nolock option, which previously was missing any protection between truncate and direct I/O reads. For ocfs2 the handcrafted i_dio_count manipulations are replaced with the common code now enabled. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--fs/direct-io.c25
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/file.c12
3 files changed, 17 insertions, 24 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 354cbdbc14bd..0a073c7125a6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -297,8 +297,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
297 aio_complete(dio->iocb, ret, 0); 297 aio_complete(dio->iocb, ret, 0);
298 } 298 }
299 299
300 if (dio->flags & DIO_LOCKING) 300 inode_dio_done(dio->inode);
301 inode_dio_done(dio->inode);
302 return ret; 301 return ret;
303} 302}
304 303
@@ -1185,14 +1184,16 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1185 * For writes this function is called under i_mutex and returns with 1184 * For writes this function is called under i_mutex and returns with
1186 * i_mutex held, for reads, i_mutex is not held on entry, but it is 1185 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1187 * taken and dropped again before returning. 1186 * taken and dropped again before returning.
1188 * The i_dio_count counter keeps track of the number of outstanding
1189 * direct I/O requests, and truncate waits for it to reach zero.
1190 * New references to i_dio_count must only be grabbed with i_mutex
1191 * held.
1192 *
1193 * - if the flags value does NOT contain DIO_LOCKING we don't use any 1187 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1194 * internal locking but rather rely on the filesystem to synchronize 1188 * internal locking but rather rely on the filesystem to synchronize
1195 * direct I/O reads/writes versus each other and truncate. 1189 * direct I/O reads/writes versus each other and truncate.
1190 *
1191 * To help with locking against truncate we increment the i_dio_count
1192 * counter before starting direct I/O, and decrement it once we are done.
1193 * Truncate can wait for it to reach zero to provide exclusion. It is
1194 * expected that filesystems provide exclusion between new direct I/O
1195 * and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
1196 * but other filesystems need to take care of this on their own.
1196 */ 1197 */
1197ssize_t 1198ssize_t
1198__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1199__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
@@ -1270,14 +1271,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1270 goto out; 1271 goto out;
1271 } 1272 }
1272 } 1273 }
1273
1274 /*
1275 * Will be decremented at I/O completion time.
1276 */
1277 atomic_inc(&inode->i_dio_count);
1278 } 1274 }
1279 1275
1280 /* 1276 /*
1277 * Will be decremented at I/O completion time.
1278 */
1279 atomic_inc(&inode->i_dio_count);
1280
1281 /*
1281 * For file extending writes updating i_size before data 1282 * For file extending writes updating i_size before data
1282 * writeouts complete can expose uninitialized blocks. So 1283 * writeouts complete can expose uninitialized blocks. So
1283 * even for AIO, we need to wait for i/o to complete before 1284 * even for AIO, we need to wait for i/o to complete before
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index de1d3953599d..524d6167fb63 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -567,10 +567,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
567 /* this io's submitter should not have unlocked this before we could */ 567 /* this io's submitter should not have unlocked this before we could */
568 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 568 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
569 569
570 if (ocfs2_iocb_is_sem_locked(iocb)) { 570 if (ocfs2_iocb_is_sem_locked(iocb))
571 inode_dio_done(inode);
572 ocfs2_iocb_clear_sem_locked(iocb); 571 ocfs2_iocb_clear_sem_locked(iocb);
573 }
574 572
575 ocfs2_iocb_clear_rw_locked(iocb); 573 ocfs2_iocb_clear_rw_locked(iocb);
576 574
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 736283ca4a4c..22d604601957 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2240,7 +2240,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2240relock: 2240relock:
2241 /* to match setattr's i_mutex -> rw_lock ordering */ 2241 /* to match setattr's i_mutex -> rw_lock ordering */
2242 if (direct_io) { 2242 if (direct_io) {
2243 atomic_inc(&inode->i_dio_count);
2244 have_alloc_sem = 1; 2243 have_alloc_sem = 1;
2245 /* communicate with ocfs2_dio_end_io */ 2244 /* communicate with ocfs2_dio_end_io */
2246 ocfs2_iocb_set_sem_locked(iocb); 2245 ocfs2_iocb_set_sem_locked(iocb);
@@ -2292,7 +2291,6 @@ relock:
2292 */ 2291 */
2293 if (direct_io && !can_do_direct) { 2292 if (direct_io && !can_do_direct) {
2294 ocfs2_rw_unlock(inode, rw_level); 2293 ocfs2_rw_unlock(inode, rw_level);
2295 inode_dio_done(inode);
2296 2294
2297 have_alloc_sem = 0; 2295 have_alloc_sem = 0;
2298 rw_level = -1; 2296 rw_level = -1;
@@ -2379,10 +2377,8 @@ out:
2379 ocfs2_rw_unlock(inode, rw_level); 2377 ocfs2_rw_unlock(inode, rw_level);
2380 2378
2381out_sems: 2379out_sems:
2382 if (have_alloc_sem) { 2380 if (have_alloc_sem)
2383 inode_dio_done(inode);
2384 ocfs2_iocb_clear_sem_locked(iocb); 2381 ocfs2_iocb_clear_sem_locked(iocb);
2385 }
2386 2382
2387 mutex_unlock(&inode->i_mutex); 2383 mutex_unlock(&inode->i_mutex);
2388 2384
@@ -2533,7 +2529,6 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2533 */ 2529 */
2534 if (filp->f_flags & O_DIRECT) { 2530 if (filp->f_flags & O_DIRECT) {
2535 have_alloc_sem = 1; 2531 have_alloc_sem = 1;
2536 atomic_inc(&inode->i_dio_count);
2537 ocfs2_iocb_set_sem_locked(iocb); 2532 ocfs2_iocb_set_sem_locked(iocb);
2538 2533
2539 ret = ocfs2_rw_lock(inode, 0); 2534 ret = ocfs2_rw_lock(inode, 0);
@@ -2575,10 +2570,9 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 } 2570 }
2576 2571
2577bail: 2572bail:
2578 if (have_alloc_sem) { 2573 if (have_alloc_sem)
2579 inode_dio_done(inode);
2580 ocfs2_iocb_clear_sem_locked(iocb); 2574 ocfs2_iocb_clear_sem_locked(iocb);
2581 } 2575
2582 if (rw_level != -1) 2576 if (rw_level != -1)
2583 ocfs2_rw_unlock(inode, rw_level); 2577 ocfs2_rw_unlock(inode, rw_level);
2584 2578