aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2011-06-24 14:29:43 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 20:47:46 -0400
commitbd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 (patch)
treeef5341c7747f809aec7ae233f6e3ef90af39be5f /fs/ocfs2
parentf9b5570d7fdedff32a2e78102bfb54cd1b12b289 (diff)
fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may be released by a non-owner, and its write side is always mirrored by real exclusion. Its intended use is to wait for all pending direct I/O requests to finish before starting a truncate. Replace it with a hand-grown construct: - exclusion for truncates is already guaranteed by i_mutex, so it can simply fall away - the reader side is replaced by an i_dio_count member in struct inode that counts the number of pending direct I/O requests. Truncate can't proceed as long as it's non-zero - when i_dio_count reaches zero we wake up a pending truncate using wake_up_bit on a new bit in i_flags - new references to i_dio_count can't appear while we are waiting for it to read zero because the direct I/O count always needs i_mutex (or an equivalent like XFS's i_iolock) for starting a new operation. This scheme is much simpler, and saves the space of a spinlock_t and a struct list_head in struct inode (typically 160 bits on a non-debug 64-bit system). Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/file.c15
2 files changed, 10 insertions, 12 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index ac97bca282d2..de1d3953599d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -551,9 +551,8 @@ bail:
551 551
552/* 552/*
553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
554 * particularly interested in the aio/dio case. Like the core uses 554 * particularly interested in the aio/dio case. We use the rw_lock DLM lock
555 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from 555 * to protect io on one node from truncation on another.
556 * truncation on another.
557 */ 556 */
558static void ocfs2_dio_end_io(struct kiocb *iocb, 557static void ocfs2_dio_end_io(struct kiocb *iocb,
559 loff_t offset, 558 loff_t offset,
@@ -569,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
569 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 568 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
570 569
571 if (ocfs2_iocb_is_sem_locked(iocb)) { 570 if (ocfs2_iocb_is_sem_locked(iocb)) {
572 up_read(&inode->i_alloc_sem); 571 inode_dio_done(inode);
573 ocfs2_iocb_clear_sem_locked(iocb); 572 ocfs2_iocb_clear_sem_locked(iocb);
574 } 573 }
575 574
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1406c37a5722..2c3a465514a2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2236,9 +2236,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2236 ocfs2_iocb_clear_sem_locked(iocb); 2236 ocfs2_iocb_clear_sem_locked(iocb);
2237 2237
2238relock: 2238relock:
2239 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2239 /* to match setattr's i_mutex -> rw_lock ordering */
2240 if (direct_io) { 2240 if (direct_io) {
2241 down_read(&inode->i_alloc_sem); 2241 atomic_inc(&inode->i_dio_count);
2242 have_alloc_sem = 1; 2242 have_alloc_sem = 1;
2243 /* communicate with ocfs2_dio_end_io */ 2243 /* communicate with ocfs2_dio_end_io */
2244 ocfs2_iocb_set_sem_locked(iocb); 2244 ocfs2_iocb_set_sem_locked(iocb);
@@ -2290,7 +2290,7 @@ relock:
2290 */ 2290 */
2291 if (direct_io && !can_do_direct) { 2291 if (direct_io && !can_do_direct) {
2292 ocfs2_rw_unlock(inode, rw_level); 2292 ocfs2_rw_unlock(inode, rw_level);
2293 up_read(&inode->i_alloc_sem); 2293 inode_dio_done(inode);
2294 2294
2295 have_alloc_sem = 0; 2295 have_alloc_sem = 0;
2296 rw_level = -1; 2296 rw_level = -1;
@@ -2361,8 +2361,7 @@ out_dio:
2361 /* 2361 /*
2362 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io 2362 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2363 * function pointer which is called when o_direct io completes so that 2363 * function pointer which is called when o_direct io completes so that
2364 * it can unlock our rw lock. (it's the clustered equivalent of 2364 * it can unlock our rw lock.
2365 * i_alloc_sem; protects truncate from racing with pending ios).
2366 * Unfortunately there are error cases which call end_io and others 2365 * Unfortunately there are error cases which call end_io and others
2367 * that don't. so we don't have to unlock the rw_lock if either an 2366 * that don't. so we don't have to unlock the rw_lock if either an
2368 * async dio is going to do it in the future or an end_io after an 2367 * async dio is going to do it in the future or an end_io after an
@@ -2379,7 +2378,7 @@ out:
2379 2378
2380out_sems: 2379out_sems:
2381 if (have_alloc_sem) { 2380 if (have_alloc_sem) {
2382 up_read(&inode->i_alloc_sem); 2381 inode_dio_done(inode);
2383 ocfs2_iocb_clear_sem_locked(iocb); 2382 ocfs2_iocb_clear_sem_locked(iocb);
2384 } 2383 }
2385 2384
@@ -2531,8 +2530,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2531 * need locks to protect pending reads from racing with truncate. 2530 * need locks to protect pending reads from racing with truncate.
2532 */ 2531 */
2533 if (filp->f_flags & O_DIRECT) { 2532 if (filp->f_flags & O_DIRECT) {
2534 down_read(&inode->i_alloc_sem);
2535 have_alloc_sem = 1; 2533 have_alloc_sem = 1;
2534 atomic_inc(&inode->i_dio_count);
2536 ocfs2_iocb_set_sem_locked(iocb); 2535 ocfs2_iocb_set_sem_locked(iocb);
2537 2536
2538 ret = ocfs2_rw_lock(inode, 0); 2537 ret = ocfs2_rw_lock(inode, 0);
@@ -2575,7 +2574,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 2574
2576bail: 2575bail:
2577 if (have_alloc_sem) { 2576 if (have_alloc_sem) {
2578 up_read(&inode->i_alloc_sem); 2577 inode_dio_done(inode);
2579 ocfs2_iocb_clear_sem_locked(iocb); 2578 ocfs2_iocb_clear_sem_locked(iocb);
2580 } 2579 }
2581 if (rw_level != -1) 2580 if (rw_level != -1)