diff options
author | Christoph Hellwig <hch@infradead.org> | 2011-06-24 14:29:43 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-07-20 20:47:46 -0400 |
commit | bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 (patch) | |
tree | ef5341c7747f809aec7ae233f6e3ef90af39be5f /fs/ocfs2 | |
parent | f9b5570d7fdedff32a2e78102bfb54cd1b12b289 (diff) |
fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may
be released by a non-owner, and its write side is always mirrored by
real exclusion. Its intended use is to wait for all pending direct I/O
requests to finish before starting a truncate.
Replace it with a hand-grown construct:
- exclusion for truncates is already guaranteed by i_mutex, so it can
simply fall away
- the reader side is replaced by an i_dio_count member in struct inode
that counts the number of pending direct I/O requests. Truncate can't
proceed as long as it's non-zero
- when i_dio_count reaches zero we wake up a pending truncate using
wake_up_bit on a new bit in i_flags
- new references to i_dio_count can't appear while we are waiting for
it to read zero because the direct I/O count always needs i_mutex
(or an equivalent like XFS's i_iolock) for starting a new operation.
This scheme is much simpler, and saves the space of a spinlock_t and a
struct list_head in struct inode (typically 160 bits on a non-debug 64-bit
system).
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/aops.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 15 |
2 files changed, 10 insertions, 12 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index ac97bca282d2..de1d3953599d 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -551,9 +551,8 @@ bail: | |||
551 | 551 | ||
552 | /* | 552 | /* |
553 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're | 553 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're |
554 | * particularly interested in the aio/dio case. Like the core uses | 554 | * particularly interested in the aio/dio case. We use the rw_lock DLM lock |
555 | * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from | 555 | * to protect io on one node from truncation on another. |
556 | * truncation on another. | ||
557 | */ | 556 | */ |
558 | static void ocfs2_dio_end_io(struct kiocb *iocb, | 557 | static void ocfs2_dio_end_io(struct kiocb *iocb, |
559 | loff_t offset, | 558 | loff_t offset, |
@@ -569,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
569 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | 568 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); |
570 | 569 | ||
571 | if (ocfs2_iocb_is_sem_locked(iocb)) { | 570 | if (ocfs2_iocb_is_sem_locked(iocb)) { |
572 | up_read(&inode->i_alloc_sem); | 571 | inode_dio_done(inode); |
573 | ocfs2_iocb_clear_sem_locked(iocb); | 572 | ocfs2_iocb_clear_sem_locked(iocb); |
574 | } | 573 | } |
575 | 574 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 1406c37a5722..2c3a465514a2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2236,9 +2236,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2236 | ocfs2_iocb_clear_sem_locked(iocb); | 2236 | ocfs2_iocb_clear_sem_locked(iocb); |
2237 | 2237 | ||
2238 | relock: | 2238 | relock: |
2239 | /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ | 2239 | /* to match setattr's i_mutex -> rw_lock ordering */ |
2240 | if (direct_io) { | 2240 | if (direct_io) { |
2241 | down_read(&inode->i_alloc_sem); | 2241 | atomic_inc(&inode->i_dio_count); |
2242 | have_alloc_sem = 1; | 2242 | have_alloc_sem = 1; |
2243 | /* communicate with ocfs2_dio_end_io */ | 2243 | /* communicate with ocfs2_dio_end_io */ |
2244 | ocfs2_iocb_set_sem_locked(iocb); | 2244 | ocfs2_iocb_set_sem_locked(iocb); |
@@ -2290,7 +2290,7 @@ relock: | |||
2290 | */ | 2290 | */ |
2291 | if (direct_io && !can_do_direct) { | 2291 | if (direct_io && !can_do_direct) { |
2292 | ocfs2_rw_unlock(inode, rw_level); | 2292 | ocfs2_rw_unlock(inode, rw_level); |
2293 | up_read(&inode->i_alloc_sem); | 2293 | inode_dio_done(inode); |
2294 | 2294 | ||
2295 | have_alloc_sem = 0; | 2295 | have_alloc_sem = 0; |
2296 | rw_level = -1; | 2296 | rw_level = -1; |
@@ -2361,8 +2361,7 @@ out_dio: | |||
2361 | /* | 2361 | /* |
2362 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io | 2362 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io |
2363 | * function pointer which is called when o_direct io completes so that | 2363 | * function pointer which is called when o_direct io completes so that |
2364 | * it can unlock our rw lock. (it's the clustered equivalent of | 2364 | * it can unlock our rw lock. |
2365 | * i_alloc_sem; protects truncate from racing with pending ios). | ||
2366 | * Unfortunately there are error cases which call end_io and others | 2365 | * Unfortunately there are error cases which call end_io and others |
2367 | * that don't. so we don't have to unlock the rw_lock if either an | 2366 | * that don't. so we don't have to unlock the rw_lock if either an |
2368 | * async dio is going to do it in the future or an end_io after an | 2367 | * async dio is going to do it in the future or an end_io after an |
@@ -2379,7 +2378,7 @@ out: | |||
2379 | 2378 | ||
2380 | out_sems: | 2379 | out_sems: |
2381 | if (have_alloc_sem) { | 2380 | if (have_alloc_sem) { |
2382 | up_read(&inode->i_alloc_sem); | 2381 | inode_dio_done(inode); |
2383 | ocfs2_iocb_clear_sem_locked(iocb); | 2382 | ocfs2_iocb_clear_sem_locked(iocb); |
2384 | } | 2383 | } |
2385 | 2384 | ||
@@ -2531,8 +2530,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
2531 | * need locks to protect pending reads from racing with truncate. | 2530 | * need locks to protect pending reads from racing with truncate. |
2532 | */ | 2531 | */ |
2533 | if (filp->f_flags & O_DIRECT) { | 2532 | if (filp->f_flags & O_DIRECT) { |
2534 | down_read(&inode->i_alloc_sem); | ||
2535 | have_alloc_sem = 1; | 2533 | have_alloc_sem = 1; |
2534 | atomic_inc(&inode->i_dio_count); | ||
2536 | ocfs2_iocb_set_sem_locked(iocb); | 2535 | ocfs2_iocb_set_sem_locked(iocb); |
2537 | 2536 | ||
2538 | ret = ocfs2_rw_lock(inode, 0); | 2537 | ret = ocfs2_rw_lock(inode, 0); |
@@ -2575,7 +2574,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
2575 | 2574 | ||
2576 | bail: | 2575 | bail: |
2577 | if (have_alloc_sem) { | 2576 | if (have_alloc_sem) { |
2578 | up_read(&inode->i_alloc_sem); | 2577 | inode_dio_done(inode); |
2579 | ocfs2_iocb_clear_sem_locked(iocb); | 2578 | ocfs2_iocb_clear_sem_locked(iocb); |
2580 | } | 2579 | } |
2581 | if (rw_level != -1) | 2580 | if (rw_level != -1) |