aboutsummaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2011-06-24 14:29:43 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 20:47:46 -0400
commitbd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 (patch)
treeef5341c7747f809aec7ae233f6e3ef90af39be5f /fs/direct-io.c
parentf9b5570d7fdedff32a2e78102bfb54cd1b12b289 (diff)
fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may be released by a non-owner, and its write side is always mirrored by real exclusion. Its intended use is to wait for all pending direct I/O requests to finish before starting a truncate. Replace it with a hand-grown construct: - exclusion for truncates is already guaranteed by i_mutex, so it can simply fall away - the reader side is replaced by an i_dio_count member in struct inode that counts the number of pending direct I/O requests. Truncate can't proceed as long as it's non-zero - when i_dio_count reaches zero we wake up a pending truncate using wake_up_bit on a new bit in i_flags - new references to i_dio_count can't appear while we are waiting for it to read zero because the direct I/O count always needs i_mutex (or an equivalent like XFS's i_iolock) for starting a new operation. This scheme is much simpler, and saves the space of a spinlock_t and a struct list_head in struct inode (typically 160 bits on a non-debug 64-bit system). Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r--fs/direct-io.c65
1 files changed, 51 insertions, 14 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 98ce3ac0d94b..354cbdbc14bd 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -135,6 +135,50 @@ struct dio {
135 struct page *pages[DIO_PAGES]; /* page buffer */ 135 struct page *pages[DIO_PAGES]; /* page buffer */
136}; 136};
137 137
138static void __inode_dio_wait(struct inode *inode)
139{
140 wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
141 DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
142
143 do {
144 prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
145 if (atomic_read(&inode->i_dio_count))
146 schedule();
147 } while (atomic_read(&inode->i_dio_count));
148 finish_wait(wq, &q.wait);
149}
150
151/**
152 * inode_dio_wait - wait for outstanding DIO requests to finish
153 * @inode: inode to wait for
154 *
155 * Waits for all pending direct I/O requests to finish so that we can
156 * proceed with a truncate or equivalent operation.
157 *
158 * Must be called under a lock that serializes taking new references
159 * to i_dio_count, usually by inode->i_mutex.
160 */
161void inode_dio_wait(struct inode *inode)
162{
163 if (atomic_read(&inode->i_dio_count))
164 __inode_dio_wait(inode);
165}
166EXPORT_SYMBOL_GPL(inode_dio_wait);
167
168/*
169 * inode_dio_done - signal finish of a direct I/O request
170 * @inode: inode the direct I/O happens on
171 *
172 * This is called once we've finished processing a direct I/O request,
173 * and is used to wake up callers waiting for direct I/O to be quiesced.
174 */
175void inode_dio_done(struct inode *inode)
176{
177 if (atomic_dec_and_test(&inode->i_dio_count))
178 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
179}
180EXPORT_SYMBOL_GPL(inode_dio_done);
181
138/* 182/*
139 * How many pages are in the queue? 183 * How many pages are in the queue?
140 */ 184 */
@@ -254,9 +298,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
254 } 298 }
255 299
256 if (dio->flags & DIO_LOCKING) 300 if (dio->flags & DIO_LOCKING)
257 /* lockdep: non-owner release */ 301 inode_dio_done(dio->inode);
258 up_read_non_owner(&dio->inode->i_alloc_sem);
259
260 return ret; 302 return ret;
261} 303}
262 304
@@ -980,9 +1022,6 @@ out:
980 return ret; 1022 return ret;
981} 1023}
982 1024
983/*
984 * Releases both i_mutex and i_alloc_sem
985 */
986static ssize_t 1025static ssize_t
987direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 1026direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
988 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 1027 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
@@ -1146,15 +1185,14 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1146 * For writes this function is called under i_mutex and returns with 1185 * For writes this function is called under i_mutex and returns with
1147 * i_mutex held, for reads, i_mutex is not held on entry, but it is 1186 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1148 * taken and dropped again before returning. 1187 * taken and dropped again before returning.
1149 * For reads and writes i_alloc_sem is taken in shared mode and released 1188 * The i_dio_count counter keeps track of the number of outstanding
1150 * on I/O completion (which may happen asynchronously after returning to 1189 * direct I/O requests, and truncate waits for it to reach zero.
1151 * the caller). 1190 * New references to i_dio_count must only be grabbed with i_mutex
1191 * held.
1152 * 1192 *
1153 * - if the flags value does NOT contain DIO_LOCKING we don't use any 1193 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1154 * internal locking but rather rely on the filesystem to synchronize 1194 * internal locking but rather rely on the filesystem to synchronize
1155 * direct I/O reads/writes versus each other and truncate. 1195 * direct I/O reads/writes versus each other and truncate.
1156 * For reads and writes both i_mutex and i_alloc_sem are not held on
1157 * entry and are never taken.
1158 */ 1196 */
1159ssize_t 1197ssize_t
1160__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1198__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
@@ -1234,10 +1272,9 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1234 } 1272 }
1235 1273
1236 /* 1274 /*
1237 * Will be released at I/O completion, possibly in a 1275 * Will be decremented at I/O completion time.
1238 * different thread.
1239 */ 1276 */
1240 down_read_non_owner(&inode->i_alloc_sem); 1277 atomic_inc(&inode->i_dio_count);
1241 } 1278 }
1242 1279
1243 /* 1280 /*