diff options
author | Christoph Hellwig <hch@infradead.org> | 2011-06-24 14:29:43 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-07-20 20:47:46 -0400 |
commit | bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 (patch) | |
tree | ef5341c7747f809aec7ae233f6e3ef90af39be5f /fs/direct-io.c | |
parent | f9b5570d7fdedff32a2e78102bfb54cd1b12b289 (diff) |
fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may
be released by a non-owner, and its write side is always mirrored by
real exclusion. Its intended use is to wait for all pending direct I/O
requests to finish before starting a truncate.
Replace it with a hand-grown construct:
- exclusion for truncates is already guaranteed by i_mutex, so it can
simply fall away
- the reader side is replaced by an i_dio_count member in struct inode
that counts the number of pending direct I/O requests. Truncate can't
proceed as long as it's non-zero
- when i_dio_count reaches zero we wake up a pending truncate using
wake_up_bit on a new bit in i_state
- new references to i_dio_count can't appear while we are waiting for
it to read zero because the direct I/O count always needs i_mutex
(or an equivalent like XFS's i_iolock) for starting a new operation.
This scheme is much simpler, and saves the space of a spinlock_t and a
struct list_head in struct inode (typically 160 bits on a non-debug 64-bit
system).
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- | fs/direct-io.c | 65 |
1 files changed, 51 insertions, 14 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index 98ce3ac0d94b..354cbdbc14bd 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -135,6 +135,50 @@ struct dio { | |||
135 | struct page *pages[DIO_PAGES]; /* page buffer */ | 135 | struct page *pages[DIO_PAGES]; /* page buffer */ |
136 | }; | 136 | }; |
137 | 137 | ||
138 | static void __inode_dio_wait(struct inode *inode) | ||
139 | { | ||
140 | wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); | ||
141 | DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); | ||
142 | |||
143 | do { | ||
144 | prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE); | ||
145 | if (atomic_read(&inode->i_dio_count)) | ||
146 | schedule(); | ||
147 | } while (atomic_read(&inode->i_dio_count)); | ||
148 | finish_wait(wq, &q.wait); | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * inode_dio_wait - wait for outstanding DIO requests to finish | ||
153 | * @inode: inode to wait for | ||
154 | * | ||
155 | * Waits for all pending direct I/O requests to finish so that we can | ||
156 | * proceed with a truncate or equivalent operation. | ||
157 | * | ||
158 | * Must be called under a lock that serializes taking new references | ||
159 | * to i_dio_count, usually by inode->i_mutex. | ||
160 | */ | ||
161 | void inode_dio_wait(struct inode *inode) | ||
162 | { | ||
163 | if (atomic_read(&inode->i_dio_count)) | ||
164 | __inode_dio_wait(inode); | ||
165 | } | ||
166 | EXPORT_SYMBOL_GPL(inode_dio_wait); | ||
167 | |||
168 | /* | ||
169 | * inode_dio_done - signal finish of a direct I/O requests | ||
170 | * @inode: inode the direct I/O happens on | ||
171 | * | ||
172 | * This is called once we've finished processing a direct I/O request, | ||
173 | * and is used to wake up callers waiting for direct I/O to be quiesced. | ||
174 | */ | ||
175 | void inode_dio_done(struct inode *inode) | ||
176 | { | ||
177 | if (atomic_dec_and_test(&inode->i_dio_count)) | ||
178 | wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); | ||
179 | } | ||
180 | EXPORT_SYMBOL_GPL(inode_dio_done); | ||
181 | |||
138 | /* | 182 | /* |
139 | * How many pages are in the queue? | 183 | * How many pages are in the queue? |
140 | */ | 184 | */ |
@@ -254,9 +298,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is | |||
254 | } | 298 | } |
255 | 299 | ||
256 | if (dio->flags & DIO_LOCKING) | 300 | if (dio->flags & DIO_LOCKING) |
257 | /* lockdep: non-owner release */ | 301 | inode_dio_done(dio->inode); |
258 | up_read_non_owner(&dio->inode->i_alloc_sem); | ||
259 | |||
260 | return ret; | 302 | return ret; |
261 | } | 303 | } |
262 | 304 | ||
@@ -980,9 +1022,6 @@ out: | |||
980 | return ret; | 1022 | return ret; |
981 | } | 1023 | } |
982 | 1024 | ||
983 | /* | ||
984 | * Releases both i_mutex and i_alloc_sem | ||
985 | */ | ||
986 | static ssize_t | 1025 | static ssize_t |
987 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 1026 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
988 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 1027 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
@@ -1146,15 +1185,14 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1146 | * For writes this function is called under i_mutex and returns with | 1185 | * For writes this function is called under i_mutex and returns with |
1147 | * i_mutex held, for reads, i_mutex is not held on entry, but it is | 1186 | * i_mutex held, for reads, i_mutex is not held on entry, but it is |
1148 | * taken and dropped again before returning. | 1187 | * taken and dropped again before returning. |
1149 | * For reads and writes i_alloc_sem is taken in shared mode and released | 1188 | * The i_dio_count counter keeps track of the number of outstanding |
1150 | * on I/O completion (which may happen asynchronously after returning to | 1189 | * direct I/O requests, and truncate waits for it to reach zero. |
1151 | * the caller). | 1190 | * New references to i_dio_count must only be grabbed with i_mutex |
1191 | * held. | ||
1152 | * | 1192 | * |
1153 | * - if the flags value does NOT contain DIO_LOCKING we don't use any | 1193 | * - if the flags value does NOT contain DIO_LOCKING we don't use any |
1154 | * internal locking but rather rely on the filesystem to synchronize | 1194 | * internal locking but rather rely on the filesystem to synchronize |
1155 | * direct I/O reads/writes versus each other and truncate. | 1195 | * direct I/O reads/writes versus each other and truncate. |
1156 | * For reads and writes both i_mutex and i_alloc_sem are not held on | ||
1157 | * entry and are never taken. | ||
1158 | */ | 1196 | */ |
1159 | ssize_t | 1197 | ssize_t |
1160 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1198 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
@@ -1234,10 +1272,9 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1234 | } | 1272 | } |
1235 | 1273 | ||
1236 | /* | 1274 | /* |
1237 | * Will be released at I/O completion, possibly in a | 1275 | * Will be decremented at I/O completion time. |
1238 | * different thread. | ||
1239 | */ | 1276 | */ |
1240 | down_read_non_owner(&inode->i_alloc_sem); | 1277 | atomic_inc(&inode->i_dio_count); |
1241 | } | 1278 | } |
1242 | 1279 | ||
1243 | /* | 1280 | /* |