diff options
author | Gang He <ghe@suse.com> | 2018-01-31 19:15:25 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 20:18:35 -0500 |
commit | c4c2416ab0d656539cca5de4ae0a2ba8ec3d9eca (patch) | |
tree | 043ac9760184a10a566f6ba415d37e5df93fa802 /fs/ocfs2/file.c | |
parent | ac604d3cdb20a12d67131d20095c4c7905aeb722 (diff) |
ocfs2: nowait aio support
Return EAGAIN for direct I/O if any of the following conditions holds:
- The related locks cannot be acquired immediately.
- Blocks are not allocated at the write location, so the write would
trigger block allocation and block I/O operations.
[ghe@suse.com: v4]
Link: http://lkml.kernel.org/r/1516007283-29932-4-git-send-email-ghe@suse.com
[ghe@suse.com: v2]
Link: http://lkml.kernel.org/r/1511944612-9629-4-git-send-email-ghe@suse.com
Link: http://lkml.kernel.org/r/1511775987-841-4-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r-- | fs/ocfs2/file.c | 101 |
1 files changed, 80 insertions, 21 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a1d051055472..5d1784a365a3 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) | |||
140 | spin_unlock(&oi->ip_lock); | 140 | spin_unlock(&oi->ip_lock); |
141 | } | 141 | } |
142 | 142 | ||
143 | file->f_mode |= FMODE_NOWAIT; | ||
144 | |||
143 | leave: | 145 | leave: |
144 | return status; | 146 | return status; |
145 | } | 147 | } |
@@ -2132,12 +2134,12 @@ out: | |||
2132 | } | 2134 | } |
2133 | 2135 | ||
2134 | static int ocfs2_prepare_inode_for_write(struct file *file, | 2136 | static int ocfs2_prepare_inode_for_write(struct file *file, |
2135 | loff_t pos, | 2137 | loff_t pos, size_t count, int wait) |
2136 | size_t count) | ||
2137 | { | 2138 | { |
2138 | int ret = 0, meta_level = 0; | 2139 | int ret = 0, meta_level = 0, overwrite_io = 0; |
2139 | struct dentry *dentry = file->f_path.dentry; | 2140 | struct dentry *dentry = file->f_path.dentry; |
2140 | struct inode *inode = d_inode(dentry); | 2141 | struct inode *inode = d_inode(dentry); |
2142 | struct buffer_head *di_bh = NULL; | ||
2141 | loff_t end; | 2143 | loff_t end; |
2142 | 2144 | ||
2143 | /* | 2145 | /* |
@@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file, | |||
2145 | * if we need to make modifications here. | 2147 | * if we need to make modifications here. |
2146 | */ | 2148 | */ |
2147 | for(;;) { | 2149 | for(;;) { |
2148 | ret = ocfs2_inode_lock(inode, NULL, meta_level); | 2150 | if (wait) |
2151 | ret = ocfs2_inode_lock(inode, NULL, meta_level); | ||
2152 | else | ||
2153 | ret = ocfs2_try_inode_lock(inode, | ||
2154 | overwrite_io ? NULL : &di_bh, meta_level); | ||
2149 | if (ret < 0) { | 2155 | if (ret < 0) { |
2150 | meta_level = -1; | 2156 | meta_level = -1; |
2151 | mlog_errno(ret); | 2157 | if (ret != -EAGAIN) |
2158 | mlog_errno(ret); | ||
2152 | goto out; | 2159 | goto out; |
2153 | } | 2160 | } |
2154 | 2161 | ||
2162 | /* | ||
2163 | * Check if IO will overwrite allocated blocks in case | ||
2164 | * IOCB_NOWAIT flag is set. | ||
2165 | */ | ||
2166 | if (!wait && !overwrite_io) { | ||
2167 | overwrite_io = 1; | ||
2168 | if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) { | ||
2169 | ret = -EAGAIN; | ||
2170 | goto out_unlock; | ||
2171 | } | ||
2172 | |||
2173 | ret = ocfs2_overwrite_io(inode, di_bh, pos, count); | ||
2174 | brelse(di_bh); | ||
2175 | di_bh = NULL; | ||
2176 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
2177 | if (ret < 0) { | ||
2178 | if (ret != -EAGAIN) | ||
2179 | mlog_errno(ret); | ||
2180 | goto out_unlock; | ||
2181 | } | ||
2182 | } | ||
2183 | |||
2155 | /* Clear suid / sgid if necessary. We do this here | 2184 | /* Clear suid / sgid if necessary. We do this here |
2156 | * instead of later in the write path because | 2185 | * instead of later in the write path because |
2157 | * remove_suid() calls ->setattr without any hint that | 2186 | * remove_suid() calls ->setattr without any hint that |
@@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, | |||
2199 | 2228 | ||
2200 | out_unlock: | 2229 | out_unlock: |
2201 | trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, | 2230 | trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, |
2202 | pos, count); | 2231 | pos, count, wait); |
2232 | |||
2233 | brelse(di_bh); | ||
2203 | 2234 | ||
2204 | if (meta_level >= 0) | 2235 | if (meta_level >= 0) |
2205 | ocfs2_inode_unlock(inode, meta_level); | 2236 | ocfs2_inode_unlock(inode, meta_level); |
@@ -2211,7 +2242,7 @@ out: | |||
2211 | static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | 2242 | static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, |
2212 | struct iov_iter *from) | 2243 | struct iov_iter *from) |
2213 | { | 2244 | { |
2214 | int direct_io, rw_level; | 2245 | int rw_level; |
2215 | ssize_t written = 0; | 2246 | ssize_t written = 0; |
2216 | ssize_t ret; | 2247 | ssize_t ret; |
2217 | size_t count = iov_iter_count(from); | 2248 | size_t count = iov_iter_count(from); |
@@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2223 | void *saved_ki_complete = NULL; | 2254 | void *saved_ki_complete = NULL; |
2224 | int append_write = ((iocb->ki_pos + count) >= | 2255 | int append_write = ((iocb->ki_pos + count) >= |
2225 | i_size_read(inode) ? 1 : 0); | 2256 | i_size_read(inode) ? 1 : 0); |
2257 | int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; | ||
2258 | int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; | ||
2226 | 2259 | ||
2227 | trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, | 2260 | trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, |
2228 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2261 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
@@ -2230,12 +2263,17 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2230 | file->f_path.dentry->d_name.name, | 2263 | file->f_path.dentry->d_name.name, |
2231 | (unsigned int)from->nr_segs); /* GRRRRR */ | 2264 | (unsigned int)from->nr_segs); /* GRRRRR */ |
2232 | 2265 | ||
2266 | if (!direct_io && nowait) | ||
2267 | return -EOPNOTSUPP; | ||
2268 | |||
2233 | if (count == 0) | 2269 | if (count == 0) |
2234 | return 0; | 2270 | return 0; |
2235 | 2271 | ||
2236 | direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; | 2272 | if (nowait) { |
2237 | 2273 | if (!inode_trylock(inode)) | |
2238 | inode_lock(inode); | 2274 | return -EAGAIN; |
2275 | } else | ||
2276 | inode_lock(inode); | ||
2239 | 2277 | ||
2240 | /* | 2278 | /* |
2241 | * Concurrent O_DIRECT writes are allowed with | 2279 | * Concurrent O_DIRECT writes are allowed with |
@@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2244 | */ | 2282 | */ |
2245 | rw_level = (!direct_io || full_coherency || append_write); | 2283 | rw_level = (!direct_io || full_coherency || append_write); |
2246 | 2284 | ||
2247 | ret = ocfs2_rw_lock(inode, rw_level); | 2285 | if (nowait) |
2286 | ret = ocfs2_try_rw_lock(inode, rw_level); | ||
2287 | else | ||
2288 | ret = ocfs2_rw_lock(inode, rw_level); | ||
2248 | if (ret < 0) { | 2289 | if (ret < 0) { |
2249 | mlog_errno(ret); | 2290 | if (ret != -EAGAIN) |
2291 | mlog_errno(ret); | ||
2250 | goto out_mutex; | 2292 | goto out_mutex; |
2251 | } | 2293 | } |
2252 | 2294 | ||
@@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2260 | * other nodes to drop their caches. Buffered I/O | 2302 | * other nodes to drop their caches. Buffered I/O |
2261 | * already does this in write_begin(). | 2303 | * already does this in write_begin(). |
2262 | */ | 2304 | */ |
2263 | ret = ocfs2_inode_lock(inode, NULL, 1); | 2305 | if (nowait) |
2306 | ret = ocfs2_try_inode_lock(inode, NULL, 1); | ||
2307 | else | ||
2308 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
2264 | if (ret < 0) { | 2309 | if (ret < 0) { |
2265 | mlog_errno(ret); | 2310 | if (ret != -EAGAIN) |
2311 | mlog_errno(ret); | ||
2266 | goto out; | 2312 | goto out; |
2267 | } | 2313 | } |
2268 | 2314 | ||
@@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2277 | } | 2323 | } |
2278 | count = ret; | 2324 | count = ret; |
2279 | 2325 | ||
2280 | ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count); | 2326 | ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait); |
2281 | if (ret < 0) { | 2327 | if (ret < 0) { |
2282 | mlog_errno(ret); | 2328 | if (ret != -EAGAIN) |
2329 | mlog_errno(ret); | ||
2283 | goto out; | 2330 | goto out; |
2284 | } | 2331 | } |
2285 | 2332 | ||
@@ -2355,6 +2402,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | |||
2355 | int ret = 0, rw_level = -1, lock_level = 0; | 2402 | int ret = 0, rw_level = -1, lock_level = 0; |
2356 | struct file *filp = iocb->ki_filp; | 2403 | struct file *filp = iocb->ki_filp; |
2357 | struct inode *inode = file_inode(filp); | 2404 | struct inode *inode = file_inode(filp); |
2405 | int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; | ||
2406 | int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; | ||
2358 | 2407 | ||
2359 | trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, | 2408 | trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, |
2360 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2409 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
@@ -2369,14 +2418,22 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | |||
2369 | goto bail; | 2418 | goto bail; |
2370 | } | 2419 | } |
2371 | 2420 | ||
2421 | if (!direct_io && nowait) | ||
2422 | return -EOPNOTSUPP; | ||
2423 | |||
2372 | /* | 2424 | /* |
2373 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads | 2425 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads |
2374 | * need locks to protect pending reads from racing with truncate. | 2426 | * need locks to protect pending reads from racing with truncate. |
2375 | */ | 2427 | */ |
2376 | if (iocb->ki_flags & IOCB_DIRECT) { | 2428 | if (direct_io) { |
2377 | ret = ocfs2_rw_lock(inode, 0); | 2429 | if (nowait) |
2430 | ret = ocfs2_try_rw_lock(inode, 0); | ||
2431 | else | ||
2432 | ret = ocfs2_rw_lock(inode, 0); | ||
2433 | |||
2378 | if (ret < 0) { | 2434 | if (ret < 0) { |
2379 | mlog_errno(ret); | 2435 | if (ret != -EAGAIN) |
2436 | mlog_errno(ret); | ||
2380 | goto bail; | 2437 | goto bail; |
2381 | } | 2438 | } |
2382 | rw_level = 0; | 2439 | rw_level = 0; |
@@ -2393,9 +2450,11 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | |||
2393 | * like i_size. This allows the checks down below | 2450 | * like i_size. This allows the checks down below |
2394 | * generic_file_aio_read() a chance of actually working. | 2451 | * generic_file_aio_read() a chance of actually working. |
2395 | */ | 2452 | */ |
2396 | ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); | 2453 | ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level, |
2454 | !nowait); | ||
2397 | if (ret < 0) { | 2455 | if (ret < 0) { |
2398 | mlog_errno(ret); | 2456 | if (ret != -EAGAIN) |
2457 | mlog_errno(ret); | ||
2399 | goto bail; | 2458 | goto bail; |
2400 | } | 2459 | } |
2401 | ocfs2_inode_unlock(inode, lock_level); | 2460 | ocfs2_inode_unlock(inode, lock_level); |