diff options
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r-- | fs/ocfs2/file.c | 165 |
1 files changed, 15 insertions, 150 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7cb38fdca229..c18ab45f8d21 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1381,44 +1381,6 @@ out: | |||
1381 | return ret; | 1381 | return ret; |
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /* | ||
1385 | * Will look for holes and unwritten extents in the range starting at | ||
1386 | * pos for count bytes (inclusive). | ||
1387 | */ | ||
1388 | static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos, | ||
1389 | size_t count) | ||
1390 | { | ||
1391 | int ret = 0; | ||
1392 | unsigned int extent_flags; | ||
1393 | u32 cpos, clusters, extent_len, phys_cpos; | ||
1394 | struct super_block *sb = inode->i_sb; | ||
1395 | |||
1396 | cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; | ||
1397 | clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; | ||
1398 | |||
1399 | while (clusters) { | ||
1400 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, | ||
1401 | &extent_flags); | ||
1402 | if (ret < 0) { | ||
1403 | mlog_errno(ret); | ||
1404 | goto out; | ||
1405 | } | ||
1406 | |||
1407 | if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) { | ||
1408 | ret = 1; | ||
1409 | break; | ||
1410 | } | ||
1411 | |||
1412 | if (extent_len > clusters) | ||
1413 | extent_len = clusters; | ||
1414 | |||
1415 | clusters -= extent_len; | ||
1416 | cpos += extent_len; | ||
1417 | } | ||
1418 | out: | ||
1419 | return ret; | ||
1420 | } | ||
1421 | |||
1422 | static int ocfs2_write_remove_suid(struct inode *inode) | 1384 | static int ocfs2_write_remove_suid(struct inode *inode) |
1423 | { | 1385 | { |
1424 | int ret; | 1386 | int ret; |
@@ -2129,18 +2091,12 @@ out: | |||
2129 | 2091 | ||
2130 | static int ocfs2_prepare_inode_for_write(struct file *file, | 2092 | static int ocfs2_prepare_inode_for_write(struct file *file, |
2131 | loff_t pos, | 2093 | loff_t pos, |
2132 | size_t count, | 2094 | size_t count) |
2133 | int appending, | ||
2134 | int *direct_io, | ||
2135 | int *has_refcount) | ||
2136 | { | 2095 | { |
2137 | int ret = 0, meta_level = 0; | 2096 | int ret = 0, meta_level = 0; |
2138 | struct dentry *dentry = file->f_path.dentry; | 2097 | struct dentry *dentry = file->f_path.dentry; |
2139 | struct inode *inode = d_inode(dentry); | 2098 | struct inode *inode = d_inode(dentry); |
2140 | loff_t end; | 2099 | loff_t end; |
2141 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2142 | int full_coherency = !(osb->s_mount_opt & | ||
2143 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
2144 | 2100 | ||
2145 | /* | 2101 | /* |
2146 | * We start with a read level meta lock and only jump to an ex | 2102 | * We start with a read level meta lock and only jump to an ex |
@@ -2189,10 +2145,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file, | |||
2189 | pos, | 2145 | pos, |
2190 | count, | 2146 | count, |
2191 | &meta_level); | 2147 | &meta_level); |
2192 | if (has_refcount) | ||
2193 | *has_refcount = 1; | ||
2194 | if (direct_io) | ||
2195 | *direct_io = 0; | ||
2196 | } | 2148 | } |
2197 | 2149 | ||
2198 | if (ret < 0) { | 2150 | if (ret < 0) { |
@@ -2200,67 +2152,12 @@ static int ocfs2_prepare_inode_for_write(struct file *file, | |||
2200 | goto out_unlock; | 2152 | goto out_unlock; |
2201 | } | 2153 | } |
2202 | 2154 | ||
2203 | /* | ||
2204 | * Skip the O_DIRECT checks if we don't need | ||
2205 | * them. | ||
2206 | */ | ||
2207 | if (!direct_io || !(*direct_io)) | ||
2208 | break; | ||
2209 | |||
2210 | /* | ||
2211 | * There's no sane way to do direct writes to an inode | ||
2212 | * with inline data. | ||
2213 | */ | ||
2214 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
2215 | *direct_io = 0; | ||
2216 | break; | ||
2217 | } | ||
2218 | |||
2219 | /* | ||
2220 | * Allowing concurrent direct writes means | ||
2221 | * i_size changes wouldn't be synchronized, so | ||
2222 | * one node could wind up truncating another | ||
2223 | * node's writes. | ||
2224 | */ | ||
2225 | if (end > i_size_read(inode) && !full_coherency) { | ||
2226 | *direct_io = 0; | ||
2227 | break; | ||
2228 | } | ||
2229 | |||
2230 | /* | ||
2231 | * Fallback to old way if the feature bit is not set. | ||
2232 | */ | ||
2233 | if (end > i_size_read(inode) && | ||
2234 | !ocfs2_supports_append_dio(osb)) { | ||
2235 | *direct_io = 0; | ||
2236 | break; | ||
2237 | } | ||
2238 | |||
2239 | /* | ||
2240 | * We don't fill holes during direct io, so | ||
2241 | * check for them here. If any are found, the | ||
2242 | * caller will have to retake some cluster | ||
2243 | * locks and initiate the io as buffered. | ||
2244 | */ | ||
2245 | ret = ocfs2_check_range_for_holes(inode, pos, count); | ||
2246 | if (ret == 1) { | ||
2247 | /* | ||
2248 | * Fallback to old way if the feature bit is not set. | ||
2249 | * Otherwise try dio first and then complete the rest | ||
2250 | * request through buffer io. | ||
2251 | */ | ||
2252 | if (!ocfs2_supports_append_dio(osb)) | ||
2253 | *direct_io = 0; | ||
2254 | ret = 0; | ||
2255 | } else if (ret < 0) | ||
2256 | mlog_errno(ret); | ||
2257 | break; | 2155 | break; |
2258 | } | 2156 | } |
2259 | 2157 | ||
2260 | out_unlock: | 2158 | out_unlock: |
2261 | trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, | 2159 | trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, |
2262 | pos, appending, count, | 2160 | pos, count); |
2263 | direct_io, has_refcount); | ||
2264 | 2161 | ||
2265 | if (meta_level >= 0) | 2162 | if (meta_level >= 0) |
2266 | ocfs2_inode_unlock(inode, meta_level); | 2163 | ocfs2_inode_unlock(inode, meta_level); |
@@ -2272,18 +2169,16 @@ out: | |||
2272 | static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | 2169 | static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, |
2273 | struct iov_iter *from) | 2170 | struct iov_iter *from) |
2274 | { | 2171 | { |
2275 | int direct_io, appending, rw_level; | 2172 | int direct_io, rw_level; |
2276 | int can_do_direct, has_refcount = 0; | ||
2277 | ssize_t written = 0; | 2173 | ssize_t written = 0; |
2278 | ssize_t ret; | 2174 | ssize_t ret; |
2279 | size_t count = iov_iter_count(from), orig_count; | 2175 | size_t count = iov_iter_count(from); |
2280 | struct file *file = iocb->ki_filp; | 2176 | struct file *file = iocb->ki_filp; |
2281 | struct inode *inode = file_inode(file); | 2177 | struct inode *inode = file_inode(file); |
2282 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2178 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2283 | int full_coherency = !(osb->s_mount_opt & | 2179 | int full_coherency = !(osb->s_mount_opt & |
2284 | OCFS2_MOUNT_COHERENCY_BUFFERED); | 2180 | OCFS2_MOUNT_COHERENCY_BUFFERED); |
2285 | int unaligned_dio = 0; | 2181 | void *saved_ki_complete = NULL; |
2286 | int dropped_dio = 0; | ||
2287 | int append_write = ((iocb->ki_pos + count) >= | 2182 | int append_write = ((iocb->ki_pos + count) >= |
2288 | i_size_read(inode) ? 1 : 0); | 2183 | i_size_read(inode) ? 1 : 0); |
2289 | 2184 | ||
@@ -2296,12 +2191,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2296 | if (count == 0) | 2191 | if (count == 0) |
2297 | return 0; | 2192 | return 0; |
2298 | 2193 | ||
2299 | appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0; | ||
2300 | direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; | 2194 | direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; |
2301 | 2195 | ||
2302 | inode_lock(inode); | 2196 | inode_lock(inode); |
2303 | 2197 | ||
2304 | relock: | ||
2305 | /* | 2198 | /* |
2306 | * Concurrent O_DIRECT writes are allowed with | 2199 | * Concurrent O_DIRECT writes are allowed with |
2307 | * mount_option "coherency=buffered". | 2200 | * mount_option "coherency=buffered". |
@@ -2334,7 +2227,6 @@ relock: | |||
2334 | ocfs2_inode_unlock(inode, 1); | 2227 | ocfs2_inode_unlock(inode, 1); |
2335 | } | 2228 | } |
2336 | 2229 | ||
2337 | orig_count = iov_iter_count(from); | ||
2338 | ret = generic_write_checks(iocb, from); | 2230 | ret = generic_write_checks(iocb, from); |
2339 | if (ret <= 0) { | 2231 | if (ret <= 0) { |
2340 | if (ret) | 2232 | if (ret) |
@@ -2343,41 +2235,18 @@ relock: | |||
2343 | } | 2235 | } |
2344 | count = ret; | 2236 | count = ret; |
2345 | 2237 | ||
2346 | can_do_direct = direct_io; | 2238 | ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count); |
2347 | ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending, | ||
2348 | &can_do_direct, &has_refcount); | ||
2349 | if (ret < 0) { | 2239 | if (ret < 0) { |
2350 | mlog_errno(ret); | 2240 | mlog_errno(ret); |
2351 | goto out; | 2241 | goto out; |
2352 | } | 2242 | } |
2353 | 2243 | ||
2354 | if (direct_io && !is_sync_kiocb(iocb)) | 2244 | if (direct_io && !is_sync_kiocb(iocb) && |
2355 | unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos); | 2245 | ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) { |
2356 | |||
2357 | /* | ||
2358 | * We can't complete the direct I/O as requested, fall back to | ||
2359 | * buffered I/O. | ||
2360 | */ | ||
2361 | if (direct_io && !can_do_direct) { | ||
2362 | ocfs2_rw_unlock(inode, rw_level); | ||
2363 | |||
2364 | rw_level = -1; | ||
2365 | |||
2366 | direct_io = 0; | ||
2367 | iocb->ki_flags &= ~IOCB_DIRECT; | ||
2368 | iov_iter_reexpand(from, orig_count); | ||
2369 | dropped_dio = 1; | ||
2370 | goto relock; | ||
2371 | } | ||
2372 | |||
2373 | if (unaligned_dio) { | ||
2374 | /* | 2246 | /* |
2375 | * Wait on previous unaligned aio to complete before | 2247 | * Make it a sync io if it's an unaligned aio. |
2376 | * proceeding. | ||
2377 | */ | 2248 | */ |
2378 | mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio); | 2249 | saved_ki_complete = xchg(&iocb->ki_complete, NULL); |
2379 | /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */ | ||
2380 | ocfs2_iocb_set_unaligned_aio(iocb); | ||
2381 | } | 2250 | } |
2382 | 2251 | ||
2383 | /* communicate with ocfs2_dio_end_io */ | 2252 | /* communicate with ocfs2_dio_end_io */ |
@@ -2398,14 +2267,13 @@ relock: | |||
2398 | */ | 2267 | */ |
2399 | if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { | 2268 | if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { |
2400 | rw_level = -1; | 2269 | rw_level = -1; |
2401 | unaligned_dio = 0; | ||
2402 | } | 2270 | } |
2403 | 2271 | ||
2404 | if (unlikely(written <= 0)) | 2272 | if (unlikely(written <= 0)) |
2405 | goto no_sync; | 2273 | goto out; |
2406 | 2274 | ||
2407 | if (((file->f_flags & O_DSYNC) && !direct_io) || | 2275 | if (((file->f_flags & O_DSYNC) && !direct_io) || |
2408 | IS_SYNC(inode) || dropped_dio) { | 2276 | IS_SYNC(inode)) { |
2409 | ret = filemap_fdatawrite_range(file->f_mapping, | 2277 | ret = filemap_fdatawrite_range(file->f_mapping, |
2410 | iocb->ki_pos - written, | 2278 | iocb->ki_pos - written, |
2411 | iocb->ki_pos - 1); | 2279 | iocb->ki_pos - 1); |
@@ -2424,13 +2292,10 @@ relock: | |||
2424 | iocb->ki_pos - 1); | 2292 | iocb->ki_pos - 1); |
2425 | } | 2293 | } |
2426 | 2294 | ||
2427 | no_sync: | ||
2428 | if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) { | ||
2429 | ocfs2_iocb_clear_unaligned_aio(iocb); | ||
2430 | mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); | ||
2431 | } | ||
2432 | |||
2433 | out: | 2295 | out: |
2296 | if (saved_ki_complete) | ||
2297 | xchg(&iocb->ki_complete, saved_ki_complete); | ||
2298 | |||
2434 | if (rw_level != -1) | 2299 | if (rw_level != -1) |
2435 | ocfs2_rw_unlock(inode, rw_level); | 2300 | ocfs2_rw_unlock(inode, rw_level); |
2436 | 2301 | ||