about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ocfs2/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r-- fs/ocfs2/file.c 165
1 files changed, 15 insertions, 150 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7cb38fdca229..c18ab45f8d21 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1381,44 +1381,6 @@ out:
1381 return ret; 1381 return ret;
1382} 1382}
1383 1383
1384/*
1385 * Will look for holes and unwritten extents in the range starting at
1386 * pos for count bytes (inclusive).
1387 */
1388static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
1389 size_t count)
1390{
1391 int ret = 0;
1392 unsigned int extent_flags;
1393 u32 cpos, clusters, extent_len, phys_cpos;
1394 struct super_block *sb = inode->i_sb;
1395
1396 cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
1397 clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
1398
1399 while (clusters) {
1400 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
1401 &extent_flags);
1402 if (ret < 0) {
1403 mlog_errno(ret);
1404 goto out;
1405 }
1406
1407 if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
1408 ret = 1;
1409 break;
1410 }
1411
1412 if (extent_len > clusters)
1413 extent_len = clusters;
1414
1415 clusters -= extent_len;
1416 cpos += extent_len;
1417 }
1418out:
1419 return ret;
1420}
1421
1422static int ocfs2_write_remove_suid(struct inode *inode) 1384static int ocfs2_write_remove_suid(struct inode *inode)
1423{ 1385{
1424 int ret; 1386 int ret;
@@ -2129,18 +2091,12 @@ out:
2129 2091
2130static int ocfs2_prepare_inode_for_write(struct file *file, 2092static int ocfs2_prepare_inode_for_write(struct file *file,
2131 loff_t pos, 2093 loff_t pos,
2132 size_t count, 2094 size_t count)
2133 int appending,
2134 int *direct_io,
2135 int *has_refcount)
2136{ 2095{
2137 int ret = 0, meta_level = 0; 2096 int ret = 0, meta_level = 0;
2138 struct dentry *dentry = file->f_path.dentry; 2097 struct dentry *dentry = file->f_path.dentry;
2139 struct inode *inode = d_inode(dentry); 2098 struct inode *inode = d_inode(dentry);
2140 loff_t end; 2099 loff_t end;
2141 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2142 int full_coherency = !(osb->s_mount_opt &
2143 OCFS2_MOUNT_COHERENCY_BUFFERED);
2144 2100
2145 /* 2101 /*
2146 * We start with a read level meta lock and only jump to an ex 2102 * We start with a read level meta lock and only jump to an ex
@@ -2189,10 +2145,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2189 pos, 2145 pos,
2190 count, 2146 count,
2191 &meta_level); 2147 &meta_level);
2192 if (has_refcount)
2193 *has_refcount = 1;
2194 if (direct_io)
2195 *direct_io = 0;
2196 } 2148 }
2197 2149
2198 if (ret < 0) { 2150 if (ret < 0) {
@@ -2200,67 +2152,12 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2200 goto out_unlock; 2152 goto out_unlock;
2201 } 2153 }
2202 2154
2203 /*
2204 * Skip the O_DIRECT checks if we don't need
2205 * them.
2206 */
2207 if (!direct_io || !(*direct_io))
2208 break;
2209
2210 /*
2211 * There's no sane way to do direct writes to an inode
2212 * with inline data.
2213 */
2214 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2215 *direct_io = 0;
2216 break;
2217 }
2218
2219 /*
2220 * Allowing concurrent direct writes means
2221 * i_size changes wouldn't be synchronized, so
2222 * one node could wind up truncating another
2223 * nodes writes.
2224 */
2225 if (end > i_size_read(inode) && !full_coherency) {
2226 *direct_io = 0;
2227 break;
2228 }
2229
2230 /*
2231 * Fallback to old way if the feature bit is not set.
2232 */
2233 if (end > i_size_read(inode) &&
2234 !ocfs2_supports_append_dio(osb)) {
2235 *direct_io = 0;
2236 break;
2237 }
2238
2239 /*
2240 * We don't fill holes during direct io, so
2241 * check for them here. If any are found, the
2242 * caller will have to retake some cluster
2243 * locks and initiate the io as buffered.
2244 */
2245 ret = ocfs2_check_range_for_holes(inode, pos, count);
2246 if (ret == 1) {
2247 /*
2248 * Fallback to old way if the feature bit is not set.
2249 * Otherwise try dio first and then complete the rest
2250 * request through buffer io.
2251 */
2252 if (!ocfs2_supports_append_dio(osb))
2253 *direct_io = 0;
2254 ret = 0;
2255 } else if (ret < 0)
2256 mlog_errno(ret);
2257 break; 2155 break;
2258 } 2156 }
2259 2157
2260out_unlock: 2158out_unlock:
2261 trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, 2159 trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
2262 pos, appending, count, 2160 pos, count);
2263 direct_io, has_refcount);
2264 2161
2265 if (meta_level >= 0) 2162 if (meta_level >= 0)
2266 ocfs2_inode_unlock(inode, meta_level); 2163 ocfs2_inode_unlock(inode, meta_level);
@@ -2272,18 +2169,16 @@ out:
2272static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, 2169static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2273 struct iov_iter *from) 2170 struct iov_iter *from)
2274{ 2171{
2275 int direct_io, appending, rw_level; 2172 int direct_io, rw_level;
2276 int can_do_direct, has_refcount = 0;
2277 ssize_t written = 0; 2173 ssize_t written = 0;
2278 ssize_t ret; 2174 ssize_t ret;
2279 size_t count = iov_iter_count(from), orig_count; 2175 size_t count = iov_iter_count(from);
2280 struct file *file = iocb->ki_filp; 2176 struct file *file = iocb->ki_filp;
2281 struct inode *inode = file_inode(file); 2177 struct inode *inode = file_inode(file);
2282 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2178 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2283 int full_coherency = !(osb->s_mount_opt & 2179 int full_coherency = !(osb->s_mount_opt &
2284 OCFS2_MOUNT_COHERENCY_BUFFERED); 2180 OCFS2_MOUNT_COHERENCY_BUFFERED);
2285 int unaligned_dio = 0; 2181 void *saved_ki_complete = NULL;
2286 int dropped_dio = 0;
2287 int append_write = ((iocb->ki_pos + count) >= 2182 int append_write = ((iocb->ki_pos + count) >=
2288 i_size_read(inode) ? 1 : 0); 2183 i_size_read(inode) ? 1 : 0);
2289 2184
@@ -2296,12 +2191,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2296 if (count == 0) 2191 if (count == 0)
2297 return 0; 2192 return 0;
2298 2193
2299 appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
2300 direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; 2194 direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2301 2195
2302 inode_lock(inode); 2196 inode_lock(inode);
2303 2197
2304relock:
2305 /* 2198 /*
2306 * Concurrent O_DIRECT writes are allowed with 2199 * Concurrent O_DIRECT writes are allowed with
2307 * mount_option "coherency=buffered". 2200 * mount_option "coherency=buffered".
@@ -2334,7 +2227,6 @@ relock:
2334 ocfs2_inode_unlock(inode, 1); 2227 ocfs2_inode_unlock(inode, 1);
2335 } 2228 }
2336 2229
2337 orig_count = iov_iter_count(from);
2338 ret = generic_write_checks(iocb, from); 2230 ret = generic_write_checks(iocb, from);
2339 if (ret <= 0) { 2231 if (ret <= 0) {
2340 if (ret) 2232 if (ret)
@@ -2343,41 +2235,18 @@ relock:
2343 } 2235 }
2344 count = ret; 2236 count = ret;
2345 2237
2346 can_do_direct = direct_io; 2238 ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
2347 ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
2348 &can_do_direct, &has_refcount);
2349 if (ret < 0) { 2239 if (ret < 0) {
2350 mlog_errno(ret); 2240 mlog_errno(ret);
2351 goto out; 2241 goto out;
2352 } 2242 }
2353 2243
2354 if (direct_io && !is_sync_kiocb(iocb)) 2244 if (direct_io && !is_sync_kiocb(iocb) &&
2355 unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos); 2245 ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) {
2356
2357 /*
2358 * We can't complete the direct I/O as requested, fall back to
2359 * buffered I/O.
2360 */
2361 if (direct_io && !can_do_direct) {
2362 ocfs2_rw_unlock(inode, rw_level);
2363
2364 rw_level = -1;
2365
2366 direct_io = 0;
2367 iocb->ki_flags &= ~IOCB_DIRECT;
2368 iov_iter_reexpand(from, orig_count);
2369 dropped_dio = 1;
2370 goto relock;
2371 }
2372
2373 if (unaligned_dio) {
2374 /* 2246 /*
2375 * Wait on previous unaligned aio to complete before 2247 * Make it a sync io if it's an unaligned aio.
2376 * proceeding.
2377 */ 2248 */
2378 mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio); 2249 saved_ki_complete = xchg(&iocb->ki_complete, NULL);
2379 /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */
2380 ocfs2_iocb_set_unaligned_aio(iocb);
2381 } 2250 }
2382 2251
2383 /* communicate with ocfs2_dio_end_io */ 2252 /* communicate with ocfs2_dio_end_io */
@@ -2398,14 +2267,13 @@ relock:
2398 */ 2267 */
2399 if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { 2268 if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
2400 rw_level = -1; 2269 rw_level = -1;
2401 unaligned_dio = 0;
2402 } 2270 }
2403 2271
2404 if (unlikely(written <= 0)) 2272 if (unlikely(written <= 0))
2405 goto no_sync; 2273 goto out;
2406 2274
2407 if (((file->f_flags & O_DSYNC) && !direct_io) || 2275 if (((file->f_flags & O_DSYNC) && !direct_io) ||
2408 IS_SYNC(inode) || dropped_dio) { 2276 IS_SYNC(inode)) {
2409 ret = filemap_fdatawrite_range(file->f_mapping, 2277 ret = filemap_fdatawrite_range(file->f_mapping,
2410 iocb->ki_pos - written, 2278 iocb->ki_pos - written,
2411 iocb->ki_pos - 1); 2279 iocb->ki_pos - 1);
@@ -2424,13 +2292,10 @@ relock:
2424 iocb->ki_pos - 1); 2292 iocb->ki_pos - 1);
2425 } 2293 }
2426 2294
2427no_sync:
2428 if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
2429 ocfs2_iocb_clear_unaligned_aio(iocb);
2430 mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
2431 }
2432
2433out: 2295out:
2296 if (saved_ki_complete)
2297 xchg(&iocb->ki_complete, saved_ki_complete);
2298
2434 if (rw_level != -1) 2299 if (rw_level != -1)
2435 ocfs2_rw_unlock(inode, rw_level); 2300 ocfs2_rw_unlock(inode, rw_level);
2436 2301