aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Hellwig <hch@lst.de> 2016-10-20 00:53:50 -0400
committer Dave Chinner <david@fromorbit.com> 2016-10-20 00:53:50 -0400
commit 3ba020befef030aaabbd5eb82a09f6ddf02a9542 (patch)
tree b9f3d3f21209f5a875b51e4427d9b46141c77d08
parent 5f9268ca53aca992106d74edde3e7cf6c1be60a0 (diff)
xfs: optimize writes to reflink files
Instead of reserving space as the first thing in write_begin, move it past reading the extent in the data fork. That way we only have to read from the data fork once and can reuse that information for trimming the extent to the shared/unshared boundary. Additionally, this allows us to easily limit the actual write size to said boundary, and avoid a roundtrip on the ilock.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r-- fs/xfs/xfs_iomap.c 56
-rw-r--r-- fs/xfs/xfs_reflink.c 142
-rw-r--r-- fs/xfs/xfs_reflink.h 4
-rw-r--r-- fs/xfs/xfs_trace.h 3
4 files changed, 100 insertions, 105 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1dabf2eb136a..436e109bb01e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
566 xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, 566 xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
567 &got, &prev); 567 &got, &prev);
568 if (!eof && got.br_startoff <= offset_fsb) { 568 if (!eof && got.br_startoff <= offset_fsb) {
569 if (xfs_is_reflink_inode(ip)) {
570 bool shared;
571
572 end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
573 maxbytes_fsb);
574 xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
575 error = xfs_reflink_reserve_cow(ip, &got, &shared);
576 if (error)
577 goto out_unlock;
578 }
579
569 trace_xfs_iomap_found(ip, offset, count, 0, &got); 580 trace_xfs_iomap_found(ip, offset, count, 0, &got);
570 goto done; 581 goto done;
571 } 582 }
@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
961 struct xfs_mount *mp = ip->i_mount; 972 struct xfs_mount *mp = ip->i_mount;
962 struct xfs_bmbt_irec imap; 973 struct xfs_bmbt_irec imap;
963 xfs_fileoff_t offset_fsb, end_fsb; 974 xfs_fileoff_t offset_fsb, end_fsb;
964 bool shared, trimmed;
965 int nimaps = 1, error = 0; 975 int nimaps = 1, error = 0;
976 bool shared = false, trimmed = false;
966 unsigned lockmode; 977 unsigned lockmode;
967 978
968 if (XFS_FORCED_SHUTDOWN(mp)) 979 if (XFS_FORCED_SHUTDOWN(mp))
969 return -EIO; 980 return -EIO;
970 981
971 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
972 error = xfs_reflink_reserve_cow_range(ip, offset, length);
973 if (error < 0)
974 return error;
975 }
976
977 if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && 982 if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
978 !xfs_get_extsz_hint(ip)) { 983 !xfs_get_extsz_hint(ip)) {
979 /* Reserve delalloc blocks for regular writeback. */ 984 /* Reserve delalloc blocks for regular writeback. */
@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
981 iomap); 986 iomap);
982 } 987 }
983 988
984 lockmode = xfs_ilock_data_map_shared(ip); 989 /*
990 * COW writes will allocate delalloc space, so we need to make sure
991 * to take the lock exclusively here.
992 */
993 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
994 lockmode = XFS_ILOCK_EXCL;
995 xfs_ilock(ip, XFS_ILOCK_EXCL);
996 } else {
997 lockmode = xfs_ilock_data_map_shared(ip);
998 }
985 999
986 ASSERT(offset <= mp->m_super->s_maxbytes); 1000 ASSERT(offset <= mp->m_super->s_maxbytes);
987 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) 1001 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -991,19 +1005,24 @@ xfs_file_iomap_begin(
991 1005
992 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 1006 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
993 &nimaps, 0); 1007 &nimaps, 0);
994 if (error) { 1008 if (error)
995 xfs_iunlock(ip, lockmode); 1009 goto out_unlock;
996 return error;
997 }
998 1010
999 if (flags & (IOMAP_WRITE | IOMAP_ZERO | IOMAP_REPORT)) { 1011 if (flags & IOMAP_REPORT) {
1000 /* Trim the mapping to the nearest shared extent boundary. */ 1012 /* Trim the mapping to the nearest shared extent boundary. */
1001 error = xfs_reflink_trim_around_shared(ip, &imap, &shared, 1013 error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
1002 &trimmed); 1014 &trimmed);
1003 if (error) { 1015 if (error)
1004 xfs_iunlock(ip, lockmode); 1016 goto out_unlock;
1005 return error; 1017 }
1006 } 1018
1019 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
1020 error = xfs_reflink_reserve_cow(ip, &imap, &shared);
1021 if (error)
1022 goto out_unlock;
1023
1024 end_fsb = imap.br_startoff + imap.br_blockcount;
1025 length = XFS_FSB_TO_B(mp, end_fsb) - offset;
1007 } 1026 }
1008 1027
1009 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { 1028 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
@@ -1042,6 +1061,9 @@ xfs_file_iomap_begin(
1042 if (shared) 1061 if (shared)
1043 iomap->flags |= IOMAP_F_SHARED; 1062 iomap->flags |= IOMAP_F_SHARED;
1044 return 0; 1063 return 0;
1064out_unlock:
1065 xfs_iunlock(ip, lockmode);
1066 return error;
1045} 1067}
1046 1068
1047static int 1069static int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 6c4c215634ec..9c477de3c1ac 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -228,50 +228,54 @@ xfs_reflink_trim_around_shared(
228 } 228 }
229} 229}
230 230
231/* Create a CoW reservation for a range of blocks within a file. */ 231/*
232static int 232 * Trim the passed in imap to the next shared/unshared extent boundary, and
233__xfs_reflink_reserve_cow( 233 * if imap->br_startoff points to a shared extent reserve space for it in the
234 * COW fork. In this case *shared is set to true, else to false.
235 *
236 * Note that imap will always contain the block numbers for the existing blocks
237 * in the data fork, as the upper layers need them for read-modify-write
238 * operations.
239 */
240int
241xfs_reflink_reserve_cow(
234 struct xfs_inode *ip, 242 struct xfs_inode *ip,
235 xfs_fileoff_t *offset_fsb, 243 struct xfs_bmbt_irec *imap,
236 xfs_fileoff_t end_fsb, 244 bool *shared)
237 bool *skipped)
238{ 245{
239 struct xfs_bmbt_irec got, prev, imap; 246 struct xfs_bmbt_irec got, prev;
240 xfs_fileoff_t orig_end_fsb; 247 xfs_fileoff_t end_fsb, orig_end_fsb;
241 int nimaps, eof = 0, error = 0; 248 int eof = 0, error = 0;
242 bool shared = false, trimmed = false; 249 bool trimmed;
243 xfs_extnum_t idx; 250 xfs_extnum_t idx;
244 xfs_extlen_t align; 251 xfs_extlen_t align;
245 252
246 /* Already reserved? Skip the refcount btree access. */ 253 /*
247 xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, 254 * Search the COW fork extent list first. This serves two purposes:
255 * first this implement the speculative preallocation using cowextisze,
256 * so that we also unshared block adjacent to shared blocks instead
257 * of just the shared blocks themselves. Second the lookup in the
258 * extent list is generally faster than going out to the shared extent
259 * tree.
260 */
261 xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
248 &got, &prev); 262 &got, &prev);
249 if (!eof && got.br_startoff <= *offset_fsb) { 263 if (!eof && got.br_startoff <= imap->br_startoff) {
250 end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; 264 trace_xfs_reflink_cow_found(ip, imap);
251 trace_xfs_reflink_cow_found(ip, &got); 265 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
252 goto done;
253 }
254 266
255 /* Read extent from the source file. */ 267 *shared = true;
256 nimaps = 1; 268 return 0;
257 error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, 269 }
258 &imap, &nimaps, 0);
259 if (error)
260 goto out_unlock;
261 ASSERT(nimaps == 1);
262 270
263 /* Trim the mapping to the nearest shared extent boundary. */ 271 /* Trim the mapping to the nearest shared extent boundary. */
264 error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); 272 error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
265 if (error) 273 if (error)
266 goto out_unlock; 274 return error;
267
268 end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
269 275
270 /* Not shared? Just report the (potentially capped) extent. */ 276 /* Not shared? Just report the (potentially capped) extent. */
271 if (!shared) { 277 if (!*shared)
272 *skipped = true; 278 return 0;
273 goto done;
274 }
275 279
276 /* 280 /*
277 * Fork all the shared blocks from our write offset until the end of 281 * Fork all the shared blocks from our write offset until the end of
@@ -279,72 +283,38 @@ __xfs_reflink_reserve_cow(
279 */ 283 */
280 error = xfs_qm_dqattach_locked(ip, 0); 284 error = xfs_qm_dqattach_locked(ip, 0);
281 if (error) 285 if (error)
282 goto out_unlock; 286 return error;
287
288 end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
283 289
284 align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); 290 align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
285 if (align) 291 if (align)
286 end_fsb = roundup_64(end_fsb, align); 292 end_fsb = roundup_64(end_fsb, align);
287 293
288retry: 294retry:
289 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, 295 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
290 end_fsb - *offset_fsb, &got, 296 end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
291 &prev, &idx, eof);
292 switch (error) { 297 switch (error) {
293 case 0: 298 case 0:
294 break; 299 break;
295 case -ENOSPC: 300 case -ENOSPC:
296 case -EDQUOT: 301 case -EDQUOT:
297 /* retry without any preallocation */ 302 /* retry without any preallocation */
298 trace_xfs_reflink_cow_enospc(ip, &imap); 303 trace_xfs_reflink_cow_enospc(ip, imap);
299 if (end_fsb != orig_end_fsb) { 304 if (end_fsb != orig_end_fsb) {
300 end_fsb = orig_end_fsb; 305 end_fsb = orig_end_fsb;
301 goto retry; 306 goto retry;
302 } 307 }
303 /*FALLTHRU*/ 308 /*FALLTHRU*/
304 default: 309 default:
305 goto out_unlock; 310 return error;
306 } 311 }
307 312
308 if (end_fsb != orig_end_fsb) 313 if (end_fsb != orig_end_fsb)
309 xfs_inode_set_cowblocks_tag(ip); 314 xfs_inode_set_cowblocks_tag(ip);
310 315
311 trace_xfs_reflink_cow_alloc(ip, &got); 316 trace_xfs_reflink_cow_alloc(ip, &got);
312done: 317 return 0;
313 *offset_fsb = end_fsb;
314out_unlock:
315 return error;
316}
317
318/* Create a CoW reservation for part of a file. */
319int
320xfs_reflink_reserve_cow_range(
321 struct xfs_inode *ip,
322 xfs_off_t offset,
323 xfs_off_t count)
324{
325 struct xfs_mount *mp = ip->i_mount;
326 xfs_fileoff_t offset_fsb, end_fsb;
327 bool skipped = false;
328 int error = 0;
329
330 trace_xfs_reflink_reserve_cow_range(ip, offset, count);
331
332 offset_fsb = XFS_B_TO_FSBT(mp, offset);
333 end_fsb = XFS_B_TO_FSB(mp, offset + count);
334
335 xfs_ilock(ip, XFS_ILOCK_EXCL);
336 while (offset_fsb < end_fsb) {
337 error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
338 &skipped);
339 if (error) {
340 trace_xfs_reflink_reserve_cow_range_error(ip, error,
341 _RET_IP_);
342 break;
343 }
344 }
345 xfs_iunlock(ip, XFS_ILOCK_EXCL);
346
347 return error;
348} 318}
349 319
350/* Allocate all CoW reservations covering a range of blocks in a file. */ 320/* Allocate all CoW reservations covering a range of blocks in a file. */
@@ -359,9 +329,8 @@ __xfs_reflink_allocate_cow(
359 struct xfs_defer_ops dfops; 329 struct xfs_defer_ops dfops;
360 struct xfs_trans *tp; 330 struct xfs_trans *tp;
361 xfs_fsblock_t first_block; 331 xfs_fsblock_t first_block;
362 xfs_fileoff_t next_fsb;
363 int nimaps = 1, error; 332 int nimaps = 1, error;
364 bool skipped = false; 333 bool shared;
365 334
366 xfs_defer_init(&dfops, &first_block); 335 xfs_defer_init(&dfops, &first_block);
367 336
@@ -372,33 +341,38 @@ __xfs_reflink_allocate_cow(
372 341
373 xfs_ilock(ip, XFS_ILOCK_EXCL); 342 xfs_ilock(ip, XFS_ILOCK_EXCL);
374 343
375 next_fsb = *offset_fsb; 344 /* Read extent from the source file. */
376 error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); 345 nimaps = 1;
346 error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
347 &imap, &nimaps, 0);
348 if (error)
349 goto out_unlock;
350 ASSERT(nimaps == 1);
351
352 error = xfs_reflink_reserve_cow(ip, &imap, &shared);
377 if (error) 353 if (error)
378 goto out_trans_cancel; 354 goto out_trans_cancel;
379 355
380 if (skipped) { 356 if (!shared) {
381 *offset_fsb = next_fsb; 357 *offset_fsb = imap.br_startoff + imap.br_blockcount;
382 goto out_trans_cancel; 358 goto out_trans_cancel;
383 } 359 }
384 360
385 xfs_trans_ijoin(tp, ip, 0); 361 xfs_trans_ijoin(tp, ip, 0);
386 error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, 362 error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
387 XFS_BMAPI_COWFORK, &first_block, 363 XFS_BMAPI_COWFORK, &first_block,
388 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 364 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
389 &imap, &nimaps, &dfops); 365 &imap, &nimaps, &dfops);
390 if (error) 366 if (error)
391 goto out_trans_cancel; 367 goto out_trans_cancel;
392 368
393 /* We might not have been able to map the whole delalloc extent */
394 *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
395
396 error = xfs_defer_finish(&tp, &dfops, NULL); 369 error = xfs_defer_finish(&tp, &dfops, NULL);
397 if (error) 370 if (error)
398 goto out_trans_cancel; 371 goto out_trans_cancel;
399 372
400 error = xfs_trans_commit(tp); 373 error = xfs_trans_commit(tp);
401 374
375 *offset_fsb = imap.br_startoff + imap.br_blockcount;
402out_unlock: 376out_unlock:
403 xfs_iunlock(ip, XFS_ILOCK_EXCL); 377 xfs_iunlock(ip, XFS_ILOCK_EXCL);
404 return error; 378 return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 7ddd9f69560d..fad11607c9ad 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
26extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, 26extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
27 struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); 27 struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
28 28
29extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, 29extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
30 xfs_off_t offset, xfs_off_t count); 30 struct xfs_bmbt_irec *imap, bool *shared);
31extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, 31extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
32 xfs_off_t offset, xfs_off_t count); 32 xfs_off_t offset, xfs_off_t count);
33extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, 33extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ad188d3a83f3..72f9f6b7a76a 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
3346DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); 3346DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
3347DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); 3347DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
3348 3348
3349DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); 3349DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
3350DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); 3350DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
3351 3351
3352DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); 3352DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
@@ -3358,7 +3358,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
3358DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); 3358DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
3359DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); 3359DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
3360 3360
3361DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
3362DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); 3361DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
3363DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); 3362DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
3364DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); 3363DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);