aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2018-09-28 23:47:15 -0400
committerDave Chinner <david@fromorbit.com>2018-09-28 23:47:15 -0400
commitdf307077916fde42734a49022fc8ed23cc58caa4 (patch)
tree6d63a78fbcfd8cdf3e540cdfa75828090affe3c1 /fs/xfs
parent8683edb7755b853f0dd92e07fe2e7a7e675a84d7 (diff)
xfs: fix transaction leak in xfs_reflink_allocate_cow()
When xfs_reflink_allocate_cow() allocates a transaction, it drops the ILOCK to perform the operation. This introduces a race condition where another thread modifying the file can perform the COW allocation operation underneath us. This results in the retry loop finding an allocated block and jumping straight to the conversion code. It does not, however, cancel the transaction it holds and so this gets leaked. This results in a lockdep warning: ================================================ WARNING: lock held when returning to user space! 4.18.5 #1 Not tainted ------------------------------------------------ worker/6123 is leaving the kernel with locks still held! 1 lock held by worker/6123: #0: 000000009eab4f1b (sb_internal#2){.+.+}, at: xfs_trans_alloc+0x17c/0x220 And eventually the filesystem deadlocks because it runs out of log space that is reserved by the leaked transaction and never gets released. The logic flow in xfs_reflink_allocate_cow() is a convoluted mess of gotos - it's no surprise that it has a bug where the flow through several goto jumps then fails to clean up context from a non-obvious logic path. Clean up the logic flow and make sure every path does the right thing. Reported-by: Alexander Y. Fomichev <git.user@gmail.com> Tested-by: Alexander Y. Fomichev <git.user@gmail.com> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200981 Signed-off-by: Dave Chinner <dchinner@redhat.com> [hch: slight refactor] Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_reflink.c127
1 files changed, 77 insertions, 50 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 38f405415b88..d60d0eeed7b9 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -352,6 +352,47 @@ xfs_reflink_convert_cow(
352 return error; 352 return error;
353} 353}
354 354
355/*
356 * Find the extent that maps the given range in the COW fork. Even if the extent
357 * is not shared we might have a preallocation for it in the COW fork. If so we
358 * use it that rather than trigger a new allocation.
359 */
360static int
361xfs_find_trim_cow_extent(
362 struct xfs_inode *ip,
363 struct xfs_bmbt_irec *imap,
364 bool *shared,
365 bool *found)
366{
367 xfs_fileoff_t offset_fsb = imap->br_startoff;
368 xfs_filblks_t count_fsb = imap->br_blockcount;
369 struct xfs_iext_cursor icur;
370 struct xfs_bmbt_irec got;
371 bool trimmed;
372
373 *found = false;
374
375 /*
376 * If we don't find an overlapping extent, trim the range we need to
377 * allocate to fit the hole we found.
378 */
379 if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) ||
380 got.br_startoff > offset_fsb)
381 return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
382
383 *shared = true;
384 if (isnullstartblock(got.br_startblock)) {
385 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
386 return 0;
387 }
388
389 /* real extent found - no need to allocate */
390 xfs_trim_extent(&got, offset_fsb, count_fsb);
391 *imap = got;
392 *found = true;
393 return 0;
394}
395
355/* Allocate all CoW reservations covering a range of blocks in a file. */ 396/* Allocate all CoW reservations covering a range of blocks in a file. */
356int 397int
357xfs_reflink_allocate_cow( 398xfs_reflink_allocate_cow(
@@ -363,78 +404,64 @@ xfs_reflink_allocate_cow(
363 struct xfs_mount *mp = ip->i_mount; 404 struct xfs_mount *mp = ip->i_mount;
364 xfs_fileoff_t offset_fsb = imap->br_startoff; 405 xfs_fileoff_t offset_fsb = imap->br_startoff;
365 xfs_filblks_t count_fsb = imap->br_blockcount; 406 xfs_filblks_t count_fsb = imap->br_blockcount;
366 struct xfs_bmbt_irec got; 407 struct xfs_trans *tp;
367 struct xfs_trans *tp = NULL;
368 int nimaps, error = 0; 408 int nimaps, error = 0;
369 bool trimmed; 409 bool found;
370 xfs_filblks_t resaligned; 410 xfs_filblks_t resaligned;
371 xfs_extlen_t resblks = 0; 411 xfs_extlen_t resblks = 0;
372 struct xfs_iext_cursor icur;
373 412
374retry:
375 ASSERT(xfs_is_reflink_inode(ip));
376 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 413 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
414 ASSERT(xfs_is_reflink_inode(ip));
377 415
378 /* 416 error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
379 * Even if the extent is not shared we might have a preallocation for 417 if (error || !*shared)
380 * it in the COW fork. If so use it. 418 return error;
381 */ 419 if (found)
382 if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) && 420 goto convert;
383 got.br_startoff <= offset_fsb) {
384 *shared = true;
385
386 /* If we have a real allocation in the COW fork we're done. */
387 if (!isnullstartblock(got.br_startblock)) {
388 xfs_trim_extent(&got, offset_fsb, count_fsb);
389 *imap = got;
390 goto convert;
391 }
392 421
393 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); 422 resaligned = xfs_aligned_fsb_count(imap->br_startoff,
394 } else { 423 imap->br_blockcount, xfs_get_cowextsz_hint(ip));
395 error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); 424 resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
396 if (error || !*shared)
397 goto out;
398 }
399 425
400 if (!tp) { 426 xfs_iunlock(ip, *lockmode);
401 resaligned = xfs_aligned_fsb_count(imap->br_startoff, 427 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
402 imap->br_blockcount, xfs_get_cowextsz_hint(ip)); 428 *lockmode = XFS_ILOCK_EXCL;
403 resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); 429 xfs_ilock(ip, *lockmode);
404 430
405 xfs_iunlock(ip, *lockmode); 431 if (error)
406 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); 432 return error;
407 *lockmode = XFS_ILOCK_EXCL;
408 xfs_ilock(ip, *lockmode);
409 433
410 if (error) 434 error = xfs_qm_dqattach_locked(ip, false);
411 return error; 435 if (error)
436 goto out_trans_cancel;
412 437
413 error = xfs_qm_dqattach_locked(ip, false); 438 /*
414 if (error) 439 * Check for an overlapping extent again now that we dropped the ilock.
415 goto out; 440 */
416 goto retry; 441 error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
442 if (error || !*shared)
443 goto out_trans_cancel;
444 if (found) {
445 xfs_trans_cancel(tp);
446 goto convert;
417 } 447 }
418 448
419 error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, 449 error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
420 XFS_QMOPT_RES_REGBLKS); 450 XFS_QMOPT_RES_REGBLKS);
421 if (error) 451 if (error)
422 goto out; 452 goto out_trans_cancel;
423 453
424 xfs_trans_ijoin(tp, ip, 0); 454 xfs_trans_ijoin(tp, ip, 0);
425 455
426 nimaps = 1;
427
428 /* Allocate the entire reservation as unwritten blocks. */ 456 /* Allocate the entire reservation as unwritten blocks. */
457 nimaps = 1;
429 error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, 458 error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
430 XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 459 XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,
431 resblks, imap, &nimaps); 460 resblks, imap, &nimaps);
432 if (error) 461 if (error)
433 goto out_trans_cancel; 462 goto out_unreserve;
434 463
435 xfs_inode_set_cowblocks_tag(ip); 464 xfs_inode_set_cowblocks_tag(ip);
436
437 /* Finish up. */
438 error = xfs_trans_commit(tp); 465 error = xfs_trans_commit(tp);
439 if (error) 466 if (error)
440 return error; 467 return error;
@@ -447,12 +474,12 @@ retry:
447 return -ENOSPC; 474 return -ENOSPC;
448convert: 475convert:
449 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb); 476 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb);
450out_trans_cancel: 477
478out_unreserve:
451 xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, 479 xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
452 XFS_QMOPT_RES_REGBLKS); 480 XFS_QMOPT_RES_REGBLKS);
453out: 481out_trans_cancel:
454 if (tp) 482 xfs_trans_cancel(tp);
455 xfs_trans_cancel(tp);
456 return error; 483 return error;
457} 484}
458 485