diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-13 06:06:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-13 06:06:54 -0400 |
commit | 5ff0b9e1a1da58b584aa4b8ea234be20b5a1164b (patch) | |
tree | 4849a305c073d4add184c1474a6c000a847285e7 | |
parent | 77c688ac87183537ed0fb84ec2cb8fa8ec97c458 (diff) | |
parent | 6889e783cd68b79f8330ad4d10a2571c67c3f7df (diff) |
Merge tag 'xfs-for-linus-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs update from Dave Chinner:
"This update contains:
- various cleanups
- log recovery debug hooks
- seek hole/data implementation merge
- extent shift rework to fix collapse range bugs
- various sparse warning fixes
- log recovery transaction processing rework to fix use after free
bugs
- metadata buffer IO infrastructure rework to ensure all buffers
under IO have valid reference counts
- various fixes for ondisk flags, writeback and zero range corner
cases"
* tag 'xfs-for-linus-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (56 commits)
xfs: fix agno increment in xfs_inumbers() loop
xfs: xfs_iflush_done checks the wrong log item callback
xfs: flush the range before zero range conversion
xfs: restore buffer_head unwritten bit on ioend cancel
xfs: check for null dquot in xfs_quota_calc_throttle()
xfs: fix crc field handling in xfs_sb_to/from_disk
xfs: don't send null bp to xfs_trans_brelse()
xfs: check for inode size overflow in xfs_new_eof()
xfs: only set extent size hint when asked
xfs: project id inheritance is a directory only flag
xfs: kill time.h
xfs: compat_xfs_bstat does not have forkoff
xfs: simplify xfs_zero_remaining_bytes
xfs: check xfs_buf_read_uncached returns correctly
xfs: introduce xfs_buf_submit[_wait]
xfs: kill xfs_bioerror_relse
xfs: xfs_bioerror can die.
xfs: kill xfs_bdstrat_cb
xfs: rework xfs_buf_bio_endio error handling
xfs: xfs_buf_ioend and xfs_buf_iodone_work duplicate functionality
...
47 files changed, 1304 insertions, 1176 deletions
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 844e288b9576..53e95b2a1369 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/swap.h> | 21 | #include <linux/swap.h> |
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/backing-dev.h> | 23 | #include <linux/backing-dev.h> |
24 | #include "time.h" | ||
25 | #include "kmem.h" | 24 | #include "kmem.h" |
26 | #include "xfs_message.h" | 25 | #include "xfs_message.h" |
27 | 26 | ||
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 4bffffe038a1..eff34218f405 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -2209,6 +2209,10 @@ xfs_agf_verify( | |||
2209 | be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) | 2209 | be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) |
2210 | return false; | 2210 | return false; |
2211 | 2211 | ||
2212 | if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || | ||
2213 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) | ||
2214 | return false; | ||
2215 | |||
2212 | /* | 2216 | /* |
2213 | * during growfs operations, the perag is not fully initialised, | 2217 | * during growfs operations, the perag is not fully initialised, |
2214 | * so we can't use it for any useful checking. growfs ensures we can't | 2218 | * so we can't use it for any useful checking. growfs ensures we can't |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 86df952d3e24..79c981984dca 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -5404,22 +5404,223 @@ error0: | |||
5404 | } | 5404 | } |
5405 | 5405 | ||
5406 | /* | 5406 | /* |
5407 | * Determine whether an extent shift can be accomplished by a merge with the | ||
5408 | * extent that precedes the target hole of the shift. | ||
5409 | */ | ||
5410 | STATIC bool | ||
5411 | xfs_bmse_can_merge( | ||
5412 | struct xfs_bmbt_irec *left, /* preceding extent */ | ||
5413 | struct xfs_bmbt_irec *got, /* current extent to shift */ | ||
5414 | xfs_fileoff_t shift) /* shift fsb */ | ||
5415 | { | ||
5416 | xfs_fileoff_t startoff; | ||
5417 | |||
5418 | startoff = got->br_startoff - shift; | ||
5419 | |||
5420 | /* | ||
5421 | * The extent, once shifted, must be adjacent in-file and on-disk with | ||
5422 | * the preceding extent. | ||
5423 | */ | ||
5424 | if ((left->br_startoff + left->br_blockcount != startoff) || | ||
5425 | (left->br_startblock + left->br_blockcount != got->br_startblock) || | ||
5426 | (left->br_state != got->br_state) || | ||
5427 | (left->br_blockcount + got->br_blockcount > MAXEXTLEN)) | ||
5428 | return false; | ||
5429 | |||
5430 | return true; | ||
5431 | } | ||
5432 | |||
5433 | /* | ||
5434 | * A bmap extent shift adjusts the file offset of an extent to fill a preceding | ||
5435 | * hole in the file. If an extent shift would result in the extent being fully | ||
5436 | * adjacent to the extent that currently precedes the hole, we can merge with | ||
5437 | * the preceding extent rather than do the shift. | ||
5438 | * | ||
5439 | * This function assumes the caller has verified a shift-by-merge is possible | ||
5440 | * with the provided extents via xfs_bmse_can_merge(). | ||
5441 | */ | ||
5442 | STATIC int | ||
5443 | xfs_bmse_merge( | ||
5444 | struct xfs_inode *ip, | ||
5445 | int whichfork, | ||
5446 | xfs_fileoff_t shift, /* shift fsb */ | ||
5447 | int current_ext, /* idx of gotp */ | ||
5448 | struct xfs_bmbt_rec_host *gotp, /* extent to shift */ | ||
5449 | struct xfs_bmbt_rec_host *leftp, /* preceding extent */ | ||
5450 | struct xfs_btree_cur *cur, | ||
5451 | int *logflags) /* output */ | ||
5452 | { | ||
5453 | struct xfs_ifork *ifp; | ||
5454 | struct xfs_bmbt_irec got; | ||
5455 | struct xfs_bmbt_irec left; | ||
5456 | xfs_filblks_t blockcount; | ||
5457 | int error, i; | ||
5458 | |||
5459 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5460 | xfs_bmbt_get_all(gotp, &got); | ||
5461 | xfs_bmbt_get_all(leftp, &left); | ||
5462 | blockcount = left.br_blockcount + got.br_blockcount; | ||
5463 | |||
5464 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
5465 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
5466 | ASSERT(xfs_bmse_can_merge(&left, &got, shift)); | ||
5467 | |||
5468 | /* | ||
5469 | * Merge the in-core extents. Note that the host record pointers and | ||
5470 | * current_ext index are invalid once the extent has been removed via | ||
5471 | * xfs_iext_remove(). | ||
5472 | */ | ||
5473 | xfs_bmbt_set_blockcount(leftp, blockcount); | ||
5474 | xfs_iext_remove(ip, current_ext, 1, 0); | ||
5475 | |||
5476 | /* | ||
5477 | * Update the on-disk extent count, the btree if necessary and log the | ||
5478 | * inode. | ||
5479 | */ | ||
5480 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
5481 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
5482 | *logflags |= XFS_ILOG_CORE; | ||
5483 | if (!cur) { | ||
5484 | *logflags |= XFS_ILOG_DEXT; | ||
5485 | return 0; | ||
5486 | } | ||
5487 | |||
5488 | /* lookup and remove the extent to merge */ | ||
5489 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, | ||
5490 | got.br_blockcount, &i); | ||
5491 | if (error) | ||
5492 | goto out_error; | ||
5493 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | ||
5494 | |||
5495 | error = xfs_btree_delete(cur, &i); | ||
5496 | if (error) | ||
5497 | goto out_error; | ||
5498 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | ||
5499 | |||
5500 | /* lookup and update size of the previous extent */ | ||
5501 | error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, | ||
5502 | left.br_blockcount, &i); | ||
5503 | if (error) | ||
5504 | goto out_error; | ||
5505 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | ||
5506 | |||
5507 | left.br_blockcount = blockcount; | ||
5508 | |||
5509 | error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, | ||
5510 | left.br_blockcount, left.br_state); | ||
5511 | if (error) | ||
5512 | goto out_error; | ||
5513 | |||
5514 | return 0; | ||
5515 | |||
5516 | out_error: | ||
5517 | return error; | ||
5518 | } | ||
5519 | |||
5520 | /* | ||
5521 | * Shift a single extent. | ||
5522 | */ | ||
5523 | STATIC int | ||
5524 | xfs_bmse_shift_one( | ||
5525 | struct xfs_inode *ip, | ||
5526 | int whichfork, | ||
5527 | xfs_fileoff_t offset_shift_fsb, | ||
5528 | int *current_ext, | ||
5529 | struct xfs_bmbt_rec_host *gotp, | ||
5530 | struct xfs_btree_cur *cur, | ||
5531 | int *logflags) | ||
5532 | { | ||
5533 | struct xfs_ifork *ifp; | ||
5534 | xfs_fileoff_t startoff; | ||
5535 | struct xfs_bmbt_rec_host *leftp; | ||
5536 | struct xfs_bmbt_irec got; | ||
5537 | struct xfs_bmbt_irec left; | ||
5538 | int error; | ||
5539 | int i; | ||
5540 | |||
5541 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5542 | |||
5543 | xfs_bmbt_get_all(gotp, &got); | ||
5544 | startoff = got.br_startoff - offset_shift_fsb; | ||
5545 | |||
5546 | /* delalloc extents should be prevented by caller */ | ||
5547 | XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock), | ||
5548 | out_error); | ||
5549 | |||
5550 | /* | ||
5551 | * If this is the first extent in the file, make sure there's enough | ||
5552 | * room at the start of the file and jump right to the shift as there's | ||
5553 | * no left extent to merge. | ||
5554 | */ | ||
5555 | if (*current_ext == 0) { | ||
5556 | if (got.br_startoff < offset_shift_fsb) | ||
5557 | return -EINVAL; | ||
5558 | goto shift_extent; | ||
5559 | } | ||
5560 | |||
5561 | /* grab the left extent and check for a large enough hole */ | ||
5562 | leftp = xfs_iext_get_ext(ifp, *current_ext - 1); | ||
5563 | xfs_bmbt_get_all(leftp, &left); | ||
5564 | |||
5565 | if (startoff < left.br_startoff + left.br_blockcount) | ||
5566 | return -EINVAL; | ||
5567 | |||
5568 | /* check whether to merge the extent or shift it down */ | ||
5569 | if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) | ||
5570 | goto shift_extent; | ||
5571 | |||
5572 | return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext, | ||
5573 | gotp, leftp, cur, logflags); | ||
5574 | |||
5575 | shift_extent: | ||
5576 | /* | ||
5577 | * Increment the extent index for the next iteration, update the start | ||
5578 | * offset of the in-core extent and update the btree if applicable. | ||
5579 | */ | ||
5580 | (*current_ext)++; | ||
5581 | xfs_bmbt_set_startoff(gotp, startoff); | ||
5582 | *logflags |= XFS_ILOG_CORE; | ||
5583 | if (!cur) { | ||
5584 | *logflags |= XFS_ILOG_DEXT; | ||
5585 | return 0; | ||
5586 | } | ||
5587 | |||
5588 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, | ||
5589 | got.br_blockcount, &i); | ||
5590 | if (error) | ||
5591 | return error; | ||
5592 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | ||
5593 | |||
5594 | got.br_startoff = startoff; | ||
5595 | error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, | ||
5596 | got.br_blockcount, got.br_state); | ||
5597 | if (error) | ||
5598 | return error; | ||
5599 | |||
5600 | return 0; | ||
5601 | |||
5602 | out_error: | ||
5603 | return error; | ||
5604 | } | ||
5605 | |||
5606 | /* | ||
5407 | * Shift extent records to the left to cover a hole. | 5607 | * Shift extent records to the left to cover a hole. |
5408 | * | 5608 | * |
5409 | * The maximum number of extents to be shifted in a single operation | 5609 | * The maximum number of extents to be shifted in a single operation is |
5410 | * is @num_exts, and @current_ext keeps track of the current extent | 5610 | * @num_exts. @start_fsb specifies the file offset to start the shift and the |
5411 | * index we have shifted. @offset_shift_fsb is the length by which each | 5611 | * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb |
5412 | * extent is shifted. If there is no hole to shift the extents | 5612 | * is the length by which each extent is shifted. If there is no hole to shift |
5413 | * into, this will be considered invalid operation and we abort immediately. | 5613 | * the extents into, this will be considered invalid operation and we abort |
5614 | * immediately. | ||
5414 | */ | 5615 | */ |
5415 | int | 5616 | int |
5416 | xfs_bmap_shift_extents( | 5617 | xfs_bmap_shift_extents( |
5417 | struct xfs_trans *tp, | 5618 | struct xfs_trans *tp, |
5418 | struct xfs_inode *ip, | 5619 | struct xfs_inode *ip, |
5419 | int *done, | ||
5420 | xfs_fileoff_t start_fsb, | 5620 | xfs_fileoff_t start_fsb, |
5421 | xfs_fileoff_t offset_shift_fsb, | 5621 | xfs_fileoff_t offset_shift_fsb, |
5422 | xfs_extnum_t *current_ext, | 5622 | int *done, |
5623 | xfs_fileoff_t *next_fsb, | ||
5423 | xfs_fsblock_t *firstblock, | 5624 | xfs_fsblock_t *firstblock, |
5424 | struct xfs_bmap_free *flist, | 5625 | struct xfs_bmap_free *flist, |
5425 | int num_exts) | 5626 | int num_exts) |
@@ -5427,16 +5628,13 @@ xfs_bmap_shift_extents( | |||
5427 | struct xfs_btree_cur *cur = NULL; | 5628 | struct xfs_btree_cur *cur = NULL; |
5428 | struct xfs_bmbt_rec_host *gotp; | 5629 | struct xfs_bmbt_rec_host *gotp; |
5429 | struct xfs_bmbt_irec got; | 5630 | struct xfs_bmbt_irec got; |
5430 | struct xfs_bmbt_irec left; | ||
5431 | struct xfs_mount *mp = ip->i_mount; | 5631 | struct xfs_mount *mp = ip->i_mount; |
5432 | struct xfs_ifork *ifp; | 5632 | struct xfs_ifork *ifp; |
5433 | xfs_extnum_t nexts = 0; | 5633 | xfs_extnum_t nexts = 0; |
5434 | xfs_fileoff_t startoff; | 5634 | xfs_extnum_t current_ext; |
5435 | int error = 0; | 5635 | int error = 0; |
5436 | int i; | ||
5437 | int whichfork = XFS_DATA_FORK; | 5636 | int whichfork = XFS_DATA_FORK; |
5438 | int logflags = 0; | 5637 | int logflags = 0; |
5439 | xfs_filblks_t blockcount = 0; | ||
5440 | int total_extents; | 5638 | int total_extents; |
5441 | 5639 | ||
5442 | if (unlikely(XFS_TEST_ERROR( | 5640 | if (unlikely(XFS_TEST_ERROR( |
@@ -5451,7 +5649,8 @@ xfs_bmap_shift_extents( | |||
5451 | if (XFS_FORCED_SHUTDOWN(mp)) | 5649 | if (XFS_FORCED_SHUTDOWN(mp)) |
5452 | return -EIO; | 5650 | return -EIO; |
5453 | 5651 | ||
5454 | ASSERT(current_ext != NULL); | 5652 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
5653 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
5455 | 5654 | ||
5456 | ifp = XFS_IFORK_PTR(ip, whichfork); | 5655 | ifp = XFS_IFORK_PTR(ip, whichfork); |
5457 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | 5656 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { |
@@ -5461,23 +5660,6 @@ xfs_bmap_shift_extents( | |||
5461 | return error; | 5660 | return error; |
5462 | } | 5661 | } |
5463 | 5662 | ||
5464 | /* | ||
5465 | * If *current_ext is 0, we would need to lookup the extent | ||
5466 | * from where we would start shifting and store it in gotp. | ||
5467 | */ | ||
5468 | if (!*current_ext) { | ||
5469 | gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); | ||
5470 | /* | ||
5471 | * gotp can be null in 2 cases: 1) if there are no extents | ||
5472 | * or 2) start_fsb lies in a hole beyond which there are | ||
5473 | * no extents. Either way, we are done. | ||
5474 | */ | ||
5475 | if (!gotp) { | ||
5476 | *done = 1; | ||
5477 | return 0; | ||
5478 | } | ||
5479 | } | ||
5480 | |||
5481 | if (ifp->if_flags & XFS_IFBROOT) { | 5663 | if (ifp->if_flags & XFS_IFBROOT) { |
5482 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | 5664 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); |
5483 | cur->bc_private.b.firstblock = *firstblock; | 5665 | cur->bc_private.b.firstblock = *firstblock; |
@@ -5486,112 +5668,46 @@ xfs_bmap_shift_extents( | |||
5486 | } | 5668 | } |
5487 | 5669 | ||
5488 | /* | 5670 | /* |
5671 | * Look up the extent index for the fsb where we start shifting. We can | ||
5672 | * henceforth iterate with current_ext as extent list changes are locked | ||
5673 | * out via ilock. | ||
5674 | * | ||
5675 | * gotp can be null in 2 cases: 1) if there are no extents or 2) | ||
5676 | * start_fsb lies in a hole beyond which there are no extents. Either | ||
5677 | * way, we are done. | ||
5678 | */ | ||
5679 | gotp = xfs_iext_bno_to_ext(ifp, start_fsb, ¤t_ext); | ||
5680 | if (!gotp) { | ||
5681 | *done = 1; | ||
5682 | goto del_cursor; | ||
5683 | } | ||
5684 | |||
5685 | /* | ||
5489 | * There may be delalloc extents in the data fork before the range we | 5686 | * There may be delalloc extents in the data fork before the range we |
5490 | * are collapsing out, so we cannot | 5687 | * are collapsing out, so we cannot use the count of real extents here. |
5491 | * use the count of real extents here. Instead we have to calculate it | 5688 | * Instead we have to calculate it from the incore fork. |
5492 | * from the incore fork. | ||
5493 | */ | 5689 | */ |
5494 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | 5690 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); |
5495 | while (nexts++ < num_exts && *current_ext < total_extents) { | 5691 | while (nexts++ < num_exts && current_ext < total_extents) { |
5496 | 5692 | error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, | |
5497 | gotp = xfs_iext_get_ext(ifp, *current_ext); | 5693 | ¤t_ext, gotp, cur, &logflags); |
5498 | xfs_bmbt_get_all(gotp, &got); | ||
5499 | startoff = got.br_startoff - offset_shift_fsb; | ||
5500 | |||
5501 | /* | ||
5502 | * Before shifting extent into hole, make sure that the hole | ||
5503 | * is large enough to accomodate the shift. | ||
5504 | */ | ||
5505 | if (*current_ext) { | ||
5506 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, | ||
5507 | *current_ext - 1), &left); | ||
5508 | |||
5509 | if (startoff < left.br_startoff + left.br_blockcount) | ||
5510 | error = -EINVAL; | ||
5511 | } else if (offset_shift_fsb > got.br_startoff) { | ||
5512 | /* | ||
5513 | * When first extent is shifted, offset_shift_fsb | ||
5514 | * should be less than the stating offset of | ||
5515 | * the first extent. | ||
5516 | */ | ||
5517 | error = -EINVAL; | ||
5518 | } | ||
5519 | |||
5520 | if (error) | 5694 | if (error) |
5521 | goto del_cursor; | 5695 | goto del_cursor; |
5522 | 5696 | ||
5523 | if (cur) { | 5697 | /* update total extent count and grab the next record */ |
5524 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
5525 | got.br_startblock, | ||
5526 | got.br_blockcount, | ||
5527 | &i); | ||
5528 | if (error) | ||
5529 | goto del_cursor; | ||
5530 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5531 | } | ||
5532 | |||
5533 | /* Check if we can merge 2 adjacent extents */ | ||
5534 | if (*current_ext && | ||
5535 | left.br_startoff + left.br_blockcount == startoff && | ||
5536 | left.br_startblock + left.br_blockcount == | ||
5537 | got.br_startblock && | ||
5538 | left.br_state == got.br_state && | ||
5539 | left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { | ||
5540 | blockcount = left.br_blockcount + | ||
5541 | got.br_blockcount; | ||
5542 | xfs_iext_remove(ip, *current_ext, 1, 0); | ||
5543 | logflags |= XFS_ILOG_CORE; | ||
5544 | if (cur) { | ||
5545 | error = xfs_btree_delete(cur, &i); | ||
5546 | if (error) | ||
5547 | goto del_cursor; | ||
5548 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5549 | } else { | ||
5550 | logflags |= XFS_ILOG_DEXT; | ||
5551 | } | ||
5552 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
5553 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
5554 | gotp = xfs_iext_get_ext(ifp, --*current_ext); | ||
5555 | xfs_bmbt_get_all(gotp, &got); | ||
5556 | |||
5557 | /* Make cursor point to the extent we will update */ | ||
5558 | if (cur) { | ||
5559 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
5560 | got.br_startblock, | ||
5561 | got.br_blockcount, | ||
5562 | &i); | ||
5563 | if (error) | ||
5564 | goto del_cursor; | ||
5565 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5566 | } | ||
5567 | |||
5568 | xfs_bmbt_set_blockcount(gotp, blockcount); | ||
5569 | got.br_blockcount = blockcount; | ||
5570 | } else { | ||
5571 | /* We have to update the startoff */ | ||
5572 | xfs_bmbt_set_startoff(gotp, startoff); | ||
5573 | got.br_startoff = startoff; | ||
5574 | } | ||
5575 | |||
5576 | logflags |= XFS_ILOG_CORE; | ||
5577 | if (cur) { | ||
5578 | error = xfs_bmbt_update(cur, got.br_startoff, | ||
5579 | got.br_startblock, | ||
5580 | got.br_blockcount, | ||
5581 | got.br_state); | ||
5582 | if (error) | ||
5583 | goto del_cursor; | ||
5584 | } else { | ||
5585 | logflags |= XFS_ILOG_DEXT; | ||
5586 | } | ||
5587 | |||
5588 | (*current_ext)++; | ||
5589 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | 5698 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); |
5699 | if (current_ext >= total_extents) | ||
5700 | break; | ||
5701 | gotp = xfs_iext_get_ext(ifp, current_ext); | ||
5590 | } | 5702 | } |
5591 | 5703 | ||
5592 | /* Check if we are done */ | 5704 | /* Check if we are done */ |
5593 | if (*current_ext == total_extents) | 5705 | if (current_ext == total_extents) { |
5594 | *done = 1; | 5706 | *done = 1; |
5707 | } else if (next_fsb) { | ||
5708 | xfs_bmbt_get_all(gotp, &got); | ||
5709 | *next_fsb = got.br_startoff; | ||
5710 | } | ||
5595 | 5711 | ||
5596 | del_cursor: | 5712 | del_cursor: |
5597 | if (cur) | 5713 | if (cur) |
@@ -5600,5 +5716,6 @@ del_cursor: | |||
5600 | 5716 | ||
5601 | if (logflags) | 5717 | if (logflags) |
5602 | xfs_trans_log_inode(tp, ip, logflags); | 5718 | xfs_trans_log_inode(tp, ip, logflags); |
5719 | |||
5603 | return error; | 5720 | return error; |
5604 | } | 5721 | } |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index b879ca56a64c..44db6db86402 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
@@ -178,9 +178,8 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, | |||
178 | xfs_extnum_t num); | 178 | xfs_extnum_t num); |
179 | uint xfs_default_attroffset(struct xfs_inode *ip); | 179 | uint xfs_default_attroffset(struct xfs_inode *ip); |
180 | int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, | 180 | int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, |
181 | int *done, xfs_fileoff_t start_fsb, | 181 | xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, |
182 | xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, | 182 | int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, |
183 | xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, | 183 | struct xfs_bmap_free *flist, int num_exts); |
184 | int num_exts); | ||
185 | 184 | ||
186 | #endif /* __XFS_BMAP_H__ */ | 185 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 2c42ae28d027..fd827530afec 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c | |||
@@ -2563,7 +2563,8 @@ xfs_da_get_buf( | |||
2563 | mapp, nmap, 0); | 2563 | mapp, nmap, 0); |
2564 | error = bp ? bp->b_error : -EIO; | 2564 | error = bp ? bp->b_error : -EIO; |
2565 | if (error) { | 2565 | if (error) { |
2566 | xfs_trans_brelse(trans, bp); | 2566 | if (bp) |
2567 | xfs_trans_brelse(trans, bp); | ||
2567 | goto out_free; | 2568 | goto out_free; |
2568 | } | 2569 | } |
2569 | 2570 | ||
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index c9aee52a37e2..7e42fdfd2f1d 100644 --- a/fs/xfs/libxfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c | |||
@@ -270,7 +270,6 @@ xfs_dir3_data_get_ftype( | |||
270 | { | 270 | { |
271 | __uint8_t ftype = dep->name[dep->namelen]; | 271 | __uint8_t ftype = dep->name[dep->namelen]; |
272 | 272 | ||
273 | ASSERT(ftype < XFS_DIR3_FT_MAX); | ||
274 | if (ftype >= XFS_DIR3_FT_MAX) | 273 | if (ftype >= XFS_DIR3_FT_MAX) |
275 | return XFS_DIR3_FT_UNKNOWN; | 274 | return XFS_DIR3_FT_UNKNOWN; |
276 | return ftype; | 275 | return ftype; |
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 6cef22152fd6..7075aaf131f4 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c | |||
@@ -237,7 +237,8 @@ xfs_dir_init( | |||
237 | } | 237 | } |
238 | 238 | ||
239 | /* | 239 | /* |
240 | Enter a name in a directory. | 240 | * Enter a name in a directory, or check for available space. |
241 | * If inum is 0, only the available space test is performed. | ||
241 | */ | 242 | */ |
242 | int | 243 | int |
243 | xfs_dir_createname( | 244 | xfs_dir_createname( |
@@ -254,10 +255,12 @@ xfs_dir_createname( | |||
254 | int v; /* type-checking value */ | 255 | int v; /* type-checking value */ |
255 | 256 | ||
256 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 257 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
257 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); | 258 | if (inum) { |
258 | if (rval) | 259 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); |
259 | return rval; | 260 | if (rval) |
260 | XFS_STATS_INC(xs_dir_create); | 261 | return rval; |
262 | XFS_STATS_INC(xs_dir_create); | ||
263 | } | ||
261 | 264 | ||
262 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | 265 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
263 | if (!args) | 266 | if (!args) |
@@ -276,6 +279,8 @@ xfs_dir_createname( | |||
276 | args->whichfork = XFS_DATA_FORK; | 279 | args->whichfork = XFS_DATA_FORK; |
277 | args->trans = tp; | 280 | args->trans = tp; |
278 | args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; | 281 | args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; |
282 | if (!inum) | ||
283 | args->op_flags |= XFS_DA_OP_JUSTCHECK; | ||
279 | 284 | ||
280 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | 285 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
281 | rval = xfs_dir2_sf_addname(args); | 286 | rval = xfs_dir2_sf_addname(args); |
@@ -535,62 +540,14 @@ out_free: | |||
535 | 540 | ||
536 | /* | 541 | /* |
537 | * See if this entry can be added to the directory without allocating space. | 542 | * See if this entry can be added to the directory without allocating space. |
538 | * First checks that the caller couldn't reserve enough space (resblks = 0). | ||
539 | */ | 543 | */ |
540 | int | 544 | int |
541 | xfs_dir_canenter( | 545 | xfs_dir_canenter( |
542 | xfs_trans_t *tp, | 546 | xfs_trans_t *tp, |
543 | xfs_inode_t *dp, | 547 | xfs_inode_t *dp, |
544 | struct xfs_name *name, /* name of entry to add */ | 548 | struct xfs_name *name) /* name of entry to add */ |
545 | uint resblks) | ||
546 | { | 549 | { |
547 | struct xfs_da_args *args; | 550 | return xfs_dir_createname(tp, dp, name, 0, NULL, NULL, 0); |
548 | int rval; | ||
549 | int v; /* type-checking value */ | ||
550 | |||
551 | if (resblks) | ||
552 | return 0; | ||
553 | |||
554 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
555 | |||
556 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
557 | if (!args) | ||
558 | return -ENOMEM; | ||
559 | |||
560 | args->geo = dp->i_mount->m_dir_geo; | ||
561 | args->name = name->name; | ||
562 | args->namelen = name->len; | ||
563 | args->filetype = name->type; | ||
564 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
565 | args->dp = dp; | ||
566 | args->whichfork = XFS_DATA_FORK; | ||
567 | args->trans = tp; | ||
568 | args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | | ||
569 | XFS_DA_OP_OKNOENT; | ||
570 | |||
571 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
572 | rval = xfs_dir2_sf_addname(args); | ||
573 | goto out_free; | ||
574 | } | ||
575 | |||
576 | rval = xfs_dir2_isblock(args, &v); | ||
577 | if (rval) | ||
578 | goto out_free; | ||
579 | if (v) { | ||
580 | rval = xfs_dir2_block_addname(args); | ||
581 | goto out_free; | ||
582 | } | ||
583 | |||
584 | rval = xfs_dir2_isleaf(args, &v); | ||
585 | if (rval) | ||
586 | goto out_free; | ||
587 | if (v) | ||
588 | rval = xfs_dir2_leaf_addname(args); | ||
589 | else | ||
590 | rval = xfs_dir2_node_addname(args); | ||
591 | out_free: | ||
592 | kmem_free(args); | ||
593 | return rval; | ||
594 | } | 551 | } |
595 | 552 | ||
596 | /* | 553 | /* |
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index c8e86b0b5e99..4dff261e6ed5 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h | |||
@@ -136,7 +136,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, | |||
136 | xfs_fsblock_t *first, | 136 | xfs_fsblock_t *first, |
137 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | 137 | struct xfs_bmap_free *flist, xfs_extlen_t tot); |
138 | extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, | 138 | extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, |
139 | struct xfs_name *name, uint resblks); | 139 | struct xfs_name *name); |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * Direct call from the bmap code, bypassing the generic directory layer. | 142 | * Direct call from the bmap code, bypassing the generic directory layer. |
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index b62771f1f4b5..23dcb72fc5e6 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c | |||
@@ -1076,8 +1076,8 @@ xfs_dialloc_ag_finobt_newino( | |||
1076 | int i; | 1076 | int i; |
1077 | 1077 | ||
1078 | if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { | 1078 | if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { |
1079 | error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, | 1079 | error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), |
1080 | &i); | 1080 | XFS_LOOKUP_EQ, &i); |
1081 | if (error) | 1081 | if (error) |
1082 | return error; | 1082 | return error; |
1083 | if (i == 1) { | 1083 | if (i == 1) { |
@@ -1085,7 +1085,6 @@ xfs_dialloc_ag_finobt_newino( | |||
1085 | if (error) | 1085 | if (error) |
1086 | return error; | 1086 | return error; |
1087 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1087 | XFS_WANT_CORRUPTED_RETURN(i == 1); |
1088 | |||
1089 | return 0; | 1088 | return 0; |
1090 | } | 1089 | } |
1091 | } | 1090 | } |
@@ -2051,6 +2050,8 @@ xfs_agi_verify( | |||
2051 | if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) | 2050 | if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) |
2052 | return false; | 2051 | return false; |
2053 | 2052 | ||
2053 | if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) | ||
2054 | return false; | ||
2054 | /* | 2055 | /* |
2055 | * during growfs operations, the perag is not fully initialised, | 2056 | * during growfs operations, the perag is not fully initialised, |
2056 | * so we can't use it for any useful checking. growfs ensures we can't | 2057 | * so we can't use it for any useful checking. growfs ensures we can't |
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index f4dd697cac08..7c818f1e4484 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c | |||
@@ -424,20 +424,24 @@ xfs_rtfind_forw( | |||
424 | } | 424 | } |
425 | 425 | ||
426 | /* | 426 | /* |
427 | * Read and modify the summary information for a given extent size, | 427 | * Read and/or modify the summary information for a given extent size, |
428 | * bitmap block combination. | 428 | * bitmap block combination. |
429 | * Keeps track of a current summary block, so we don't keep reading | 429 | * Keeps track of a current summary block, so we don't keep reading |
430 | * it from the buffer cache. | 430 | * it from the buffer cache. |
431 | * | ||
432 | * Summary information is returned in *sum if specified. | ||
433 | * If no delta is specified, returns summary only. | ||
431 | */ | 434 | */ |
432 | int | 435 | int |
433 | xfs_rtmodify_summary( | 436 | xfs_rtmodify_summary_int( |
434 | xfs_mount_t *mp, /* file system mount point */ | 437 | xfs_mount_t *mp, /* file system mount structure */ |
435 | xfs_trans_t *tp, /* transaction pointer */ | 438 | xfs_trans_t *tp, /* transaction pointer */ |
436 | int log, /* log2 of extent size */ | 439 | int log, /* log2 of extent size */ |
437 | xfs_rtblock_t bbno, /* bitmap block number */ | 440 | xfs_rtblock_t bbno, /* bitmap block number */ |
438 | int delta, /* change to make to summary info */ | 441 | int delta, /* change to make to summary info */ |
439 | xfs_buf_t **rbpp, /* in/out: summary block buffer */ | 442 | xfs_buf_t **rbpp, /* in/out: summary block buffer */ |
440 | xfs_fsblock_t *rsb) /* in/out: summary block number */ | 443 | xfs_fsblock_t *rsb, /* in/out: summary block number */ |
444 | xfs_suminfo_t *sum) /* out: summary info for this block */ | ||
441 | { | 445 | { |
442 | xfs_buf_t *bp; /* buffer for the summary block */ | 446 | xfs_buf_t *bp; /* buffer for the summary block */ |
443 | int error; /* error value */ | 447 | int error; /* error value */ |
@@ -456,7 +460,7 @@ xfs_rtmodify_summary( | |||
456 | /* | 460 | /* |
457 | * If we have an old buffer, and the block number matches, use that. | 461 | * If we have an old buffer, and the block number matches, use that. |
458 | */ | 462 | */ |
459 | if (rbpp && *rbpp && *rsb == sb) | 463 | if (*rbpp && *rsb == sb) |
460 | bp = *rbpp; | 464 | bp = *rbpp; |
461 | /* | 465 | /* |
462 | * Otherwise we have to get the buffer. | 466 | * Otherwise we have to get the buffer. |
@@ -465,7 +469,7 @@ xfs_rtmodify_summary( | |||
465 | /* | 469 | /* |
466 | * If there was an old one, get rid of it first. | 470 | * If there was an old one, get rid of it first. |
467 | */ | 471 | */ |
468 | if (rbpp && *rbpp) | 472 | if (*rbpp) |
469 | xfs_trans_brelse(tp, *rbpp); | 473 | xfs_trans_brelse(tp, *rbpp); |
470 | error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); | 474 | error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); |
471 | if (error) { | 475 | if (error) { |
@@ -474,21 +478,38 @@ xfs_rtmodify_summary( | |||
474 | /* | 478 | /* |
475 | * Remember this buffer and block for the next call. | 479 | * Remember this buffer and block for the next call. |
476 | */ | 480 | */ |
477 | if (rbpp) { | 481 | *rbpp = bp; |
478 | *rbpp = bp; | 482 | *rsb = sb; |
479 | *rsb = sb; | ||
480 | } | ||
481 | } | 483 | } |
482 | /* | 484 | /* |
483 | * Point to the summary information, modify and log it. | 485 | * Point to the summary information, modify/log it, and/or copy it out. |
484 | */ | 486 | */ |
485 | sp = XFS_SUMPTR(mp, bp, so); | 487 | sp = XFS_SUMPTR(mp, bp, so); |
486 | *sp += delta; | 488 | if (delta) { |
487 | xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), | 489 | uint first = (uint)((char *)sp - (char *)bp->b_addr); |
488 | (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); | 490 | |
491 | *sp += delta; | ||
492 | xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1); | ||
493 | } | ||
494 | if (sum) | ||
495 | *sum = *sp; | ||
489 | return 0; | 496 | return 0; |
490 | } | 497 | } |
491 | 498 | ||
499 | int | ||
500 | xfs_rtmodify_summary( | ||
501 | xfs_mount_t *mp, /* file system mount structure */ | ||
502 | xfs_trans_t *tp, /* transaction pointer */ | ||
503 | int log, /* log2 of extent size */ | ||
504 | xfs_rtblock_t bbno, /* bitmap block number */ | ||
505 | int delta, /* change to make to summary info */ | ||
506 | xfs_buf_t **rbpp, /* in/out: summary block buffer */ | ||
507 | xfs_fsblock_t *rsb) /* in/out: summary block number */ | ||
508 | { | ||
509 | return xfs_rtmodify_summary_int(mp, tp, log, bbno, | ||
510 | delta, rbpp, rsb, NULL); | ||
511 | } | ||
512 | |||
492 | /* | 513 | /* |
493 | * Set the given range of bitmap bits to the given value. | 514 | * Set the given range of bitmap bits to the given value. |
494 | * Do whatever I/O and logging is required. | 515 | * Do whatever I/O and logging is required. |
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index ad525a5623a4..5f902fa7913f 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c | |||
@@ -279,11 +279,13 @@ xfs_mount_validate_sb( | |||
279 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || | 279 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || |
280 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || | 280 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || |
281 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || | 281 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || |
282 | sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || | ||
282 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || | 283 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || |
283 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || | 284 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || |
284 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || | 285 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || |
285 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || | 286 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || |
286 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || | 287 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || |
288 | sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || | ||
287 | sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || | 289 | sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || |
288 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || | 290 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || |
289 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || | 291 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || |
@@ -443,6 +445,8 @@ __xfs_sb_from_disk( | |||
443 | to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat); | 445 | to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat); |
444 | to->sb_features_log_incompat = | 446 | to->sb_features_log_incompat = |
445 | be32_to_cpu(from->sb_features_log_incompat); | 447 | be32_to_cpu(from->sb_features_log_incompat); |
448 | /* crc is only used on disk, not in memory; just init to 0 here. */ | ||
449 | to->sb_crc = 0; | ||
446 | to->sb_pad = 0; | 450 | to->sb_pad = 0; |
447 | to->sb_pquotino = be64_to_cpu(from->sb_pquotino); | 451 | to->sb_pquotino = be64_to_cpu(from->sb_pquotino); |
448 | to->sb_lsn = be64_to_cpu(from->sb_lsn); | 452 | to->sb_lsn = be64_to_cpu(from->sb_lsn); |
@@ -548,6 +552,9 @@ xfs_sb_to_disk( | |||
548 | if (!fields) | 552 | if (!fields) |
549 | return; | 553 | return; |
550 | 554 | ||
555 | /* We should never write the crc here, it's updated in the IO path */ | ||
556 | fields &= ~XFS_SB_CRC; | ||
557 | |||
551 | xfs_sb_quota_to_disk(to, from, &fields); | 558 | xfs_sb_quota_to_disk(to, from, &fields); |
552 | while (fields) { | 559 | while (fields) { |
553 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); | 560 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); |
diff --git a/fs/xfs/time.h b/fs/xfs/time.h deleted file mode 100644 index 387e695a184c..000000000000 --- a/fs/xfs/time.h +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SUPPORT_TIME_H__ | ||
19 | #define __XFS_SUPPORT_TIME_H__ | ||
20 | |||
21 | #include <linux/sched.h> | ||
22 | #include <linux/time.h> | ||
23 | |||
24 | typedef struct timespec timespec_t; | ||
25 | |||
26 | static inline void delay(long ticks) | ||
27 | { | ||
28 | schedule_timeout_uninterruptible(ticks); | ||
29 | } | ||
30 | |||
31 | static inline void nanotime(struct timespec *tvp) | ||
32 | { | ||
33 | *tvp = CURRENT_TIME; | ||
34 | } | ||
35 | |||
36 | #endif /* __XFS_SUPPORT_TIME_H__ */ | ||
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index b984647c24db..f5b2453a43b2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -434,10 +434,22 @@ xfs_start_page_writeback( | |||
434 | { | 434 | { |
435 | ASSERT(PageLocked(page)); | 435 | ASSERT(PageLocked(page)); |
436 | ASSERT(!PageWriteback(page)); | 436 | ASSERT(!PageWriteback(page)); |
437 | if (clear_dirty) | 437 | |
438 | /* | ||
439 | * if the page was not fully cleaned, we need to ensure that the higher | ||
440 | * layers come back to it correctly. That means we need to keep the page | ||
441 | * dirty, and for WB_SYNC_ALL writeback we need to ensure the | ||
442 | * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to | ||
443 | * write this page in this writeback sweep will be made. | ||
444 | */ | ||
445 | if (clear_dirty) { | ||
438 | clear_page_dirty_for_io(page); | 446 | clear_page_dirty_for_io(page); |
439 | set_page_writeback(page); | 447 | set_page_writeback(page); |
448 | } else | ||
449 | set_page_writeback_keepwrite(page); | ||
450 | |||
440 | unlock_page(page); | 451 | unlock_page(page); |
452 | |||
441 | /* If no buffers on the page are to be written, finish it here */ | 453 | /* If no buffers on the page are to be written, finish it here */ |
442 | if (!buffers) | 454 | if (!buffers) |
443 | end_page_writeback(page); | 455 | end_page_writeback(page); |
@@ -548,6 +560,13 @@ xfs_cancel_ioend( | |||
548 | do { | 560 | do { |
549 | next_bh = bh->b_private; | 561 | next_bh = bh->b_private; |
550 | clear_buffer_async_write(bh); | 562 | clear_buffer_async_write(bh); |
563 | /* | ||
564 | * The unwritten flag is cleared when added to the | ||
565 | * ioend. We're not submitting for I/O so mark the | ||
566 | * buffer unwritten again for next time around. | ||
567 | */ | ||
568 | if (ioend->io_type == XFS_IO_UNWRITTEN) | ||
569 | set_buffer_unwritten(bh); | ||
551 | unlock_buffer(bh); | 570 | unlock_buffer(bh); |
552 | } while ((bh = next_bh) != NULL); | 571 | } while ((bh = next_bh) != NULL); |
553 | 572 | ||
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 1707980f9a4b..92e8f99a5857 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1122,14 +1122,6 @@ xfs_zero_remaining_bytes( | |||
1122 | if (endoff > XFS_ISIZE(ip)) | 1122 | if (endoff > XFS_ISIZE(ip)) |
1123 | endoff = XFS_ISIZE(ip); | 1123 | endoff = XFS_ISIZE(ip); |
1124 | 1124 | ||
1125 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? | ||
1126 | mp->m_rtdev_targp : mp->m_ddev_targp, | ||
1127 | BTOBB(mp->m_sb.sb_blocksize), 0); | ||
1128 | if (!bp) | ||
1129 | return -ENOMEM; | ||
1130 | |||
1131 | xfs_buf_unlock(bp); | ||
1132 | |||
1133 | for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { | 1125 | for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { |
1134 | uint lock_mode; | 1126 | uint lock_mode; |
1135 | 1127 | ||
@@ -1152,42 +1144,24 @@ xfs_zero_remaining_bytes( | |||
1152 | ASSERT(imap.br_startblock != DELAYSTARTBLOCK); | 1144 | ASSERT(imap.br_startblock != DELAYSTARTBLOCK); |
1153 | if (imap.br_state == XFS_EXT_UNWRITTEN) | 1145 | if (imap.br_state == XFS_EXT_UNWRITTEN) |
1154 | continue; | 1146 | continue; |
1155 | XFS_BUF_UNDONE(bp); | ||
1156 | XFS_BUF_UNWRITE(bp); | ||
1157 | XFS_BUF_READ(bp); | ||
1158 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); | ||
1159 | 1147 | ||
1160 | if (XFS_FORCED_SHUTDOWN(mp)) { | 1148 | error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? |
1161 | error = -EIO; | 1149 | mp->m_rtdev_targp : mp->m_ddev_targp, |
1162 | break; | 1150 | xfs_fsb_to_db(ip, imap.br_startblock), |
1163 | } | 1151 | BTOBB(mp->m_sb.sb_blocksize), |
1164 | xfs_buf_iorequest(bp); | 1152 | 0, &bp, NULL); |
1165 | error = xfs_buf_iowait(bp); | 1153 | if (error) |
1166 | if (error) { | 1154 | return error; |
1167 | xfs_buf_ioerror_alert(bp, | 1155 | |
1168 | "xfs_zero_remaining_bytes(read)"); | ||
1169 | break; | ||
1170 | } | ||
1171 | memset(bp->b_addr + | 1156 | memset(bp->b_addr + |
1172 | (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), | 1157 | (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), |
1173 | 0, lastoffset - offset + 1); | 1158 | 0, lastoffset - offset + 1); |
1174 | XFS_BUF_UNDONE(bp); | 1159 | |
1175 | XFS_BUF_UNREAD(bp); | 1160 | error = xfs_bwrite(bp); |
1176 | XFS_BUF_WRITE(bp); | 1161 | xfs_buf_relse(bp); |
1177 | 1162 | if (error) | |
1178 | if (XFS_FORCED_SHUTDOWN(mp)) { | 1163 | return error; |
1179 | error = -EIO; | ||
1180 | break; | ||
1181 | } | ||
1182 | xfs_buf_iorequest(bp); | ||
1183 | error = xfs_buf_iowait(bp); | ||
1184 | if (error) { | ||
1185 | xfs_buf_ioerror_alert(bp, | ||
1186 | "xfs_zero_remaining_bytes(write)"); | ||
1187 | break; | ||
1188 | } | ||
1189 | } | 1164 | } |
1190 | xfs_buf_free(bp); | ||
1191 | return error; | 1165 | return error; |
1192 | } | 1166 | } |
1193 | 1167 | ||
@@ -1205,6 +1179,7 @@ xfs_free_file_space( | |||
1205 | xfs_bmap_free_t free_list; | 1179 | xfs_bmap_free_t free_list; |
1206 | xfs_bmbt_irec_t imap; | 1180 | xfs_bmbt_irec_t imap; |
1207 | xfs_off_t ioffset; | 1181 | xfs_off_t ioffset; |
1182 | xfs_off_t iendoffset; | ||
1208 | xfs_extlen_t mod=0; | 1183 | xfs_extlen_t mod=0; |
1209 | xfs_mount_t *mp; | 1184 | xfs_mount_t *mp; |
1210 | int nimap; | 1185 | int nimap; |
@@ -1233,12 +1208,13 @@ xfs_free_file_space( | |||
1233 | inode_dio_wait(VFS_I(ip)); | 1208 | inode_dio_wait(VFS_I(ip)); |
1234 | 1209 | ||
1235 | rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); | 1210 | rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); |
1236 | ioffset = offset & ~(rounding - 1); | 1211 | ioffset = round_down(offset, rounding); |
1237 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 1212 | iendoffset = round_up(offset + len, rounding) - 1; |
1238 | ioffset, -1); | 1213 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, |
1214 | iendoffset); | ||
1239 | if (error) | 1215 | if (error) |
1240 | goto out; | 1216 | goto out; |
1241 | truncate_pagecache_range(VFS_I(ip), ioffset, -1); | 1217 | truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset); |
1242 | 1218 | ||
1243 | /* | 1219 | /* |
1244 | * Need to zero the stuff we're not freeing, on disk. | 1220 | * Need to zero the stuff we're not freeing, on disk. |
@@ -1392,14 +1368,14 @@ xfs_zero_file_space( | |||
1392 | 1368 | ||
1393 | if (start_boundary < end_boundary - 1) { | 1369 | if (start_boundary < end_boundary - 1) { |
1394 | /* | 1370 | /* |
1395 | * punch out delayed allocation blocks and the page cache over | 1371 | * Writeback the range to ensure any inode size updates due to |
1396 | * the conversion range | 1372 | * appending writes make it to disk (otherwise we could just |
1373 | * punch out the delalloc blocks). | ||
1397 | */ | 1374 | */ |
1398 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1375 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
1399 | error = xfs_bmap_punch_delalloc_range(ip, | 1376 | start_boundary, end_boundary - 1); |
1400 | XFS_B_TO_FSBT(mp, start_boundary), | 1377 | if (error) |
1401 | XFS_B_TO_FSB(mp, end_boundary - start_boundary)); | 1378 | goto out; |
1402 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1403 | truncate_pagecache_range(VFS_I(ip), start_boundary, | 1379 | truncate_pagecache_range(VFS_I(ip), start_boundary, |
1404 | end_boundary - 1); | 1380 | end_boundary - 1); |
1405 | 1381 | ||
@@ -1456,41 +1432,47 @@ xfs_collapse_file_space( | |||
1456 | struct xfs_mount *mp = ip->i_mount; | 1432 | struct xfs_mount *mp = ip->i_mount; |
1457 | struct xfs_trans *tp; | 1433 | struct xfs_trans *tp; |
1458 | int error; | 1434 | int error; |
1459 | xfs_extnum_t current_ext = 0; | ||
1460 | struct xfs_bmap_free free_list; | 1435 | struct xfs_bmap_free free_list; |
1461 | xfs_fsblock_t first_block; | 1436 | xfs_fsblock_t first_block; |
1462 | int committed; | 1437 | int committed; |
1463 | xfs_fileoff_t start_fsb; | 1438 | xfs_fileoff_t start_fsb; |
1439 | xfs_fileoff_t next_fsb; | ||
1464 | xfs_fileoff_t shift_fsb; | 1440 | xfs_fileoff_t shift_fsb; |
1465 | 1441 | ||
1466 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | 1442 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
1467 | 1443 | ||
1468 | trace_xfs_collapse_file_space(ip); | 1444 | trace_xfs_collapse_file_space(ip); |
1469 | 1445 | ||
1470 | start_fsb = XFS_B_TO_FSB(mp, offset + len); | 1446 | next_fsb = XFS_B_TO_FSB(mp, offset + len); |
1471 | shift_fsb = XFS_B_TO_FSB(mp, len); | 1447 | shift_fsb = XFS_B_TO_FSB(mp, len); |
1472 | 1448 | ||
1473 | /* | 1449 | error = xfs_free_file_space(ip, offset, len); |
1474 | * Writeback the entire file and force remove any post-eof blocks. The | ||
1475 | * writeback prevents changes to the extent list via concurrent | ||
1476 | * writeback and the eofblocks trim prevents the extent shift algorithm | ||
1477 | * from running into a post-eof delalloc extent. | ||
1478 | * | ||
1479 | * XXX: This is a temporary fix until the extent shift loop below is | ||
1480 | * converted to use offsets and lookups within the ILOCK rather than | ||
1481 | * carrying around the index into the extent list for the next | ||
1482 | * iteration. | ||
1483 | */ | ||
1484 | error = filemap_write_and_wait(VFS_I(ip)->i_mapping); | ||
1485 | if (error) | 1450 | if (error) |
1486 | return error; | 1451 | return error; |
1452 | |||
1453 | /* | ||
1454 | * Trim eofblocks to avoid shifting uninitialized post-eof preallocation | ||
1455 | * into the accessible region of the file. | ||
1456 | */ | ||
1487 | if (xfs_can_free_eofblocks(ip, true)) { | 1457 | if (xfs_can_free_eofblocks(ip, true)) { |
1488 | error = xfs_free_eofblocks(mp, ip, false); | 1458 | error = xfs_free_eofblocks(mp, ip, false); |
1489 | if (error) | 1459 | if (error) |
1490 | return error; | 1460 | return error; |
1491 | } | 1461 | } |
1492 | 1462 | ||
1493 | error = xfs_free_file_space(ip, offset, len); | 1463 | /* |
1464 | * Writeback and invalidate cache for the remainder of the file as we're | ||
1465 | * about to shift down every extent from the collapse range to EOF. The | ||
1466 | * free of the collapse range above might have already done some of | ||
1467 | * this, but we shouldn't rely on it to do anything outside of the range | ||
1468 | * that was freed. | ||
1469 | */ | ||
1470 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | ||
1471 | offset + len, -1); | ||
1472 | if (error) | ||
1473 | return error; | ||
1474 | error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, | ||
1475 | (offset + len) >> PAGE_CACHE_SHIFT, -1); | ||
1494 | if (error) | 1476 | if (error) |
1495 | return error; | 1477 | return error; |
1496 | 1478 | ||
@@ -1525,10 +1507,10 @@ xfs_collapse_file_space( | |||
1525 | * We are using the write transaction in which max 2 bmbt | 1507 | * We are using the write transaction in which max 2 bmbt |
1526 | * updates are allowed | 1508 | * updates are allowed |
1527 | */ | 1509 | */ |
1528 | error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, | 1510 | start_fsb = next_fsb; |
1529 | shift_fsb, ¤t_ext, | 1511 | error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, |
1530 | &first_block, &free_list, | 1512 | &done, &next_fsb, &first_block, &free_list, |
1531 | XFS_BMAP_MAX_SHIFT_EXTENTS); | 1513 | XFS_BMAP_MAX_SHIFT_EXTENTS); |
1532 | if (error) | 1514 | if (error) |
1533 | goto out; | 1515 | goto out; |
1534 | 1516 | ||
@@ -1638,7 +1620,7 @@ xfs_swap_extents_check_format( | |||
1638 | return 0; | 1620 | return 0; |
1639 | } | 1621 | } |
1640 | 1622 | ||
1641 | int | 1623 | static int |
1642 | xfs_swap_extent_flush( | 1624 | xfs_swap_extent_flush( |
1643 | struct xfs_inode *ip) | 1625 | struct xfs_inode *ip) |
1644 | { | 1626 | { |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cd7b8ca9b064..017b6afe340b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -623,10 +623,11 @@ _xfs_buf_read( | |||
623 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); | 623 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); |
624 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); | 624 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); |
625 | 625 | ||
626 | xfs_buf_iorequest(bp); | 626 | if (flags & XBF_ASYNC) { |
627 | if (flags & XBF_ASYNC) | 627 | xfs_buf_submit(bp); |
628 | return 0; | 628 | return 0; |
629 | return xfs_buf_iowait(bp); | 629 | } |
630 | return xfs_buf_submit_wait(bp); | ||
630 | } | 631 | } |
631 | 632 | ||
632 | xfs_buf_t * | 633 | xfs_buf_t * |
@@ -687,34 +688,39 @@ xfs_buf_readahead_map( | |||
687 | * Read an uncached buffer from disk. Allocates and returns a locked | 688 | * Read an uncached buffer from disk. Allocates and returns a locked |
688 | * buffer containing the disk contents or nothing. | 689 | * buffer containing the disk contents or nothing. |
689 | */ | 690 | */ |
690 | struct xfs_buf * | 691 | int |
691 | xfs_buf_read_uncached( | 692 | xfs_buf_read_uncached( |
692 | struct xfs_buftarg *target, | 693 | struct xfs_buftarg *target, |
693 | xfs_daddr_t daddr, | 694 | xfs_daddr_t daddr, |
694 | size_t numblks, | 695 | size_t numblks, |
695 | int flags, | 696 | int flags, |
697 | struct xfs_buf **bpp, | ||
696 | const struct xfs_buf_ops *ops) | 698 | const struct xfs_buf_ops *ops) |
697 | { | 699 | { |
698 | struct xfs_buf *bp; | 700 | struct xfs_buf *bp; |
699 | 701 | ||
702 | *bpp = NULL; | ||
703 | |||
700 | bp = xfs_buf_get_uncached(target, numblks, flags); | 704 | bp = xfs_buf_get_uncached(target, numblks, flags); |
701 | if (!bp) | 705 | if (!bp) |
702 | return NULL; | 706 | return -ENOMEM; |
703 | 707 | ||
704 | /* set up the buffer for a read IO */ | 708 | /* set up the buffer for a read IO */ |
705 | ASSERT(bp->b_map_count == 1); | 709 | ASSERT(bp->b_map_count == 1); |
706 | bp->b_bn = daddr; | 710 | bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */ |
707 | bp->b_maps[0].bm_bn = daddr; | 711 | bp->b_maps[0].bm_bn = daddr; |
708 | bp->b_flags |= XBF_READ; | 712 | bp->b_flags |= XBF_READ; |
709 | bp->b_ops = ops; | 713 | bp->b_ops = ops; |
710 | 714 | ||
711 | if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { | 715 | xfs_buf_submit_wait(bp); |
716 | if (bp->b_error) { | ||
717 | int error = bp->b_error; | ||
712 | xfs_buf_relse(bp); | 718 | xfs_buf_relse(bp); |
713 | return NULL; | 719 | return error; |
714 | } | 720 | } |
715 | xfs_buf_iorequest(bp); | 721 | |
716 | xfs_buf_iowait(bp); | 722 | *bpp = bp; |
717 | return bp; | 723 | return 0; |
718 | } | 724 | } |
719 | 725 | ||
720 | /* | 726 | /* |
@@ -998,53 +1004,56 @@ xfs_buf_wait_unpin( | |||
998 | * Buffer Utility Routines | 1004 | * Buffer Utility Routines |
999 | */ | 1005 | */ |
1000 | 1006 | ||
1001 | STATIC void | 1007 | void |
1002 | xfs_buf_iodone_work( | 1008 | xfs_buf_ioend( |
1003 | struct work_struct *work) | 1009 | struct xfs_buf *bp) |
1004 | { | 1010 | { |
1005 | struct xfs_buf *bp = | 1011 | bool read = bp->b_flags & XBF_READ; |
1006 | container_of(work, xfs_buf_t, b_iodone_work); | 1012 | |
1007 | bool read = !!(bp->b_flags & XBF_READ); | 1013 | trace_xfs_buf_iodone(bp, _RET_IP_); |
1008 | 1014 | ||
1009 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); | 1015 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); |
1010 | 1016 | ||
1011 | /* only validate buffers that were read without errors */ | 1017 | /* |
1012 | if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE)) | 1018 | * Pull in IO completion errors now. We are guaranteed to be running |
1019 | * single threaded, so we don't need the lock to read b_io_error. | ||
1020 | */ | ||
1021 | if (!bp->b_error && bp->b_io_error) | ||
1022 | xfs_buf_ioerror(bp, bp->b_io_error); | ||
1023 | |||
1024 | /* Only validate buffers that were read without errors */ | ||
1025 | if (read && !bp->b_error && bp->b_ops) { | ||
1026 | ASSERT(!bp->b_iodone); | ||
1013 | bp->b_ops->verify_read(bp); | 1027 | bp->b_ops->verify_read(bp); |
1028 | } | ||
1029 | |||
1030 | if (!bp->b_error) | ||
1031 | bp->b_flags |= XBF_DONE; | ||
1014 | 1032 | ||
1015 | if (bp->b_iodone) | 1033 | if (bp->b_iodone) |
1016 | (*(bp->b_iodone))(bp); | 1034 | (*(bp->b_iodone))(bp); |
1017 | else if (bp->b_flags & XBF_ASYNC) | 1035 | else if (bp->b_flags & XBF_ASYNC) |
1018 | xfs_buf_relse(bp); | 1036 | xfs_buf_relse(bp); |
1019 | else { | 1037 | else |
1020 | ASSERT(read && bp->b_ops); | ||
1021 | complete(&bp->b_iowait); | 1038 | complete(&bp->b_iowait); |
1022 | } | ||
1023 | } | 1039 | } |
1024 | 1040 | ||
1025 | void | 1041 | static void |
1026 | xfs_buf_ioend( | 1042 | xfs_buf_ioend_work( |
1027 | struct xfs_buf *bp, | 1043 | struct work_struct *work) |
1028 | int schedule) | ||
1029 | { | 1044 | { |
1030 | bool read = !!(bp->b_flags & XBF_READ); | 1045 | struct xfs_buf *bp = |
1031 | 1046 | container_of(work, xfs_buf_t, b_iodone_work); | |
1032 | trace_xfs_buf_iodone(bp, _RET_IP_); | ||
1033 | 1047 | ||
1034 | if (bp->b_error == 0) | 1048 | xfs_buf_ioend(bp); |
1035 | bp->b_flags |= XBF_DONE; | 1049 | } |
1036 | 1050 | ||
1037 | if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) { | 1051 | void |
1038 | if (schedule) { | 1052 | xfs_buf_ioend_async( |
1039 | INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); | 1053 | struct xfs_buf *bp) |
1040 | queue_work(xfslogd_workqueue, &bp->b_iodone_work); | 1054 | { |
1041 | } else { | 1055 | INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work); |
1042 | xfs_buf_iodone_work(&bp->b_iodone_work); | 1056 | queue_work(xfslogd_workqueue, &bp->b_iodone_work); |
1043 | } | ||
1044 | } else { | ||
1045 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); | ||
1046 | complete(&bp->b_iowait); | ||
1047 | } | ||
1048 | } | 1057 | } |
1049 | 1058 | ||
1050 | void | 1059 | void |
@@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert( | |||
1067 | (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); | 1076 | (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); |
1068 | } | 1077 | } |
1069 | 1078 | ||
1070 | /* | ||
1071 | * Called when we want to stop a buffer from getting written or read. | ||
1072 | * We attach the EIO error, muck with its flags, and call xfs_buf_ioend | ||
1073 | * so that the proper iodone callbacks get called. | ||
1074 | */ | ||
1075 | STATIC int | ||
1076 | xfs_bioerror( | ||
1077 | xfs_buf_t *bp) | ||
1078 | { | ||
1079 | #ifdef XFSERRORDEBUG | ||
1080 | ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); | ||
1081 | #endif | ||
1082 | |||
1083 | /* | ||
1084 | * No need to wait until the buffer is unpinned, we aren't flushing it. | ||
1085 | */ | ||
1086 | xfs_buf_ioerror(bp, -EIO); | ||
1087 | |||
1088 | /* | ||
1089 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. | ||
1090 | */ | ||
1091 | XFS_BUF_UNREAD(bp); | ||
1092 | XFS_BUF_UNDONE(bp); | ||
1093 | xfs_buf_stale(bp); | ||
1094 | |||
1095 | xfs_buf_ioend(bp, 0); | ||
1096 | |||
1097 | return -EIO; | ||
1098 | } | ||
1099 | |||
1100 | /* | ||
1101 | * Same as xfs_bioerror, except that we are releasing the buffer | ||
1102 | * here ourselves, and avoiding the xfs_buf_ioend call. | ||
1103 | * This is meant for userdata errors; metadata bufs come with | ||
1104 | * iodone functions attached, so that we can track down errors. | ||
1105 | */ | ||
1106 | int | ||
1107 | xfs_bioerror_relse( | ||
1108 | struct xfs_buf *bp) | ||
1109 | { | ||
1110 | int64_t fl = bp->b_flags; | ||
1111 | /* | ||
1112 | * No need to wait until the buffer is unpinned. | ||
1113 | * We aren't flushing it. | ||
1114 | * | ||
1115 | * chunkhold expects B_DONE to be set, whether | ||
1116 | * we actually finish the I/O or not. We don't want to | ||
1117 | * change that interface. | ||
1118 | */ | ||
1119 | XFS_BUF_UNREAD(bp); | ||
1120 | XFS_BUF_DONE(bp); | ||
1121 | xfs_buf_stale(bp); | ||
1122 | bp->b_iodone = NULL; | ||
1123 | if (!(fl & XBF_ASYNC)) { | ||
1124 | /* | ||
1125 | * Mark b_error and B_ERROR _both_. | ||
1126 | * Lot's of chunkcache code assumes that. | ||
1127 | * There's no reason to mark error for | ||
1128 | * ASYNC buffers. | ||
1129 | */ | ||
1130 | xfs_buf_ioerror(bp, -EIO); | ||
1131 | complete(&bp->b_iowait); | ||
1132 | } else { | ||
1133 | xfs_buf_relse(bp); | ||
1134 | } | ||
1135 | |||
1136 | return -EIO; | ||
1137 | } | ||
1138 | |||
1139 | STATIC int | ||
1140 | xfs_bdstrat_cb( | ||
1141 | struct xfs_buf *bp) | ||
1142 | { | ||
1143 | if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { | ||
1144 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | ||
1145 | /* | ||
1146 | * Metadata write that didn't get logged but | ||
1147 | * written delayed anyway. These aren't associated | ||
1148 | * with a transaction, and can be ignored. | ||
1149 | */ | ||
1150 | if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) | ||
1151 | return xfs_bioerror_relse(bp); | ||
1152 | else | ||
1153 | return xfs_bioerror(bp); | ||
1154 | } | ||
1155 | |||
1156 | xfs_buf_iorequest(bp); | ||
1157 | return 0; | ||
1158 | } | ||
1159 | |||
1160 | int | 1079 | int |
1161 | xfs_bwrite( | 1080 | xfs_bwrite( |
1162 | struct xfs_buf *bp) | 1081 | struct xfs_buf *bp) |
@@ -1166,11 +1085,10 @@ xfs_bwrite( | |||
1166 | ASSERT(xfs_buf_islocked(bp)); | 1085 | ASSERT(xfs_buf_islocked(bp)); |
1167 | 1086 | ||
1168 | bp->b_flags |= XBF_WRITE; | 1087 | bp->b_flags |= XBF_WRITE; |
1169 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL); | 1088 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | |
1089 | XBF_WRITE_FAIL | XBF_DONE); | ||
1170 | 1090 | ||
1171 | xfs_bdstrat_cb(bp); | 1091 | error = xfs_buf_submit_wait(bp); |
1172 | |||
1173 | error = xfs_buf_iowait(bp); | ||
1174 | if (error) { | 1092 | if (error) { |
1175 | xfs_force_shutdown(bp->b_target->bt_mount, | 1093 | xfs_force_shutdown(bp->b_target->bt_mount, |
1176 | SHUTDOWN_META_IO_ERROR); | 1094 | SHUTDOWN_META_IO_ERROR); |
@@ -1179,15 +1097,6 @@ xfs_bwrite( | |||
1179 | } | 1097 | } |
1180 | 1098 | ||
1181 | STATIC void | 1099 | STATIC void |
1182 | _xfs_buf_ioend( | ||
1183 | xfs_buf_t *bp, | ||
1184 | int schedule) | ||
1185 | { | ||
1186 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) | ||
1187 | xfs_buf_ioend(bp, schedule); | ||
1188 | } | ||
1189 | |||
1190 | STATIC void | ||
1191 | xfs_buf_bio_end_io( | 1100 | xfs_buf_bio_end_io( |
1192 | struct bio *bio, | 1101 | struct bio *bio, |
1193 | int error) | 1102 | int error) |
@@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io( | |||
1198 | * don't overwrite existing errors - otherwise we can lose errors on | 1107 | * don't overwrite existing errors - otherwise we can lose errors on |
1199 | * buffers that require multiple bios to complete. | 1108 | * buffers that require multiple bios to complete. |
1200 | */ | 1109 | */ |
1201 | if (!bp->b_error) | 1110 | if (error) { |
1202 | xfs_buf_ioerror(bp, error); | 1111 | spin_lock(&bp->b_lock); |
1112 | if (!bp->b_io_error) | ||
1113 | bp->b_io_error = error; | ||
1114 | spin_unlock(&bp->b_lock); | ||
1115 | } | ||
1203 | 1116 | ||
1204 | if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) | 1117 | if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) |
1205 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); | 1118 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); |
1206 | 1119 | ||
1207 | _xfs_buf_ioend(bp, 1); | 1120 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) |
1121 | xfs_buf_ioend_async(bp); | ||
1208 | bio_put(bio); | 1122 | bio_put(bio); |
1209 | } | 1123 | } |
1210 | 1124 | ||
@@ -1283,7 +1197,7 @@ next_chunk: | |||
1283 | } else { | 1197 | } else { |
1284 | /* | 1198 | /* |
1285 | * This is guaranteed not to be the last io reference count | 1199 | * This is guaranteed not to be the last io reference count |
1286 | * because the caller (xfs_buf_iorequest) holds a count itself. | 1200 | * because the caller (xfs_buf_submit) holds a count itself. |
1287 | */ | 1201 | */ |
1288 | atomic_dec(&bp->b_io_remaining); | 1202 | atomic_dec(&bp->b_io_remaining); |
1289 | xfs_buf_ioerror(bp, -EIO); | 1203 | xfs_buf_ioerror(bp, -EIO); |
@@ -1373,53 +1287,131 @@ _xfs_buf_ioapply( | |||
1373 | blk_finish_plug(&plug); | 1287 | blk_finish_plug(&plug); |
1374 | } | 1288 | } |
1375 | 1289 | ||
1290 | /* | ||
1291 | * Asynchronous IO submission path. This transfers the buffer lock ownership and | ||
1292 | * the current reference to the IO. It is not safe to reference the buffer after | ||
1293 | * a call to this function unless the caller holds an additional reference | ||
1294 | * itself. | ||
1295 | */ | ||
1376 | void | 1296 | void |
1377 | xfs_buf_iorequest( | 1297 | xfs_buf_submit( |
1378 | xfs_buf_t *bp) | 1298 | struct xfs_buf *bp) |
1379 | { | 1299 | { |
1380 | trace_xfs_buf_iorequest(bp, _RET_IP_); | 1300 | trace_xfs_buf_submit(bp, _RET_IP_); |
1381 | 1301 | ||
1382 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); | 1302 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); |
1303 | ASSERT(bp->b_flags & XBF_ASYNC); | ||
1304 | |||
1305 | /* on shutdown we stale and complete the buffer immediately */ | ||
1306 | if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { | ||
1307 | xfs_buf_ioerror(bp, -EIO); | ||
1308 | bp->b_flags &= ~XBF_DONE; | ||
1309 | xfs_buf_stale(bp); | ||
1310 | xfs_buf_ioend(bp); | ||
1311 | return; | ||
1312 | } | ||
1383 | 1313 | ||
1384 | if (bp->b_flags & XBF_WRITE) | 1314 | if (bp->b_flags & XBF_WRITE) |
1385 | xfs_buf_wait_unpin(bp); | 1315 | xfs_buf_wait_unpin(bp); |
1316 | |||
1317 | /* clear the internal error state to avoid spurious errors */ | ||
1318 | bp->b_io_error = 0; | ||
1319 | |||
1320 | /* | ||
1321 | * The caller's reference is released during I/O completion. | ||
1322 | * This occurs some time after the last b_io_remaining reference is | ||
1323 | * released, so after we drop our Io reference we have to have some | ||
1324 | * other reference to ensure the buffer doesn't go away from underneath | ||
1325 | * us. Take a direct reference to ensure we have safe access to the | ||
1326 | * buffer until we are finished with it. | ||
1327 | */ | ||
1386 | xfs_buf_hold(bp); | 1328 | xfs_buf_hold(bp); |
1387 | 1329 | ||
1388 | /* | 1330 | /* |
1389 | * Set the count to 1 initially, this will stop an I/O | 1331 | * Set the count to 1 initially, this will stop an I/O completion |
1390 | * completion callout which happens before we have started | 1332 | * callout which happens before we have started all the I/O from calling |
1391 | * all the I/O from calling xfs_buf_ioend too early. | 1333 | * xfs_buf_ioend too early. |
1392 | */ | 1334 | */ |
1393 | atomic_set(&bp->b_io_remaining, 1); | 1335 | atomic_set(&bp->b_io_remaining, 1); |
1394 | _xfs_buf_ioapply(bp); | 1336 | _xfs_buf_ioapply(bp); |
1337 | |||
1395 | /* | 1338 | /* |
1396 | * If _xfs_buf_ioapply failed, we'll get back here with | 1339 | * If _xfs_buf_ioapply failed, we can get back here with only the IO |
1397 | * only the reference we took above. _xfs_buf_ioend will | 1340 | * reference we took above. If we drop it to zero, run completion so |
1398 | * drop it to zero, so we'd better not queue it for later, | 1341 | * that we don't return to the caller with completion still pending. |
1399 | * or we'll free it before it's done. | ||
1400 | */ | 1342 | */ |
1401 | _xfs_buf_ioend(bp, bp->b_error ? 0 : 1); | 1343 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { |
1344 | if (bp->b_error) | ||
1345 | xfs_buf_ioend(bp); | ||
1346 | else | ||
1347 | xfs_buf_ioend_async(bp); | ||
1348 | } | ||
1402 | 1349 | ||
1403 | xfs_buf_rele(bp); | 1350 | xfs_buf_rele(bp); |
1351 | /* Note: it is not safe to reference bp now we've dropped our ref */ | ||
1404 | } | 1352 | } |
1405 | 1353 | ||
1406 | /* | 1354 | /* |
1407 | * Waits for I/O to complete on the buffer supplied. It returns immediately if | 1355 | * Synchronous buffer IO submission path, read or write. |
1408 | * no I/O is pending or there is already a pending error on the buffer, in which | ||
1409 | * case nothing will ever complete. It returns the I/O error code, if any, or | ||
1410 | * 0 if there was no error. | ||
1411 | */ | 1356 | */ |
1412 | int | 1357 | int |
1413 | xfs_buf_iowait( | 1358 | xfs_buf_submit_wait( |
1414 | xfs_buf_t *bp) | 1359 | struct xfs_buf *bp) |
1415 | { | 1360 | { |
1416 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1361 | int error; |
1417 | 1362 | ||
1418 | if (!bp->b_error) | 1363 | trace_xfs_buf_submit_wait(bp, _RET_IP_); |
1419 | wait_for_completion(&bp->b_iowait); | 1364 | |
1365 | ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC))); | ||
1366 | |||
1367 | if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { | ||
1368 | xfs_buf_ioerror(bp, -EIO); | ||
1369 | xfs_buf_stale(bp); | ||
1370 | bp->b_flags &= ~XBF_DONE; | ||
1371 | return -EIO; | ||
1372 | } | ||
1373 | |||
1374 | if (bp->b_flags & XBF_WRITE) | ||
1375 | xfs_buf_wait_unpin(bp); | ||
1376 | |||
1377 | /* clear the internal error state to avoid spurious errors */ | ||
1378 | bp->b_io_error = 0; | ||
1379 | |||
1380 | /* | ||
1381 | * For synchronous IO, the IO does not inherit the submitters reference | ||
1382 | * count, nor the buffer lock. Hence we cannot release the reference we | ||
1383 | * are about to take until we've waited for all IO completion to occur, | ||
1384 | * including any xfs_buf_ioend_async() work that may be pending. | ||
1385 | */ | ||
1386 | xfs_buf_hold(bp); | ||
1387 | |||
1388 | /* | ||
1389 | * Set the count to 1 initially, this will stop an I/O completion | ||
1390 | * callout which happens before we have started all the I/O from calling | ||
1391 | * xfs_buf_ioend too early. | ||
1392 | */ | ||
1393 | atomic_set(&bp->b_io_remaining, 1); | ||
1394 | _xfs_buf_ioapply(bp); | ||
1395 | |||
1396 | /* | ||
1397 | * make sure we run completion synchronously if it raced with us and is | ||
1398 | * already complete. | ||
1399 | */ | ||
1400 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) | ||
1401 | xfs_buf_ioend(bp); | ||
1420 | 1402 | ||
1403 | /* wait for completion before gathering the error from the buffer */ | ||
1404 | trace_xfs_buf_iowait(bp, _RET_IP_); | ||
1405 | wait_for_completion(&bp->b_iowait); | ||
1421 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1406 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
1422 | return bp->b_error; | 1407 | error = bp->b_error; |
1408 | |||
1409 | /* | ||
1410 | * all done now, we can release the hold that keeps the buffer | ||
1411 | * referenced for the entire IO. | ||
1412 | */ | ||
1413 | xfs_buf_rele(bp); | ||
1414 | return error; | ||
1423 | } | 1415 | } |
1424 | 1416 | ||
1425 | xfs_caddr_t | 1417 | xfs_caddr_t |
@@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit( | |||
1813 | blk_start_plug(&plug); | 1805 | blk_start_plug(&plug); |
1814 | list_for_each_entry_safe(bp, n, io_list, b_list) { | 1806 | list_for_each_entry_safe(bp, n, io_list, b_list) { |
1815 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); | 1807 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); |
1816 | bp->b_flags |= XBF_WRITE; | 1808 | bp->b_flags |= XBF_WRITE | XBF_ASYNC; |
1817 | 1809 | ||
1818 | if (!wait) { | 1810 | /* |
1819 | bp->b_flags |= XBF_ASYNC; | 1811 | * we do all Io submission async. This means if we need to wait |
1812 | * for IO completion we need to take an extra reference so the | ||
1813 | * buffer is still valid on the other side. | ||
1814 | */ | ||
1815 | if (wait) | ||
1816 | xfs_buf_hold(bp); | ||
1817 | else | ||
1820 | list_del_init(&bp->b_list); | 1818 | list_del_init(&bp->b_list); |
1821 | } | 1819 | |
1822 | xfs_bdstrat_cb(bp); | 1820 | xfs_buf_submit(bp); |
1823 | } | 1821 | } |
1824 | blk_finish_plug(&plug); | 1822 | blk_finish_plug(&plug); |
1825 | 1823 | ||
@@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit( | |||
1866 | bp = list_first_entry(&io_list, struct xfs_buf, b_list); | 1864 | bp = list_first_entry(&io_list, struct xfs_buf, b_list); |
1867 | 1865 | ||
1868 | list_del_init(&bp->b_list); | 1866 | list_del_init(&bp->b_list); |
1869 | error2 = xfs_buf_iowait(bp); | 1867 | |
1868 | /* locking the buffer will wait for async IO completion. */ | ||
1869 | xfs_buf_lock(bp); | ||
1870 | error2 = bp->b_error; | ||
1870 | xfs_buf_relse(bp); | 1871 | xfs_buf_relse(bp); |
1871 | if (!error) | 1872 | if (!error) |
1872 | error = error2; | 1873 | error = error2; |
@@ -1884,7 +1885,7 @@ xfs_buf_init(void) | |||
1884 | goto out; | 1885 | goto out; |
1885 | 1886 | ||
1886 | xfslogd_workqueue = alloc_workqueue("xfslogd", | 1887 | xfslogd_workqueue = alloc_workqueue("xfslogd", |
1887 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); | 1888 | WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1); |
1888 | if (!xfslogd_workqueue) | 1889 | if (!xfslogd_workqueue) |
1889 | goto out_free_buf_zone; | 1890 | goto out_free_buf_zone; |
1890 | 1891 | ||
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c753183900b3..82002c00af90 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -158,6 +158,7 @@ typedef struct xfs_buf { | |||
158 | struct list_head b_lru; /* lru list */ | 158 | struct list_head b_lru; /* lru list */ |
159 | spinlock_t b_lock; /* internal state lock */ | 159 | spinlock_t b_lock; /* internal state lock */ |
160 | unsigned int b_state; /* internal state flags */ | 160 | unsigned int b_state; /* internal state flags */ |
161 | int b_io_error; /* internal IO error state */ | ||
161 | wait_queue_head_t b_waiters; /* unpin waiters */ | 162 | wait_queue_head_t b_waiters; /* unpin waiters */ |
162 | struct list_head b_list; | 163 | struct list_head b_list; |
163 | struct xfs_perag *b_pag; /* contains rbtree root */ | 164 | struct xfs_perag *b_pag; /* contains rbtree root */ |
@@ -268,9 +269,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); | |||
268 | 269 | ||
269 | struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, | 270 | struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, |
270 | int flags); | 271 | int flags); |
271 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, | 272 | int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, |
272 | xfs_daddr_t daddr, size_t numblks, int flags, | 273 | size_t numblks, int flags, struct xfs_buf **bpp, |
273 | const struct xfs_buf_ops *ops); | 274 | const struct xfs_buf_ops *ops); |
274 | void xfs_buf_hold(struct xfs_buf *bp); | 275 | void xfs_buf_hold(struct xfs_buf *bp); |
275 | 276 | ||
276 | /* Releasing Buffers */ | 277 | /* Releasing Buffers */ |
@@ -286,18 +287,16 @@ extern void xfs_buf_unlock(xfs_buf_t *); | |||
286 | 287 | ||
287 | /* Buffer Read and Write Routines */ | 288 | /* Buffer Read and Write Routines */ |
288 | extern int xfs_bwrite(struct xfs_buf *bp); | 289 | extern int xfs_bwrite(struct xfs_buf *bp); |
289 | extern void xfs_buf_ioend(xfs_buf_t *, int); | 290 | extern void xfs_buf_ioend(struct xfs_buf *bp); |
290 | extern void xfs_buf_ioerror(xfs_buf_t *, int); | 291 | extern void xfs_buf_ioerror(xfs_buf_t *, int); |
291 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); | 292 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); |
292 | extern void xfs_buf_iorequest(xfs_buf_t *); | 293 | extern void xfs_buf_submit(struct xfs_buf *bp); |
293 | extern int xfs_buf_iowait(xfs_buf_t *); | 294 | extern int xfs_buf_submit_wait(struct xfs_buf *bp); |
294 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | 295 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, |
295 | xfs_buf_rw_t); | 296 | xfs_buf_rw_t); |
296 | #define xfs_buf_zero(bp, off, len) \ | 297 | #define xfs_buf_zero(bp, off, len) \ |
297 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | 298 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) |
298 | 299 | ||
299 | extern int xfs_bioerror_relse(struct xfs_buf *); | ||
300 | |||
301 | /* Buffer Utility Routines */ | 300 | /* Buffer Utility Routines */ |
302 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); | 301 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); |
303 | 302 | ||
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 76007deed31f..f15969543326 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -491,7 +491,7 @@ xfs_buf_item_unpin( | |||
491 | xfs_buf_ioerror(bp, -EIO); | 491 | xfs_buf_ioerror(bp, -EIO); |
492 | XFS_BUF_UNDONE(bp); | 492 | XFS_BUF_UNDONE(bp); |
493 | xfs_buf_stale(bp); | 493 | xfs_buf_stale(bp); |
494 | xfs_buf_ioend(bp, 0); | 494 | xfs_buf_ioend(bp); |
495 | } | 495 | } |
496 | } | 496 | } |
497 | 497 | ||
@@ -501,7 +501,7 @@ xfs_buf_item_unpin( | |||
501 | * buffer being bad.. | 501 | * buffer being bad.. |
502 | */ | 502 | */ |
503 | 503 | ||
504 | DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); | 504 | static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); |
505 | 505 | ||
506 | STATIC uint | 506 | STATIC uint |
507 | xfs_buf_item_push( | 507 | xfs_buf_item_push( |
@@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks( | |||
1081 | * a way to shut the filesystem down if the writes keep failing. | 1081 | * a way to shut the filesystem down if the writes keep failing. |
1082 | * | 1082 | * |
1083 | * In practice we'll shut the filesystem down soon as non-transient | 1083 | * In practice we'll shut the filesystem down soon as non-transient |
1084 | * erorrs tend to affect the whole device and a failing log write | 1084 | * errors tend to affect the whole device and a failing log write |
1085 | * will make us give up. But we really ought to do better here. | 1085 | * will make us give up. But we really ought to do better here. |
1086 | */ | 1086 | */ |
1087 | if (XFS_BUF_ISASYNC(bp)) { | 1087 | if (XFS_BUF_ISASYNC(bp)) { |
@@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks( | |||
1094 | if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { | 1094 | if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { |
1095 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | | 1095 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | |
1096 | XBF_DONE | XBF_WRITE_FAIL; | 1096 | XBF_DONE | XBF_WRITE_FAIL; |
1097 | xfs_buf_iorequest(bp); | 1097 | xfs_buf_submit(bp); |
1098 | } else { | 1098 | } else { |
1099 | xfs_buf_relse(bp); | 1099 | xfs_buf_relse(bp); |
1100 | } | 1100 | } |
@@ -1115,7 +1115,7 @@ do_callbacks: | |||
1115 | xfs_buf_do_callbacks(bp); | 1115 | xfs_buf_do_callbacks(bp); |
1116 | bp->b_fspriv = NULL; | 1116 | bp->b_fspriv = NULL; |
1117 | bp->b_iodone = NULL; | 1117 | bp->b_iodone = NULL; |
1118 | xfs_buf_ioend(bp, 0); | 1118 | xfs_buf_ioend(bp); |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | /* | 1121 | /* |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index de5368c803f9..eb596b419942 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -983,7 +983,7 @@ xfs_vm_page_mkwrite( | |||
983 | 983 | ||
984 | /* | 984 | /* |
985 | * This type is designed to indicate the type of offset we would like | 985 | * This type is designed to indicate the type of offset we would like |
986 | * to search from page cache for either xfs_seek_data() or xfs_seek_hole(). | 986 | * to search from page cache for xfs_seek_hole_data(). |
987 | */ | 987 | */ |
988 | enum { | 988 | enum { |
989 | HOLE_OFF = 0, | 989 | HOLE_OFF = 0, |
@@ -1040,7 +1040,7 @@ xfs_lookup_buffer_offset( | |||
1040 | /* | 1040 | /* |
1041 | * This routine is called to find out and return a data or hole offset | 1041 | * This routine is called to find out and return a data or hole offset |
1042 | * from the page cache for unwritten extents according to the desired | 1042 | * from the page cache for unwritten extents according to the desired |
1043 | * type for xfs_seek_data() or xfs_seek_hole(). | 1043 | * type for xfs_seek_hole_data(). |
1044 | * | 1044 | * |
1045 | * The argument offset is used to tell where we start to search from the | 1045 | * The argument offset is used to tell where we start to search from the |
1046 | * page cache. Map is used to figure out the end points of the range to | 1046 | * page cache. Map is used to figure out the end points of the range to |
@@ -1200,9 +1200,10 @@ out: | |||
1200 | } | 1200 | } |
1201 | 1201 | ||
1202 | STATIC loff_t | 1202 | STATIC loff_t |
1203 | xfs_seek_data( | 1203 | xfs_seek_hole_data( |
1204 | struct file *file, | 1204 | struct file *file, |
1205 | loff_t start) | 1205 | loff_t start, |
1206 | int whence) | ||
1206 | { | 1207 | { |
1207 | struct inode *inode = file->f_mapping->host; | 1208 | struct inode *inode = file->f_mapping->host; |
1208 | struct xfs_inode *ip = XFS_I(inode); | 1209 | struct xfs_inode *ip = XFS_I(inode); |
@@ -1214,6 +1215,9 @@ xfs_seek_data( | |||
1214 | uint lock; | 1215 | uint lock; |
1215 | int error; | 1216 | int error; |
1216 | 1217 | ||
1218 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1219 | return -EIO; | ||
1220 | |||
1217 | lock = xfs_ilock_data_map_shared(ip); | 1221 | lock = xfs_ilock_data_map_shared(ip); |
1218 | 1222 | ||
1219 | isize = i_size_read(inode); | 1223 | isize = i_size_read(inode); |
@@ -1228,6 +1232,7 @@ xfs_seek_data( | |||
1228 | */ | 1232 | */ |
1229 | fsbno = XFS_B_TO_FSBT(mp, start); | 1233 | fsbno = XFS_B_TO_FSBT(mp, start); |
1230 | end = XFS_B_TO_FSB(mp, isize); | 1234 | end = XFS_B_TO_FSB(mp, isize); |
1235 | |||
1231 | for (;;) { | 1236 | for (;;) { |
1232 | struct xfs_bmbt_irec map[2]; | 1237 | struct xfs_bmbt_irec map[2]; |
1233 | int nmap = 2; | 1238 | int nmap = 2; |
@@ -1248,29 +1253,48 @@ xfs_seek_data( | |||
1248 | offset = max_t(loff_t, start, | 1253 | offset = max_t(loff_t, start, |
1249 | XFS_FSB_TO_B(mp, map[i].br_startoff)); | 1254 | XFS_FSB_TO_B(mp, map[i].br_startoff)); |
1250 | 1255 | ||
1251 | /* Landed in a data extent */ | 1256 | /* Landed in the hole we wanted? */ |
1252 | if (map[i].br_startblock == DELAYSTARTBLOCK || | 1257 | if (whence == SEEK_HOLE && |
1253 | (map[i].br_state == XFS_EXT_NORM && | 1258 | map[i].br_startblock == HOLESTARTBLOCK) |
1254 | !isnullstartblock(map[i].br_startblock))) | 1259 | goto out; |
1260 | |||
1261 | /* Landed in the data extent we wanted? */ | ||
1262 | if (whence == SEEK_DATA && | ||
1263 | (map[i].br_startblock == DELAYSTARTBLOCK || | ||
1264 | (map[i].br_state == XFS_EXT_NORM && | ||
1265 | !isnullstartblock(map[i].br_startblock)))) | ||
1255 | goto out; | 1266 | goto out; |
1256 | 1267 | ||
1257 | /* | 1268 | /* |
1258 | * Landed in an unwritten extent, try to search data | 1269 | * Landed in an unwritten extent, try to search |
1259 | * from page cache. | 1270 | * for hole or data from page cache. |
1260 | */ | 1271 | */ |
1261 | if (map[i].br_state == XFS_EXT_UNWRITTEN) { | 1272 | if (map[i].br_state == XFS_EXT_UNWRITTEN) { |
1262 | if (xfs_find_get_desired_pgoff(inode, &map[i], | 1273 | if (xfs_find_get_desired_pgoff(inode, &map[i], |
1263 | DATA_OFF, &offset)) | 1274 | whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF, |
1275 | &offset)) | ||
1264 | goto out; | 1276 | goto out; |
1265 | } | 1277 | } |
1266 | } | 1278 | } |
1267 | 1279 | ||
1268 | /* | 1280 | /* |
1269 | * map[0] is hole or its an unwritten extent but | 1281 | * We only received one extent out of the two requested. This |
1270 | * without data in page cache. Probably means that | 1282 | * means we've hit EOF and didn't find what we are looking for. |
1271 | * we are reading after EOF if nothing in map[1]. | ||
1272 | */ | 1283 | */ |
1273 | if (nmap == 1) { | 1284 | if (nmap == 1) { |
1285 | /* | ||
1286 | * If we were looking for a hole, set offset to | ||
1287 | * the end of the file (i.e., there is an implicit | ||
1288 | * hole at the end of any file). | ||
1289 | */ | ||
1290 | if (whence == SEEK_HOLE) { | ||
1291 | offset = isize; | ||
1292 | break; | ||
1293 | } | ||
1294 | /* | ||
1295 | * If we were looking for data, it's nowhere to be found | ||
1296 | */ | ||
1297 | ASSERT(whence == SEEK_DATA); | ||
1274 | error = -ENXIO; | 1298 | error = -ENXIO; |
1275 | goto out_unlock; | 1299 | goto out_unlock; |
1276 | } | 1300 | } |
@@ -1279,125 +1303,30 @@ xfs_seek_data( | |||
1279 | 1303 | ||
1280 | /* | 1304 | /* |
1281 | * Nothing was found, proceed to the next round of search | 1305 | * Nothing was found, proceed to the next round of search |
1282 | * if reading offset not beyond or hit EOF. | 1306 | * if the next reading offset is not at or beyond EOF. |
1283 | */ | 1307 | */ |
1284 | fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; | 1308 | fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; |
1285 | start = XFS_FSB_TO_B(mp, fsbno); | 1309 | start = XFS_FSB_TO_B(mp, fsbno); |
1286 | if (start >= isize) { | 1310 | if (start >= isize) { |
1311 | if (whence == SEEK_HOLE) { | ||
1312 | offset = isize; | ||
1313 | break; | ||
1314 | } | ||
1315 | ASSERT(whence == SEEK_DATA); | ||
1287 | error = -ENXIO; | 1316 | error = -ENXIO; |
1288 | goto out_unlock; | 1317 | goto out_unlock; |
1289 | } | 1318 | } |
1290 | } | 1319 | } |
1291 | 1320 | ||
1292 | out: | 1321 | out: |
1293 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); | ||
1294 | |||
1295 | out_unlock: | ||
1296 | xfs_iunlock(ip, lock); | ||
1297 | |||
1298 | if (error) | ||
1299 | return error; | ||
1300 | return offset; | ||
1301 | } | ||
1302 | |||
1303 | STATIC loff_t | ||
1304 | xfs_seek_hole( | ||
1305 | struct file *file, | ||
1306 | loff_t start) | ||
1307 | { | ||
1308 | struct inode *inode = file->f_mapping->host; | ||
1309 | struct xfs_inode *ip = XFS_I(inode); | ||
1310 | struct xfs_mount *mp = ip->i_mount; | ||
1311 | loff_t uninitialized_var(offset); | ||
1312 | xfs_fsize_t isize; | ||
1313 | xfs_fileoff_t fsbno; | ||
1314 | xfs_filblks_t end; | ||
1315 | uint lock; | ||
1316 | int error; | ||
1317 | |||
1318 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1319 | return -EIO; | ||
1320 | |||
1321 | lock = xfs_ilock_data_map_shared(ip); | ||
1322 | |||
1323 | isize = i_size_read(inode); | ||
1324 | if (start >= isize) { | ||
1325 | error = -ENXIO; | ||
1326 | goto out_unlock; | ||
1327 | } | ||
1328 | |||
1329 | fsbno = XFS_B_TO_FSBT(mp, start); | ||
1330 | end = XFS_B_TO_FSB(mp, isize); | ||
1331 | |||
1332 | for (;;) { | ||
1333 | struct xfs_bmbt_irec map[2]; | ||
1334 | int nmap = 2; | ||
1335 | unsigned int i; | ||
1336 | |||
1337 | error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, | ||
1338 | XFS_BMAPI_ENTIRE); | ||
1339 | if (error) | ||
1340 | goto out_unlock; | ||
1341 | |||
1342 | /* No extents at given offset, must be beyond EOF */ | ||
1343 | if (nmap == 0) { | ||
1344 | error = -ENXIO; | ||
1345 | goto out_unlock; | ||
1346 | } | ||
1347 | |||
1348 | for (i = 0; i < nmap; i++) { | ||
1349 | offset = max_t(loff_t, start, | ||
1350 | XFS_FSB_TO_B(mp, map[i].br_startoff)); | ||
1351 | |||
1352 | /* Landed in a hole */ | ||
1353 | if (map[i].br_startblock == HOLESTARTBLOCK) | ||
1354 | goto out; | ||
1355 | |||
1356 | /* | ||
1357 | * Landed in an unwritten extent, try to search hole | ||
1358 | * from page cache. | ||
1359 | */ | ||
1360 | if (map[i].br_state == XFS_EXT_UNWRITTEN) { | ||
1361 | if (xfs_find_get_desired_pgoff(inode, &map[i], | ||
1362 | HOLE_OFF, &offset)) | ||
1363 | goto out; | ||
1364 | } | ||
1365 | } | ||
1366 | |||
1367 | /* | ||
1368 | * map[0] contains data or its unwritten but contains | ||
1369 | * data in page cache, probably means that we are | ||
1370 | * reading after EOF. We should fix offset to point | ||
1371 | * to the end of the file(i.e., there is an implicit | ||
1372 | * hole at the end of any file). | ||
1373 | */ | ||
1374 | if (nmap == 1) { | ||
1375 | offset = isize; | ||
1376 | break; | ||
1377 | } | ||
1378 | |||
1379 | ASSERT(i > 1); | ||
1380 | |||
1381 | /* | ||
1382 | * Both mappings contains data, proceed to the next round of | ||
1383 | * search if the current reading offset not beyond or hit EOF. | ||
1384 | */ | ||
1385 | fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; | ||
1386 | start = XFS_FSB_TO_B(mp, fsbno); | ||
1387 | if (start >= isize) { | ||
1388 | offset = isize; | ||
1389 | break; | ||
1390 | } | ||
1391 | } | ||
1392 | |||
1393 | out: | ||
1394 | /* | 1322 | /* |
1395 | * At this point, we must have found a hole. However, the returned | 1323 | * If at this point we have found the hole we wanted, the returned |
1396 | * offset may be bigger than the file size as it may be aligned to | 1324 | * offset may be bigger than the file size as it may be aligned to |
1397 | * page boundary for unwritten extents, we need to deal with this | 1325 | * page boundary for unwritten extents. We need to deal with this |
1398 | * situation in particular. | 1326 | * situation in particular. |
1399 | */ | 1327 | */ |
1400 | offset = min_t(loff_t, offset, isize); | 1328 | if (whence == SEEK_HOLE) |
1329 | offset = min_t(loff_t, offset, isize); | ||
1401 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); | 1330 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
1402 | 1331 | ||
1403 | out_unlock: | 1332 | out_unlock: |
@@ -1412,17 +1341,16 @@ STATIC loff_t | |||
1412 | xfs_file_llseek( | 1341 | xfs_file_llseek( |
1413 | struct file *file, | 1342 | struct file *file, |
1414 | loff_t offset, | 1343 | loff_t offset, |
1415 | int origin) | 1344 | int whence) |
1416 | { | 1345 | { |
1417 | switch (origin) { | 1346 | switch (whence) { |
1418 | case SEEK_END: | 1347 | case SEEK_END: |
1419 | case SEEK_CUR: | 1348 | case SEEK_CUR: |
1420 | case SEEK_SET: | 1349 | case SEEK_SET: |
1421 | return generic_file_llseek(file, offset, origin); | 1350 | return generic_file_llseek(file, offset, whence); |
1422 | case SEEK_DATA: | ||
1423 | return xfs_seek_data(file, offset); | ||
1424 | case SEEK_HOLE: | 1351 | case SEEK_HOLE: |
1425 | return xfs_seek_hole(file, offset); | 1352 | case SEEK_DATA: |
1353 | return xfs_seek_hole_data(file, offset, whence); | ||
1426 | default: | 1354 | default: |
1427 | return -EINVAL; | 1355 | return -EINVAL; |
1428 | } | 1356 | } |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index f91de1ef05e1..c05ac8b70fa9 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -172,16 +172,11 @@ xfs_growfs_data_private( | |||
172 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) | 172 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) |
173 | return error; | 173 | return error; |
174 | dpct = pct - mp->m_sb.sb_imax_pct; | 174 | dpct = pct - mp->m_sb.sb_imax_pct; |
175 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, | 175 | error = xfs_buf_read_uncached(mp->m_ddev_targp, |
176 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), | 176 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), |
177 | XFS_FSS_TO_BB(mp, 1), 0, NULL); | 177 | XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); |
178 | if (!bp) | 178 | if (error) |
179 | return -EIO; | ||
180 | if (bp->b_error) { | ||
181 | error = bp->b_error; | ||
182 | xfs_buf_relse(bp); | ||
183 | return error; | 179 | return error; |
184 | } | ||
185 | xfs_buf_relse(bp); | 180 | xfs_buf_relse(bp); |
186 | 181 | ||
187 | new = nb; /* use new as a temporary here */ | 182 | new = nb; /* use new as a temporary here */ |
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 5399ef222dd7..4d41b241298f 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c | |||
@@ -43,3 +43,7 @@ xfs_param_t xfs_params = { | |||
43 | .fstrm_timer = { 1, 30*100, 3600*100}, | 43 | .fstrm_timer = { 1, 30*100, 3600*100}, |
44 | .eofb_timer = { 1, 300, 3600*24}, | 44 | .eofb_timer = { 1, 300, 3600*24}, |
45 | }; | 45 | }; |
46 | |||
47 | struct xfs_globals xfs_globals = { | ||
48 | .log_recovery_delay = 0, /* no delay by default */ | ||
49 | }; | ||
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 981b2cf51985..b45f7b27b5df 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include "xfs_trace.h" | 33 | #include "xfs_trace.h" |
34 | #include "xfs_icache.h" | 34 | #include "xfs_icache.h" |
35 | #include "xfs_bmap_util.h" | 35 | #include "xfs_bmap_util.h" |
36 | #include "xfs_quota.h" | ||
37 | #include "xfs_dquot_item.h" | 36 | #include "xfs_dquot_item.h" |
38 | #include "xfs_dquot.h" | 37 | #include "xfs_dquot.h" |
39 | 38 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index fea3c92fb3f0..8ed049d1e332 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -654,7 +654,7 @@ xfs_ialloc( | |||
654 | xfs_inode_t *ip; | 654 | xfs_inode_t *ip; |
655 | uint flags; | 655 | uint flags; |
656 | int error; | 656 | int error; |
657 | timespec_t tv; | 657 | struct timespec tv; |
658 | 658 | ||
659 | /* | 659 | /* |
660 | * Call the space management code to pick | 660 | * Call the space management code to pick |
@@ -720,7 +720,7 @@ xfs_ialloc( | |||
720 | ip->i_d.di_nextents = 0; | 720 | ip->i_d.di_nextents = 0; |
721 | ASSERT(ip->i_d.di_nblocks == 0); | 721 | ASSERT(ip->i_d.di_nblocks == 0); |
722 | 722 | ||
723 | nanotime(&tv); | 723 | tv = current_fs_time(mp->m_super); |
724 | ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; | 724 | ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; |
725 | ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; | 725 | ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; |
726 | ip->i_d.di_atime = ip->i_d.di_mtime; | 726 | ip->i_d.di_atime = ip->i_d.di_mtime; |
@@ -769,6 +769,8 @@ xfs_ialloc( | |||
769 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; | 769 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; |
770 | ip->i_d.di_extsize = pip->i_d.di_extsize; | 770 | ip->i_d.di_extsize = pip->i_d.di_extsize; |
771 | } | 771 | } |
772 | if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | ||
773 | di_flags |= XFS_DIFLAG_PROJINHERIT; | ||
772 | } else if (S_ISREG(mode)) { | 774 | } else if (S_ISREG(mode)) { |
773 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) | 775 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) |
774 | di_flags |= XFS_DIFLAG_REALTIME; | 776 | di_flags |= XFS_DIFLAG_REALTIME; |
@@ -789,8 +791,6 @@ xfs_ialloc( | |||
789 | if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && | 791 | if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && |
790 | xfs_inherit_nosymlinks) | 792 | xfs_inherit_nosymlinks) |
791 | di_flags |= XFS_DIFLAG_NOSYMLINKS; | 793 | di_flags |= XFS_DIFLAG_NOSYMLINKS; |
792 | if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | ||
793 | di_flags |= XFS_DIFLAG_PROJINHERIT; | ||
794 | if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && | 794 | if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && |
795 | xfs_inherit_nodefrag) | 795 | xfs_inherit_nodefrag) |
796 | di_flags |= XFS_DIFLAG_NODEFRAG; | 796 | di_flags |= XFS_DIFLAG_NODEFRAG; |
@@ -1153,9 +1153,11 @@ xfs_create( | |||
1153 | if (error) | 1153 | if (error) |
1154 | goto out_trans_cancel; | 1154 | goto out_trans_cancel; |
1155 | 1155 | ||
1156 | error = xfs_dir_canenter(tp, dp, name, resblks); | 1156 | if (!resblks) { |
1157 | if (error) | 1157 | error = xfs_dir_canenter(tp, dp, name); |
1158 | goto out_trans_cancel; | 1158 | if (error) |
1159 | goto out_trans_cancel; | ||
1160 | } | ||
1159 | 1161 | ||
1160 | /* | 1162 | /* |
1161 | * A newly created regular or special file just has one directory | 1163 | * A newly created regular or special file just has one directory |
@@ -1421,9 +1423,11 @@ xfs_link( | |||
1421 | goto error_return; | 1423 | goto error_return; |
1422 | } | 1424 | } |
1423 | 1425 | ||
1424 | error = xfs_dir_canenter(tp, tdp, target_name, resblks); | 1426 | if (!resblks) { |
1425 | if (error) | 1427 | error = xfs_dir_canenter(tp, tdp, target_name); |
1426 | goto error_return; | 1428 | if (error) |
1429 | goto error_return; | ||
1430 | } | ||
1427 | 1431 | ||
1428 | xfs_bmap_init(&free_list, &first_block); | 1432 | xfs_bmap_init(&free_list, &first_block); |
1429 | 1433 | ||
@@ -2759,9 +2763,11 @@ xfs_rename( | |||
2759 | * If there's no space reservation, check the entry will | 2763 | * If there's no space reservation, check the entry will |
2760 | * fit before actually inserting it. | 2764 | * fit before actually inserting it. |
2761 | */ | 2765 | */ |
2762 | error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); | 2766 | if (!spaceres) { |
2763 | if (error) | 2767 | error = xfs_dir_canenter(tp, target_dp, target_name); |
2764 | goto error_return; | 2768 | if (error) |
2769 | goto error_return; | ||
2770 | } | ||
2765 | /* | 2771 | /* |
2766 | * If target does not exist and the rename crosses | 2772 | * If target does not exist and the rename crosses |
2767 | * directories, adjust the target directory link count | 2773 | * directories, adjust the target directory link count |
@@ -3056,7 +3062,7 @@ cluster_corrupt_out: | |||
3056 | XFS_BUF_UNDONE(bp); | 3062 | XFS_BUF_UNDONE(bp); |
3057 | xfs_buf_stale(bp); | 3063 | xfs_buf_stale(bp); |
3058 | xfs_buf_ioerror(bp, -EIO); | 3064 | xfs_buf_ioerror(bp, -EIO); |
3059 | xfs_buf_ioend(bp, 0); | 3065 | xfs_buf_ioend(bp); |
3060 | } else { | 3066 | } else { |
3061 | xfs_buf_stale(bp); | 3067 | xfs_buf_stale(bp); |
3062 | xfs_buf_relse(bp); | 3068 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c10e3fadd9af..9af2882e1f4c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -102,7 +102,7 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size) | |||
102 | { | 102 | { |
103 | xfs_fsize_t i_size = i_size_read(VFS_I(ip)); | 103 | xfs_fsize_t i_size = i_size_read(VFS_I(ip)); |
104 | 104 | ||
105 | if (new_size > i_size) | 105 | if (new_size > i_size || new_size < 0) |
106 | new_size = i_size; | 106 | new_size = i_size; |
107 | return new_size > ip->i_d.di_size ? new_size : 0; | 107 | return new_size > ip->i_d.di_size ? new_size : 0; |
108 | } | 108 | } |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index de5a7be36e60..63de0b0acc32 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -615,7 +615,7 @@ xfs_iflush_done( | |||
615 | blip = bp->b_fspriv; | 615 | blip = bp->b_fspriv; |
616 | prev = NULL; | 616 | prev = NULL; |
617 | while (blip != NULL) { | 617 | while (blip != NULL) { |
618 | if (lip->li_cb != xfs_iflush_done) { | 618 | if (blip->li_cb != xfs_iflush_done) { |
619 | prev = blip; | 619 | prev = blip; |
620 | blip = blip->li_bio_list; | 620 | blip = blip->li_bio_list; |
621 | continue; | 621 | continue; |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3799695b9249..24c926b6fe85 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -968,8 +968,6 @@ xfs_set_diflags( | |||
968 | di_flags |= XFS_DIFLAG_NOATIME; | 968 | di_flags |= XFS_DIFLAG_NOATIME; |
969 | if (xflags & XFS_XFLAG_NODUMP) | 969 | if (xflags & XFS_XFLAG_NODUMP) |
970 | di_flags |= XFS_DIFLAG_NODUMP; | 970 | di_flags |= XFS_DIFLAG_NODUMP; |
971 | if (xflags & XFS_XFLAG_PROJINHERIT) | ||
972 | di_flags |= XFS_DIFLAG_PROJINHERIT; | ||
973 | if (xflags & XFS_XFLAG_NODEFRAG) | 971 | if (xflags & XFS_XFLAG_NODEFRAG) |
974 | di_flags |= XFS_DIFLAG_NODEFRAG; | 972 | di_flags |= XFS_DIFLAG_NODEFRAG; |
975 | if (xflags & XFS_XFLAG_FILESTREAM) | 973 | if (xflags & XFS_XFLAG_FILESTREAM) |
@@ -981,6 +979,8 @@ xfs_set_diflags( | |||
981 | di_flags |= XFS_DIFLAG_NOSYMLINKS; | 979 | di_flags |= XFS_DIFLAG_NOSYMLINKS; |
982 | if (xflags & XFS_XFLAG_EXTSZINHERIT) | 980 | if (xflags & XFS_XFLAG_EXTSZINHERIT) |
983 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; | 981 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; |
982 | if (xflags & XFS_XFLAG_PROJINHERIT) | ||
983 | di_flags |= XFS_DIFLAG_PROJINHERIT; | ||
984 | } else if (S_ISREG(ip->i_d.di_mode)) { | 984 | } else if (S_ISREG(ip->i_d.di_mode)) { |
985 | if (xflags & XFS_XFLAG_REALTIME) | 985 | if (xflags & XFS_XFLAG_REALTIME) |
986 | di_flags |= XFS_DIFLAG_REALTIME; | 986 | di_flags |= XFS_DIFLAG_REALTIME; |
@@ -1231,13 +1231,25 @@ xfs_ioctl_setattr( | |||
1231 | 1231 | ||
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | if (mask & FSX_EXTSIZE) | ||
1235 | ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; | ||
1236 | if (mask & FSX_XFLAGS) { | 1234 | if (mask & FSX_XFLAGS) { |
1237 | xfs_set_diflags(ip, fa->fsx_xflags); | 1235 | xfs_set_diflags(ip, fa->fsx_xflags); |
1238 | xfs_diflags_to_linux(ip); | 1236 | xfs_diflags_to_linux(ip); |
1239 | } | 1237 | } |
1240 | 1238 | ||
1239 | /* | ||
1240 | * Only set the extent size hint if we've already determined that the | ||
1241 | * extent size hint should be set on the inode. If no extent size flags | ||
1242 | * are set on the inode then unconditionally clear the extent size hint. | ||
1243 | */ | ||
1244 | if (mask & FSX_EXTSIZE) { | ||
1245 | int extsize = 0; | ||
1246 | |||
1247 | if (ip->i_d.di_flags & | ||
1248 | (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) | ||
1249 | extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; | ||
1250 | ip->i_d.di_extsize = extsize; | ||
1251 | } | ||
1252 | |||
1241 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | 1253 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); |
1242 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1254 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1243 | 1255 | ||
@@ -1349,7 +1361,7 @@ xfs_ioc_setxflags( | |||
1349 | STATIC int | 1361 | STATIC int |
1350 | xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) | 1362 | xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) |
1351 | { | 1363 | { |
1352 | struct getbmap __user *base = *ap; | 1364 | struct getbmap __user *base = (struct getbmap __user *)*ap; |
1353 | 1365 | ||
1354 | /* copy only getbmap portion (not getbmapx) */ | 1366 | /* copy only getbmap portion (not getbmapx) */ |
1355 | if (copy_to_user(base, bmv, sizeof(struct getbmap))) | 1367 | if (copy_to_user(base, bmv, sizeof(struct getbmap))) |
@@ -1380,7 +1392,7 @@ xfs_ioc_getbmap( | |||
1380 | bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; | 1392 | bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; |
1381 | 1393 | ||
1382 | error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, | 1394 | error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, |
1383 | (struct getbmap *)arg+1); | 1395 | (__force struct getbmap *)arg+1); |
1384 | if (error) | 1396 | if (error) |
1385 | return error; | 1397 | return error; |
1386 | 1398 | ||
@@ -1393,7 +1405,7 @@ xfs_ioc_getbmap( | |||
1393 | STATIC int | 1405 | STATIC int |
1394 | xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) | 1406 | xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) |
1395 | { | 1407 | { |
1396 | struct getbmapx __user *base = *ap; | 1408 | struct getbmapx __user *base = (struct getbmapx __user *)*ap; |
1397 | 1409 | ||
1398 | if (copy_to_user(base, bmv, sizeof(struct getbmapx))) | 1410 | if (copy_to_user(base, bmv, sizeof(struct getbmapx))) |
1399 | return -EFAULT; | 1411 | return -EFAULT; |
@@ -1420,7 +1432,7 @@ xfs_ioc_getbmapx( | |||
1420 | return -EINVAL; | 1432 | return -EINVAL; |
1421 | 1433 | ||
1422 | error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, | 1434 | error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, |
1423 | (struct getbmapx *)arg+1); | 1435 | (__force struct getbmapx *)arg+1); |
1424 | if (error) | 1436 | if (error) |
1425 | return error; | 1437 | return error; |
1426 | 1438 | ||
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index a554646ff141..94ce027e28e3 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -160,6 +160,7 @@ xfs_ioctl32_bstat_copyin( | |||
160 | get_user(bstat->bs_gen, &bstat32->bs_gen) || | 160 | get_user(bstat->bs_gen, &bstat32->bs_gen) || |
161 | get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || | 161 | get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || |
162 | get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || | 162 | get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || |
163 | get_user(bstat->bs_forkoff, &bstat32->bs_forkoff) || | ||
163 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || | 164 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || |
164 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || | 165 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || |
165 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) | 166 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) |
@@ -214,6 +215,7 @@ xfs_bulkstat_one_fmt_compat( | |||
214 | put_user(buffer->bs_gen, &p32->bs_gen) || | 215 | put_user(buffer->bs_gen, &p32->bs_gen) || |
215 | put_user(buffer->bs_projid, &p32->bs_projid) || | 216 | put_user(buffer->bs_projid, &p32->bs_projid) || |
216 | put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || | 217 | put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || |
218 | put_user(buffer->bs_forkoff, &p32->bs_forkoff) || | ||
217 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || | 219 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || |
218 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || | 220 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || |
219 | put_user(buffer->bs_aextents, &p32->bs_aextents)) | 221 | put_user(buffer->bs_aextents, &p32->bs_aextents)) |
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 80f4060e8970..b1bb45444df8 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h | |||
@@ -67,8 +67,9 @@ typedef struct compat_xfs_bstat { | |||
67 | __u32 bs_gen; /* generation count */ | 67 | __u32 bs_gen; /* generation count */ |
68 | __u16 bs_projid_lo; /* lower part of project id */ | 68 | __u16 bs_projid_lo; /* lower part of project id */ |
69 | #define bs_projid bs_projid_lo /* (previously just bs_projid) */ | 69 | #define bs_projid bs_projid_lo /* (previously just bs_projid) */ |
70 | __u16 bs_forkoff; /* inode fork offset in bytes */ | ||
70 | __u16 bs_projid_hi; /* high part of project id */ | 71 | __u16 bs_projid_hi; /* high part of project id */ |
71 | unsigned char bs_pad[12]; /* pad space, unused */ | 72 | unsigned char bs_pad[10]; /* pad space, unused */ |
72 | __u32 bs_dmevmask; /* DMIG event mask */ | 73 | __u32 bs_dmevmask; /* DMIG event mask */ |
73 | __u16 bs_dmstate; /* DMIG state info */ | 74 | __u16 bs_dmstate; /* DMIG state info */ |
74 | __u16 bs_aextents; /* attribute number of extents */ | 75 | __u16 bs_aextents; /* attribute number of extents */ |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index e9c47b6f5e5a..afcf3c926565 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -404,8 +404,8 @@ xfs_quota_calc_throttle( | |||
404 | int shift = 0; | 404 | int shift = 0; |
405 | struct xfs_dquot *dq = xfs_inode_dquot(ip, type); | 405 | struct xfs_dquot *dq = xfs_inode_dquot(ip, type); |
406 | 406 | ||
407 | /* over hi wmark, squash the prealloc completely */ | 407 | /* no dq, or over hi wmark, squash the prealloc completely */ |
408 | if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { | 408 | if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { |
409 | *qblocks = 0; | 409 | *qblocks = 0; |
410 | *qfreesp = 0; | 410 | *qfreesp = 0; |
411 | return; | 411 | return; |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 72129493e9d3..ec6dcdc181ee 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -849,6 +849,36 @@ xfs_setattr_size( | |||
849 | return error; | 849 | return error; |
850 | truncate_setsize(inode, newsize); | 850 | truncate_setsize(inode, newsize); |
851 | 851 | ||
852 | /* | ||
853 | * The "we can't serialise against page faults" pain gets worse. | ||
854 | * | ||
855 | * If the file is mapped then we have to clean the page at the old EOF | ||
856 | * when extending the file. Extending the file can expose changes the | ||
857 | * underlying page mapping (e.g. from beyond EOF to a hole or | ||
858 | * unwritten), and so on the next attempt to write to that page we need | ||
859 | * to remap it for write. i.e. we need .page_mkwrite() to be called. | ||
860 | * Hence we need to clean the page to clean the pte and so a new write | ||
861 | * fault will be triggered appropriately. | ||
862 | * | ||
863 | * If we do it before we change the inode size, then we can race with a | ||
864 | * page fault that maps the page with exactly the same problem. If we do | ||
865 | * it after we change the file size, then a new page fault can come in | ||
866 | * and allocate space before we've run the rest of the truncate | ||
867 | * transaction. That's kinda grotesque, but it's better than have data | ||
868 | * over a hole, and so that's the lesser evil that has been chosen here. | ||
869 | * | ||
870 | * The real solution, however, is to have some mechanism for locking out | ||
871 | * page faults while a truncate is in progress. | ||
872 | */ | ||
873 | if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { | ||
874 | error = filemap_write_and_wait_range( | ||
875 | VFS_I(ip)->i_mapping, | ||
876 | round_down(oldsize, PAGE_CACHE_SIZE), | ||
877 | round_up(oldsize, PAGE_CACHE_SIZE) - 1); | ||
878 | if (error) | ||
879 | return error; | ||
880 | } | ||
881 | |||
852 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); | 882 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); |
853 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); | 883 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); |
854 | if (error) | 884 | if (error) |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f71be9c68017..f1deb961a296 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -639,7 +639,8 @@ next_ag: | |||
639 | xfs_buf_relse(agbp); | 639 | xfs_buf_relse(agbp); |
640 | agbp = NULL; | 640 | agbp = NULL; |
641 | agino = 0; | 641 | agino = 0; |
642 | } while (++agno < mp->m_sb.sb_agcount); | 642 | agno++; |
643 | } while (agno < mp->m_sb.sb_agcount); | ||
643 | 644 | ||
644 | if (!error) { | 645 | if (!error) { |
645 | if (bufidx) { | 646 | if (bufidx) { |
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index d10dc8f397c9..6a51619d8690 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
@@ -56,7 +56,6 @@ typedef __uint64_t __psunsigned_t; | |||
56 | 56 | ||
57 | #include "kmem.h" | 57 | #include "kmem.h" |
58 | #include "mrlock.h" | 58 | #include "mrlock.h" |
59 | #include "time.h" | ||
60 | #include "uuid.h" | 59 | #include "uuid.h" |
61 | 60 | ||
62 | #include <linux/semaphore.h> | 61 | #include <linux/semaphore.h> |
@@ -179,6 +178,11 @@ typedef __uint64_t __psunsigned_t; | |||
179 | #define MAX(a,b) (max(a,b)) | 178 | #define MAX(a,b) (max(a,b)) |
180 | #define howmany(x, y) (((x)+((y)-1))/(y)) | 179 | #define howmany(x, y) (((x)+((y)-1))/(y)) |
181 | 180 | ||
181 | static inline void delay(long ticks) | ||
182 | { | ||
183 | schedule_timeout_uninterruptible(ticks); | ||
184 | } | ||
185 | |||
182 | /* | 186 | /* |
183 | * XFS wrapper structure for sysfs support. It depends on external data | 187 | * XFS wrapper structure for sysfs support. It depends on external data |
184 | * structures and is embedded in various internal data structures to implement | 188 | * structures and is embedded in various internal data structures to implement |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ca4fd5bd8522..fe88ef67f93a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1678,7 +1678,7 @@ xlog_bdstrat( | |||
1678 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 1678 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
1679 | xfs_buf_ioerror(bp, -EIO); | 1679 | xfs_buf_ioerror(bp, -EIO); |
1680 | xfs_buf_stale(bp); | 1680 | xfs_buf_stale(bp); |
1681 | xfs_buf_ioend(bp, 0); | 1681 | xfs_buf_ioend(bp); |
1682 | /* | 1682 | /* |
1683 | * It would seem logical to return EIO here, but we rely on | 1683 | * It would seem logical to return EIO here, but we rely on |
1684 | * the log state machine to propagate I/O errors instead of | 1684 | * the log state machine to propagate I/O errors instead of |
@@ -1688,7 +1688,7 @@ xlog_bdstrat( | |||
1688 | return 0; | 1688 | return 0; |
1689 | } | 1689 | } |
1690 | 1690 | ||
1691 | xfs_buf_iorequest(bp); | 1691 | xfs_buf_submit(bp); |
1692 | return 0; | 1692 | return 0; |
1693 | } | 1693 | } |
1694 | 1694 | ||
@@ -3867,18 +3867,17 @@ xlog_state_ioerror( | |||
3867 | * This is called from xfs_force_shutdown, when we're forcibly | 3867 | * This is called from xfs_force_shutdown, when we're forcibly |
3868 | * shutting down the filesystem, typically because of an IO error. | 3868 | * shutting down the filesystem, typically because of an IO error. |
3869 | * Our main objectives here are to make sure that: | 3869 | * Our main objectives here are to make sure that: |
3870 | * a. the filesystem gets marked 'SHUTDOWN' for all interested | 3870 | * a. if !logerror, flush the logs to disk. Anything modified |
3871 | * after this is ignored. | ||
3872 | * b. the filesystem gets marked 'SHUTDOWN' for all interested | ||
3871 | * parties to find out, 'atomically'. | 3873 | * parties to find out, 'atomically'. |
3872 | * b. those who're sleeping on log reservations, pinned objects and | 3874 | * c. those who're sleeping on log reservations, pinned objects and |
3873 | * other resources get woken up, and be told the bad news. | 3875 | * other resources get woken up, and be told the bad news. |
3874 | * c. nothing new gets queued up after (a) and (b) are done. | 3876 | * d. nothing new gets queued up after (b) and (c) are done. |
3875 | * d. if !logerror, flush the iclogs to disk, then seal them off | ||
3876 | * for business. | ||
3877 | * | 3877 | * |
3878 | * Note: for delayed logging the !logerror case needs to flush the regions | 3878 | * Note: for the !logerror case we need to flush the regions held in memory out |
3879 | * held in memory out to the iclogs before flushing them to disk. This needs | 3879 | * to disk first. This needs to be done before the log is marked as shutdown, |
3880 | * to be done before the log is marked as shutdown, otherwise the flush to the | 3880 | * otherwise the iclog writes will fail. |
3881 | * iclogs will fail. | ||
3882 | */ | 3881 | */ |
3883 | int | 3882 | int |
3884 | xfs_log_force_umount( | 3883 | xfs_log_force_umount( |
@@ -3910,16 +3909,16 @@ xfs_log_force_umount( | |||
3910 | ASSERT(XLOG_FORCED_SHUTDOWN(log)); | 3909 | ASSERT(XLOG_FORCED_SHUTDOWN(log)); |
3911 | return 1; | 3910 | return 1; |
3912 | } | 3911 | } |
3913 | retval = 0; | ||
3914 | 3912 | ||
3915 | /* | 3913 | /* |
3916 | * Flush the in memory commit item list before marking the log as | 3914 | * Flush all the completed transactions to disk before marking the log |
3917 | * being shut down. We need to do it in this order to ensure all the | 3915 | * being shut down. We need to do it in this order to ensure that |
3918 | * completed transactions are flushed to disk with the xfs_log_force() | 3916 | * completed operations are safely on disk before we shut down, and that |
3919 | * call below. | 3917 | * we don't have to issue any buffer IO after the shutdown flags are set |
3918 | * to guarantee this. | ||
3920 | */ | 3919 | */ |
3921 | if (!logerror) | 3920 | if (!logerror) |
3922 | xlog_cil_force(log); | 3921 | _xfs_log_force(mp, XFS_LOG_SYNC, NULL); |
3923 | 3922 | ||
3924 | /* | 3923 | /* |
3925 | * mark the filesystem and the as in a shutdown state and wake | 3924 | * mark the filesystem and the as in a shutdown state and wake |
@@ -3931,18 +3930,11 @@ xfs_log_force_umount( | |||
3931 | XFS_BUF_DONE(mp->m_sb_bp); | 3930 | XFS_BUF_DONE(mp->m_sb_bp); |
3932 | 3931 | ||
3933 | /* | 3932 | /* |
3934 | * This flag is sort of redundant because of the mount flag, but | 3933 | * Mark the log and the iclogs with IO error flags to prevent any |
3935 | * it's good to maintain the separation between the log and the rest | 3934 | * further log IO from being issued or completed. |
3936 | * of XFS. | ||
3937 | */ | 3935 | */ |
3938 | log->l_flags |= XLOG_IO_ERROR; | 3936 | log->l_flags |= XLOG_IO_ERROR; |
3939 | 3937 | retval = xlog_state_ioerror(log); | |
3940 | /* | ||
3941 | * If we hit a log error, we want to mark all the iclogs IOERROR | ||
3942 | * while we're still holding the loglock. | ||
3943 | */ | ||
3944 | if (logerror) | ||
3945 | retval = xlog_state_ioerror(log); | ||
3946 | spin_unlock(&log->l_icloglock); | 3938 | spin_unlock(&log->l_icloglock); |
3947 | 3939 | ||
3948 | /* | 3940 | /* |
@@ -3955,19 +3947,6 @@ xfs_log_force_umount( | |||
3955 | xlog_grant_head_wake_all(&log->l_reserve_head); | 3947 | xlog_grant_head_wake_all(&log->l_reserve_head); |
3956 | xlog_grant_head_wake_all(&log->l_write_head); | 3948 | xlog_grant_head_wake_all(&log->l_write_head); |
3957 | 3949 | ||
3958 | if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { | ||
3959 | ASSERT(!logerror); | ||
3960 | /* | ||
3961 | * Force the incore logs to disk before shutting the | ||
3962 | * log down completely. | ||
3963 | */ | ||
3964 | _xfs_log_force(mp, XFS_LOG_SYNC, NULL); | ||
3965 | |||
3966 | spin_lock(&log->l_icloglock); | ||
3967 | retval = xlog_state_ioerror(log); | ||
3968 | spin_unlock(&log->l_icloglock); | ||
3969 | } | ||
3970 | |||
3971 | /* | 3950 | /* |
3972 | * Wake up everybody waiting on xfs_log_force. Wake the CIL push first | 3951 | * Wake up everybody waiting on xfs_log_force. Wake the CIL push first |
3973 | * as if the log writes were completed. The abort handling in the log | 3952 | * as if the log writes were completed. The abort handling in the log |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f6b79e5325dd..f506c457011e 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -463,12 +463,40 @@ xlog_cil_push( | |||
463 | spin_unlock(&cil->xc_push_lock); | 463 | spin_unlock(&cil->xc_push_lock); |
464 | goto out_skip; | 464 | goto out_skip; |
465 | } | 465 | } |
466 | spin_unlock(&cil->xc_push_lock); | ||
467 | 466 | ||
468 | 467 | ||
469 | /* check for a previously pushed seqeunce */ | 468 | /* check for a previously pushed seqeunce */ |
470 | if (push_seq < cil->xc_ctx->sequence) | 469 | if (push_seq < cil->xc_ctx->sequence) { |
470 | spin_unlock(&cil->xc_push_lock); | ||
471 | goto out_skip; | 471 | goto out_skip; |
472 | } | ||
473 | |||
474 | /* | ||
475 | * We are now going to push this context, so add it to the committing | ||
476 | * list before we do anything else. This ensures that anyone waiting on | ||
477 | * this push can easily detect the difference between a "push in | ||
478 | * progress" and "CIL is empty, nothing to do". | ||
479 | * | ||
480 | * IOWs, a wait loop can now check for: | ||
481 | * the current sequence not being found on the committing list; | ||
482 | * an empty CIL; and | ||
483 | * an unchanged sequence number | ||
484 | * to detect a push that had nothing to do and therefore does not need | ||
485 | * waiting on. If the CIL is not empty, we get put on the committing | ||
486 | * list before emptying the CIL and bumping the sequence number. Hence | ||
487 | * an empty CIL and an unchanged sequence number means we jumped out | ||
488 | * above after doing nothing. | ||
489 | * | ||
490 | * Hence the waiter will either find the commit sequence on the | ||
491 | * committing list or the sequence number will be unchanged and the CIL | ||
492 | * still dirty. In that latter case, the push has not yet started, and | ||
493 | * so the waiter will have to continue trying to check the CIL | ||
494 | * committing list until it is found. In extreme cases of delay, the | ||
495 | * sequence may fully commit between the attempts the wait makes to wait | ||
496 | * on the commit sequence. | ||
497 | */ | ||
498 | list_add(&ctx->committing, &cil->xc_committing); | ||
499 | spin_unlock(&cil->xc_push_lock); | ||
472 | 500 | ||
473 | /* | 501 | /* |
474 | * pull all the log vectors off the items in the CIL, and | 502 | * pull all the log vectors off the items in the CIL, and |
@@ -532,7 +560,6 @@ xlog_cil_push( | |||
532 | */ | 560 | */ |
533 | spin_lock(&cil->xc_push_lock); | 561 | spin_lock(&cil->xc_push_lock); |
534 | cil->xc_current_sequence = new_ctx->sequence; | 562 | cil->xc_current_sequence = new_ctx->sequence; |
535 | list_add(&ctx->committing, &cil->xc_committing); | ||
536 | spin_unlock(&cil->xc_push_lock); | 563 | spin_unlock(&cil->xc_push_lock); |
537 | up_write(&cil->xc_ctx_lock); | 564 | up_write(&cil->xc_ctx_lock); |
538 | 565 | ||
@@ -855,13 +882,15 @@ restart: | |||
855 | * Hence by the time we have got here it our sequence may not have been | 882 | * Hence by the time we have got here it our sequence may not have been |
856 | * pushed yet. This is true if the current sequence still matches the | 883 | * pushed yet. This is true if the current sequence still matches the |
857 | * push sequence after the above wait loop and the CIL still contains | 884 | * push sequence after the above wait loop and the CIL still contains |
858 | * dirty objects. | 885 | * dirty objects. This is guaranteed by the push code first adding the |
886 | * context to the committing list before emptying the CIL. | ||
859 | * | 887 | * |
860 | * When the push occurs, it will empty the CIL and atomically increment | 888 | * Hence if we don't find the context in the committing list and the |
861 | * the currect sequence past the push sequence and move it into the | 889 | * current sequence number is unchanged then the CIL contents are |
862 | * committing list. Of course, if the CIL is clean at the time of the | 890 | * significant. If the CIL is empty, if means there was nothing to push |
863 | * push, it won't have pushed the CIL at all, so in that case we should | 891 | * and that means there is nothing to wait for. If the CIL is not empty, |
864 | * try the push for this sequence again from the start just in case. | 892 | * it means we haven't yet started the push, because if it had started |
893 | * we would have found the context on the committing list. | ||
865 | */ | 894 | */ |
866 | if (sequence == cil->xc_current_sequence && | 895 | if (sequence == cil->xc_current_sequence && |
867 | !list_empty(&cil->xc_cil)) { | 896 | !list_empty(&cil->xc_cil)) { |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1fd5787add99..00cd7f3a8f59 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -193,12 +193,8 @@ xlog_bread_noalign( | |||
193 | bp->b_io_length = nbblks; | 193 | bp->b_io_length = nbblks; |
194 | bp->b_error = 0; | 194 | bp->b_error = 0; |
195 | 195 | ||
196 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) | 196 | error = xfs_buf_submit_wait(bp); |
197 | return -EIO; | 197 | if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) |
198 | |||
199 | xfs_buf_iorequest(bp); | ||
200 | error = xfs_buf_iowait(bp); | ||
201 | if (error) | ||
202 | xfs_buf_ioerror_alert(bp, __func__); | 198 | xfs_buf_ioerror_alert(bp, __func__); |
203 | return error; | 199 | return error; |
204 | } | 200 | } |
@@ -378,12 +374,14 @@ xlog_recover_iodone( | |||
378 | * We're not going to bother about retrying | 374 | * We're not going to bother about retrying |
379 | * this during recovery. One strike! | 375 | * this during recovery. One strike! |
380 | */ | 376 | */ |
381 | xfs_buf_ioerror_alert(bp, __func__); | 377 | if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { |
382 | xfs_force_shutdown(bp->b_target->bt_mount, | 378 | xfs_buf_ioerror_alert(bp, __func__); |
383 | SHUTDOWN_META_IO_ERROR); | 379 | xfs_force_shutdown(bp->b_target->bt_mount, |
380 | SHUTDOWN_META_IO_ERROR); | ||
381 | } | ||
384 | } | 382 | } |
385 | bp->b_iodone = NULL; | 383 | bp->b_iodone = NULL; |
386 | xfs_buf_ioend(bp, 0); | 384 | xfs_buf_ioend(bp); |
387 | } | 385 | } |
388 | 386 | ||
389 | /* | 387 | /* |
@@ -1445,160 +1443,6 @@ xlog_clear_stale_blocks( | |||
1445 | ****************************************************************************** | 1443 | ****************************************************************************** |
1446 | */ | 1444 | */ |
1447 | 1445 | ||
1448 | STATIC xlog_recover_t * | ||
1449 | xlog_recover_find_tid( | ||
1450 | struct hlist_head *head, | ||
1451 | xlog_tid_t tid) | ||
1452 | { | ||
1453 | xlog_recover_t *trans; | ||
1454 | |||
1455 | hlist_for_each_entry(trans, head, r_list) { | ||
1456 | if (trans->r_log_tid == tid) | ||
1457 | return trans; | ||
1458 | } | ||
1459 | return NULL; | ||
1460 | } | ||
1461 | |||
1462 | STATIC void | ||
1463 | xlog_recover_new_tid( | ||
1464 | struct hlist_head *head, | ||
1465 | xlog_tid_t tid, | ||
1466 | xfs_lsn_t lsn) | ||
1467 | { | ||
1468 | xlog_recover_t *trans; | ||
1469 | |||
1470 | trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); | ||
1471 | trans->r_log_tid = tid; | ||
1472 | trans->r_lsn = lsn; | ||
1473 | INIT_LIST_HEAD(&trans->r_itemq); | ||
1474 | |||
1475 | INIT_HLIST_NODE(&trans->r_list); | ||
1476 | hlist_add_head(&trans->r_list, head); | ||
1477 | } | ||
1478 | |||
1479 | STATIC void | ||
1480 | xlog_recover_add_item( | ||
1481 | struct list_head *head) | ||
1482 | { | ||
1483 | xlog_recover_item_t *item; | ||
1484 | |||
1485 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); | ||
1486 | INIT_LIST_HEAD(&item->ri_list); | ||
1487 | list_add_tail(&item->ri_list, head); | ||
1488 | } | ||
1489 | |||
1490 | STATIC int | ||
1491 | xlog_recover_add_to_cont_trans( | ||
1492 | struct xlog *log, | ||
1493 | struct xlog_recover *trans, | ||
1494 | xfs_caddr_t dp, | ||
1495 | int len) | ||
1496 | { | ||
1497 | xlog_recover_item_t *item; | ||
1498 | xfs_caddr_t ptr, old_ptr; | ||
1499 | int old_len; | ||
1500 | |||
1501 | if (list_empty(&trans->r_itemq)) { | ||
1502 | /* finish copying rest of trans header */ | ||
1503 | xlog_recover_add_item(&trans->r_itemq); | ||
1504 | ptr = (xfs_caddr_t) &trans->r_theader + | ||
1505 | sizeof(xfs_trans_header_t) - len; | ||
1506 | memcpy(ptr, dp, len); /* d, s, l */ | ||
1507 | return 0; | ||
1508 | } | ||
1509 | /* take the tail entry */ | ||
1510 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); | ||
1511 | |||
1512 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; | ||
1513 | old_len = item->ri_buf[item->ri_cnt-1].i_len; | ||
1514 | |||
1515 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); | ||
1516 | memcpy(&ptr[old_len], dp, len); /* d, s, l */ | ||
1517 | item->ri_buf[item->ri_cnt-1].i_len += len; | ||
1518 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | ||
1519 | trace_xfs_log_recover_item_add_cont(log, trans, item, 0); | ||
1520 | return 0; | ||
1521 | } | ||
1522 | |||
1523 | /* | ||
1524 | * The next region to add is the start of a new region. It could be | ||
1525 | * a whole region or it could be the first part of a new region. Because | ||
1526 | * of this, the assumption here is that the type and size fields of all | ||
1527 | * format structures fit into the first 32 bits of the structure. | ||
1528 | * | ||
1529 | * This works because all regions must be 32 bit aligned. Therefore, we | ||
1530 | * either have both fields or we have neither field. In the case we have | ||
1531 | * neither field, the data part of the region is zero length. We only have | ||
1532 | * a log_op_header and can throw away the header since a new one will appear | ||
1533 | * later. If we have at least 4 bytes, then we can determine how many regions | ||
1534 | * will appear in the current log item. | ||
1535 | */ | ||
1536 | STATIC int | ||
1537 | xlog_recover_add_to_trans( | ||
1538 | struct xlog *log, | ||
1539 | struct xlog_recover *trans, | ||
1540 | xfs_caddr_t dp, | ||
1541 | int len) | ||
1542 | { | ||
1543 | xfs_inode_log_format_t *in_f; /* any will do */ | ||
1544 | xlog_recover_item_t *item; | ||
1545 | xfs_caddr_t ptr; | ||
1546 | |||
1547 | if (!len) | ||
1548 | return 0; | ||
1549 | if (list_empty(&trans->r_itemq)) { | ||
1550 | /* we need to catch log corruptions here */ | ||
1551 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { | ||
1552 | xfs_warn(log->l_mp, "%s: bad header magic number", | ||
1553 | __func__); | ||
1554 | ASSERT(0); | ||
1555 | return -EIO; | ||
1556 | } | ||
1557 | if (len == sizeof(xfs_trans_header_t)) | ||
1558 | xlog_recover_add_item(&trans->r_itemq); | ||
1559 | memcpy(&trans->r_theader, dp, len); /* d, s, l */ | ||
1560 | return 0; | ||
1561 | } | ||
1562 | |||
1563 | ptr = kmem_alloc(len, KM_SLEEP); | ||
1564 | memcpy(ptr, dp, len); | ||
1565 | in_f = (xfs_inode_log_format_t *)ptr; | ||
1566 | |||
1567 | /* take the tail entry */ | ||
1568 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); | ||
1569 | if (item->ri_total != 0 && | ||
1570 | item->ri_total == item->ri_cnt) { | ||
1571 | /* tail item is in use, get a new one */ | ||
1572 | xlog_recover_add_item(&trans->r_itemq); | ||
1573 | item = list_entry(trans->r_itemq.prev, | ||
1574 | xlog_recover_item_t, ri_list); | ||
1575 | } | ||
1576 | |||
1577 | if (item->ri_total == 0) { /* first region to be added */ | ||
1578 | if (in_f->ilf_size == 0 || | ||
1579 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { | ||
1580 | xfs_warn(log->l_mp, | ||
1581 | "bad number of regions (%d) in inode log format", | ||
1582 | in_f->ilf_size); | ||
1583 | ASSERT(0); | ||
1584 | kmem_free(ptr); | ||
1585 | return -EIO; | ||
1586 | } | ||
1587 | |||
1588 | item->ri_total = in_f->ilf_size; | ||
1589 | item->ri_buf = | ||
1590 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), | ||
1591 | KM_SLEEP); | ||
1592 | } | ||
1593 | ASSERT(item->ri_total > item->ri_cnt); | ||
1594 | /* Description region is ri_buf[0] */ | ||
1595 | item->ri_buf[item->ri_cnt].i_addr = ptr; | ||
1596 | item->ri_buf[item->ri_cnt].i_len = len; | ||
1597 | item->ri_cnt++; | ||
1598 | trace_xfs_log_recover_item_add(log, trans, item, 0); | ||
1599 | return 0; | ||
1600 | } | ||
1601 | |||
1602 | /* | 1446 | /* |
1603 | * Sort the log items in the transaction. | 1447 | * Sort the log items in the transaction. |
1604 | * | 1448 | * |
@@ -3254,31 +3098,6 @@ xlog_recover_do_icreate_pass2( | |||
3254 | return 0; | 3098 | return 0; |
3255 | } | 3099 | } |
3256 | 3100 | ||
3257 | /* | ||
3258 | * Free up any resources allocated by the transaction | ||
3259 | * | ||
3260 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | ||
3261 | */ | ||
3262 | STATIC void | ||
3263 | xlog_recover_free_trans( | ||
3264 | struct xlog_recover *trans) | ||
3265 | { | ||
3266 | xlog_recover_item_t *item, *n; | ||
3267 | int i; | ||
3268 | |||
3269 | list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { | ||
3270 | /* Free the regions in the item. */ | ||
3271 | list_del(&item->ri_list); | ||
3272 | for (i = 0; i < item->ri_cnt; i++) | ||
3273 | kmem_free(item->ri_buf[i].i_addr); | ||
3274 | /* Free the item itself */ | ||
3275 | kmem_free(item->ri_buf); | ||
3276 | kmem_free(item); | ||
3277 | } | ||
3278 | /* Free the transaction recover structure */ | ||
3279 | kmem_free(trans); | ||
3280 | } | ||
3281 | |||
3282 | STATIC void | 3101 | STATIC void |
3283 | xlog_recover_buffer_ra_pass2( | 3102 | xlog_recover_buffer_ra_pass2( |
3284 | struct xlog *log, | 3103 | struct xlog *log, |
@@ -3528,22 +3347,309 @@ out: | |||
3528 | if (!list_empty(&done_list)) | 3347 | if (!list_empty(&done_list)) |
3529 | list_splice_init(&done_list, &trans->r_itemq); | 3348 | list_splice_init(&done_list, &trans->r_itemq); |
3530 | 3349 | ||
3531 | xlog_recover_free_trans(trans); | ||
3532 | |||
3533 | error2 = xfs_buf_delwri_submit(&buffer_list); | 3350 | error2 = xfs_buf_delwri_submit(&buffer_list); |
3534 | return error ? error : error2; | 3351 | return error ? error : error2; |
3535 | } | 3352 | } |
3536 | 3353 | ||
3354 | STATIC void | ||
3355 | xlog_recover_add_item( | ||
3356 | struct list_head *head) | ||
3357 | { | ||
3358 | xlog_recover_item_t *item; | ||
3359 | |||
3360 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); | ||
3361 | INIT_LIST_HEAD(&item->ri_list); | ||
3362 | list_add_tail(&item->ri_list, head); | ||
3363 | } | ||
3364 | |||
3537 | STATIC int | 3365 | STATIC int |
3538 | xlog_recover_unmount_trans( | 3366 | xlog_recover_add_to_cont_trans( |
3539 | struct xlog *log) | 3367 | struct xlog *log, |
3368 | struct xlog_recover *trans, | ||
3369 | xfs_caddr_t dp, | ||
3370 | int len) | ||
3540 | { | 3371 | { |
3541 | /* Do nothing now */ | 3372 | xlog_recover_item_t *item; |
3542 | xfs_warn(log->l_mp, "%s: Unmount LR", __func__); | 3373 | xfs_caddr_t ptr, old_ptr; |
3374 | int old_len; | ||
3375 | |||
3376 | if (list_empty(&trans->r_itemq)) { | ||
3377 | /* finish copying rest of trans header */ | ||
3378 | xlog_recover_add_item(&trans->r_itemq); | ||
3379 | ptr = (xfs_caddr_t) &trans->r_theader + | ||
3380 | sizeof(xfs_trans_header_t) - len; | ||
3381 | memcpy(ptr, dp, len); | ||
3382 | return 0; | ||
3383 | } | ||
3384 | /* take the tail entry */ | ||
3385 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); | ||
3386 | |||
3387 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; | ||
3388 | old_len = item->ri_buf[item->ri_cnt-1].i_len; | ||
3389 | |||
3390 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); | ||
3391 | memcpy(&ptr[old_len], dp, len); | ||
3392 | item->ri_buf[item->ri_cnt-1].i_len += len; | ||
3393 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | ||
3394 | trace_xfs_log_recover_item_add_cont(log, trans, item, 0); | ||
3395 | return 0; | ||
3396 | } | ||
3397 | |||
3398 | /* | ||
3399 | * The next region to add is the start of a new region. It could be | ||
3400 | * a whole region or it could be the first part of a new region. Because | ||
3401 | * of this, the assumption here is that the type and size fields of all | ||
3402 | * format structures fit into the first 32 bits of the structure. | ||
3403 | * | ||
3404 | * This works because all regions must be 32 bit aligned. Therefore, we | ||
3405 | * either have both fields or we have neither field. In the case we have | ||
3406 | * neither field, the data part of the region is zero length. We only have | ||
3407 | * a log_op_header and can throw away the header since a new one will appear | ||
3408 | * later. If we have at least 4 bytes, then we can determine how many regions | ||
3409 | * will appear in the current log item. | ||
3410 | */ | ||
3411 | STATIC int | ||
3412 | xlog_recover_add_to_trans( | ||
3413 | struct xlog *log, | ||
3414 | struct xlog_recover *trans, | ||
3415 | xfs_caddr_t dp, | ||
3416 | int len) | ||
3417 | { | ||
3418 | xfs_inode_log_format_t *in_f; /* any will do */ | ||
3419 | xlog_recover_item_t *item; | ||
3420 | xfs_caddr_t ptr; | ||
3421 | |||
3422 | if (!len) | ||
3423 | return 0; | ||
3424 | if (list_empty(&trans->r_itemq)) { | ||
3425 | /* we need to catch log corruptions here */ | ||
3426 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { | ||
3427 | xfs_warn(log->l_mp, "%s: bad header magic number", | ||
3428 | __func__); | ||
3429 | ASSERT(0); | ||
3430 | return -EIO; | ||
3431 | } | ||
3432 | if (len == sizeof(xfs_trans_header_t)) | ||
3433 | xlog_recover_add_item(&trans->r_itemq); | ||
3434 | memcpy(&trans->r_theader, dp, len); | ||
3435 | return 0; | ||
3436 | } | ||
3437 | |||
3438 | ptr = kmem_alloc(len, KM_SLEEP); | ||
3439 | memcpy(ptr, dp, len); | ||
3440 | in_f = (xfs_inode_log_format_t *)ptr; | ||
3441 | |||
3442 | /* take the tail entry */ | ||
3443 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); | ||
3444 | if (item->ri_total != 0 && | ||
3445 | item->ri_total == item->ri_cnt) { | ||
3446 | /* tail item is in use, get a new one */ | ||
3447 | xlog_recover_add_item(&trans->r_itemq); | ||
3448 | item = list_entry(trans->r_itemq.prev, | ||
3449 | xlog_recover_item_t, ri_list); | ||
3450 | } | ||
3451 | |||
3452 | if (item->ri_total == 0) { /* first region to be added */ | ||
3453 | if (in_f->ilf_size == 0 || | ||
3454 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { | ||
3455 | xfs_warn(log->l_mp, | ||
3456 | "bad number of regions (%d) in inode log format", | ||
3457 | in_f->ilf_size); | ||
3458 | ASSERT(0); | ||
3459 | kmem_free(ptr); | ||
3460 | return -EIO; | ||
3461 | } | ||
3462 | |||
3463 | item->ri_total = in_f->ilf_size; | ||
3464 | item->ri_buf = | ||
3465 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), | ||
3466 | KM_SLEEP); | ||
3467 | } | ||
3468 | ASSERT(item->ri_total > item->ri_cnt); | ||
3469 | /* Description region is ri_buf[0] */ | ||
3470 | item->ri_buf[item->ri_cnt].i_addr = ptr; | ||
3471 | item->ri_buf[item->ri_cnt].i_len = len; | ||
3472 | item->ri_cnt++; | ||
3473 | trace_xfs_log_recover_item_add(log, trans, item, 0); | ||
3543 | return 0; | 3474 | return 0; |
3544 | } | 3475 | } |
3545 | 3476 | ||
3546 | /* | 3477 | /* |
3478 | * Free up any resources allocated by the transaction | ||
3479 | * | ||
3480 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | ||
3481 | */ | ||
3482 | STATIC void | ||
3483 | xlog_recover_free_trans( | ||
3484 | struct xlog_recover *trans) | ||
3485 | { | ||
3486 | xlog_recover_item_t *item, *n; | ||
3487 | int i; | ||
3488 | |||
3489 | list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { | ||
3490 | /* Free the regions in the item. */ | ||
3491 | list_del(&item->ri_list); | ||
3492 | for (i = 0; i < item->ri_cnt; i++) | ||
3493 | kmem_free(item->ri_buf[i].i_addr); | ||
3494 | /* Free the item itself */ | ||
3495 | kmem_free(item->ri_buf); | ||
3496 | kmem_free(item); | ||
3497 | } | ||
3498 | /* Free the transaction recover structure */ | ||
3499 | kmem_free(trans); | ||
3500 | } | ||
3501 | |||
3502 | /* | ||
3503 | * On error or completion, trans is freed. | ||
3504 | */ | ||
3505 | STATIC int | ||
3506 | xlog_recovery_process_trans( | ||
3507 | struct xlog *log, | ||
3508 | struct xlog_recover *trans, | ||
3509 | xfs_caddr_t dp, | ||
3510 | unsigned int len, | ||
3511 | unsigned int flags, | ||
3512 | int pass) | ||
3513 | { | ||
3514 | int error = 0; | ||
3515 | bool freeit = false; | ||
3516 | |||
3517 | /* mask off ophdr transaction container flags */ | ||
3518 | flags &= ~XLOG_END_TRANS; | ||
3519 | if (flags & XLOG_WAS_CONT_TRANS) | ||
3520 | flags &= ~XLOG_CONTINUE_TRANS; | ||
3521 | |||
3522 | /* | ||
3523 | * Callees must not free the trans structure. We'll decide if we need to | ||
3524 | * free it or not based on the operation being done and it's result. | ||
3525 | */ | ||
3526 | switch (flags) { | ||
3527 | /* expected flag values */ | ||
3528 | case 0: | ||
3529 | case XLOG_CONTINUE_TRANS: | ||
3530 | error = xlog_recover_add_to_trans(log, trans, dp, len); | ||
3531 | break; | ||
3532 | case XLOG_WAS_CONT_TRANS: | ||
3533 | error = xlog_recover_add_to_cont_trans(log, trans, dp, len); | ||
3534 | break; | ||
3535 | case XLOG_COMMIT_TRANS: | ||
3536 | error = xlog_recover_commit_trans(log, trans, pass); | ||
3537 | /* success or fail, we are now done with this transaction. */ | ||
3538 | freeit = true; | ||
3539 | break; | ||
3540 | |||
3541 | /* unexpected flag values */ | ||
3542 | case XLOG_UNMOUNT_TRANS: | ||
3543 | /* just skip trans */ | ||
3544 | xfs_warn(log->l_mp, "%s: Unmount LR", __func__); | ||
3545 | freeit = true; | ||
3546 | break; | ||
3547 | case XLOG_START_TRANS: | ||
3548 | default: | ||
3549 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); | ||
3550 | ASSERT(0); | ||
3551 | error = -EIO; | ||
3552 | break; | ||
3553 | } | ||
3554 | if (error || freeit) | ||
3555 | xlog_recover_free_trans(trans); | ||
3556 | return error; | ||
3557 | } | ||
3558 | |||
3559 | /* | ||
3560 | * Lookup the transaction recovery structure associated with the ID in the | ||
3561 | * current ophdr. If the transaction doesn't exist and the start flag is set in | ||
3562 | * the ophdr, then allocate a new transaction for future ID matches to find. | ||
3563 | * Either way, return what we found during the lookup - an existing transaction | ||
3564 | * or nothing. | ||
3565 | */ | ||
3566 | STATIC struct xlog_recover * | ||
3567 | xlog_recover_ophdr_to_trans( | ||
3568 | struct hlist_head rhash[], | ||
3569 | struct xlog_rec_header *rhead, | ||
3570 | struct xlog_op_header *ohead) | ||
3571 | { | ||
3572 | struct xlog_recover *trans; | ||
3573 | xlog_tid_t tid; | ||
3574 | struct hlist_head *rhp; | ||
3575 | |||
3576 | tid = be32_to_cpu(ohead->oh_tid); | ||
3577 | rhp = &rhash[XLOG_RHASH(tid)]; | ||
3578 | hlist_for_each_entry(trans, rhp, r_list) { | ||
3579 | if (trans->r_log_tid == tid) | ||
3580 | return trans; | ||
3581 | } | ||
3582 | |||
3583 | /* | ||
3584 | * skip over non-start transaction headers - we could be | ||
3585 | * processing slack space before the next transaction starts | ||
3586 | */ | ||
3587 | if (!(ohead->oh_flags & XLOG_START_TRANS)) | ||
3588 | return NULL; | ||
3589 | |||
3590 | ASSERT(be32_to_cpu(ohead->oh_len) == 0); | ||
3591 | |||
3592 | /* | ||
3593 | * This is a new transaction so allocate a new recovery container to | ||
3594 | * hold the recovery ops that will follow. | ||
3595 | */ | ||
3596 | trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP); | ||
3597 | trans->r_log_tid = tid; | ||
3598 | trans->r_lsn = be64_to_cpu(rhead->h_lsn); | ||
3599 | INIT_LIST_HEAD(&trans->r_itemq); | ||
3600 | INIT_HLIST_NODE(&trans->r_list); | ||
3601 | hlist_add_head(&trans->r_list, rhp); | ||
3602 | |||
3603 | /* | ||
3604 | * Nothing more to do for this ophdr. Items to be added to this new | ||
3605 | * transaction will be in subsequent ophdr containers. | ||
3606 | */ | ||
3607 | return NULL; | ||
3608 | } | ||
3609 | |||
3610 | STATIC int | ||
3611 | xlog_recover_process_ophdr( | ||
3612 | struct xlog *log, | ||
3613 | struct hlist_head rhash[], | ||
3614 | struct xlog_rec_header *rhead, | ||
3615 | struct xlog_op_header *ohead, | ||
3616 | xfs_caddr_t dp, | ||
3617 | xfs_caddr_t end, | ||
3618 | int pass) | ||
3619 | { | ||
3620 | struct xlog_recover *trans; | ||
3621 | unsigned int len; | ||
3622 | |||
3623 | /* Do we understand who wrote this op? */ | ||
3624 | if (ohead->oh_clientid != XFS_TRANSACTION && | ||
3625 | ohead->oh_clientid != XFS_LOG) { | ||
3626 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", | ||
3627 | __func__, ohead->oh_clientid); | ||
3628 | ASSERT(0); | ||
3629 | return -EIO; | ||
3630 | } | ||
3631 | |||
3632 | /* | ||
3633 | * Check the ophdr contains all the data it is supposed to contain. | ||
3634 | */ | ||
3635 | len = be32_to_cpu(ohead->oh_len); | ||
3636 | if (dp + len > end) { | ||
3637 | xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); | ||
3638 | WARN_ON(1); | ||
3639 | return -EIO; | ||
3640 | } | ||
3641 | |||
3642 | trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); | ||
3643 | if (!trans) { | ||
3644 | /* nothing to do, so skip over this ophdr */ | ||
3645 | return 0; | ||
3646 | } | ||
3647 | |||
3648 | return xlog_recovery_process_trans(log, trans, dp, len, | ||
3649 | ohead->oh_flags, pass); | ||
3650 | } | ||
3651 | |||
3652 | /* | ||
3547 | * There are two valid states of the r_state field. 0 indicates that the | 3653 | * There are two valid states of the r_state field. 0 indicates that the |
3548 | * transaction structure is in a normal state. We have either seen the | 3654 | * transaction structure is in a normal state. We have either seen the |
3549 | * start of the transaction or the last operation we added was not a partial | 3655 | * start of the transaction or the last operation we added was not a partial |
@@ -3560,86 +3666,30 @@ xlog_recover_process_data( | |||
3560 | xfs_caddr_t dp, | 3666 | xfs_caddr_t dp, |
3561 | int pass) | 3667 | int pass) |
3562 | { | 3668 | { |
3563 | xfs_caddr_t lp; | 3669 | struct xlog_op_header *ohead; |
3670 | xfs_caddr_t end; | ||
3564 | int num_logops; | 3671 | int num_logops; |
3565 | xlog_op_header_t *ohead; | ||
3566 | xlog_recover_t *trans; | ||
3567 | xlog_tid_t tid; | ||
3568 | int error; | 3672 | int error; |
3569 | unsigned long hash; | ||
3570 | uint flags; | ||
3571 | 3673 | ||
3572 | lp = dp + be32_to_cpu(rhead->h_len); | 3674 | end = dp + be32_to_cpu(rhead->h_len); |
3573 | num_logops = be32_to_cpu(rhead->h_num_logops); | 3675 | num_logops = be32_to_cpu(rhead->h_num_logops); |
3574 | 3676 | ||
3575 | /* check the log format matches our own - else we can't recover */ | 3677 | /* check the log format matches our own - else we can't recover */ |
3576 | if (xlog_header_check_recover(log->l_mp, rhead)) | 3678 | if (xlog_header_check_recover(log->l_mp, rhead)) |
3577 | return -EIO; | 3679 | return -EIO; |
3578 | 3680 | ||
3579 | while ((dp < lp) && num_logops) { | 3681 | while ((dp < end) && num_logops) { |
3580 | ASSERT(dp + sizeof(xlog_op_header_t) <= lp); | 3682 | |
3581 | ohead = (xlog_op_header_t *)dp; | 3683 | ohead = (struct xlog_op_header *)dp; |
3582 | dp += sizeof(xlog_op_header_t); | 3684 | dp += sizeof(*ohead); |
3583 | if (ohead->oh_clientid != XFS_TRANSACTION && | 3685 | ASSERT(dp <= end); |
3584 | ohead->oh_clientid != XFS_LOG) { | 3686 | |
3585 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", | 3687 | /* errors will abort recovery */ |
3586 | __func__, ohead->oh_clientid); | 3688 | error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, |
3587 | ASSERT(0); | 3689 | dp, end, pass); |
3588 | return -EIO; | 3690 | if (error) |
3589 | } | 3691 | return error; |
3590 | tid = be32_to_cpu(ohead->oh_tid); | 3692 | |
3591 | hash = XLOG_RHASH(tid); | ||
3592 | trans = xlog_recover_find_tid(&rhash[hash], tid); | ||
3593 | if (trans == NULL) { /* not found; add new tid */ | ||
3594 | if (ohead->oh_flags & XLOG_START_TRANS) | ||
3595 | xlog_recover_new_tid(&rhash[hash], tid, | ||
3596 | be64_to_cpu(rhead->h_lsn)); | ||
3597 | } else { | ||
3598 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { | ||
3599 | xfs_warn(log->l_mp, "%s: bad length 0x%x", | ||
3600 | __func__, be32_to_cpu(ohead->oh_len)); | ||
3601 | WARN_ON(1); | ||
3602 | return -EIO; | ||
3603 | } | ||
3604 | flags = ohead->oh_flags & ~XLOG_END_TRANS; | ||
3605 | if (flags & XLOG_WAS_CONT_TRANS) | ||
3606 | flags &= ~XLOG_CONTINUE_TRANS; | ||
3607 | switch (flags) { | ||
3608 | case XLOG_COMMIT_TRANS: | ||
3609 | error = xlog_recover_commit_trans(log, | ||
3610 | trans, pass); | ||
3611 | break; | ||
3612 | case XLOG_UNMOUNT_TRANS: | ||
3613 | error = xlog_recover_unmount_trans(log); | ||
3614 | break; | ||
3615 | case XLOG_WAS_CONT_TRANS: | ||
3616 | error = xlog_recover_add_to_cont_trans(log, | ||
3617 | trans, dp, | ||
3618 | be32_to_cpu(ohead->oh_len)); | ||
3619 | break; | ||
3620 | case XLOG_START_TRANS: | ||
3621 | xfs_warn(log->l_mp, "%s: bad transaction", | ||
3622 | __func__); | ||
3623 | ASSERT(0); | ||
3624 | error = -EIO; | ||
3625 | break; | ||
3626 | case 0: | ||
3627 | case XLOG_CONTINUE_TRANS: | ||
3628 | error = xlog_recover_add_to_trans(log, trans, | ||
3629 | dp, be32_to_cpu(ohead->oh_len)); | ||
3630 | break; | ||
3631 | default: | ||
3632 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", | ||
3633 | __func__, flags); | ||
3634 | ASSERT(0); | ||
3635 | error = -EIO; | ||
3636 | break; | ||
3637 | } | ||
3638 | if (error) { | ||
3639 | xlog_recover_free_trans(trans); | ||
3640 | return error; | ||
3641 | } | ||
3642 | } | ||
3643 | dp += be32_to_cpu(ohead->oh_len); | 3693 | dp += be32_to_cpu(ohead->oh_len); |
3644 | num_logops--; | 3694 | num_logops--; |
3645 | } | 3695 | } |
@@ -4132,41 +4182,13 @@ xlog_do_recovery_pass( | |||
4132 | } | 4182 | } |
4133 | 4183 | ||
4134 | memset(rhash, 0, sizeof(rhash)); | 4184 | memset(rhash, 0, sizeof(rhash)); |
4135 | if (tail_blk <= head_blk) { | 4185 | blk_no = tail_blk; |
4136 | for (blk_no = tail_blk; blk_no < head_blk; ) { | 4186 | if (tail_blk > head_blk) { |
4137 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); | ||
4138 | if (error) | ||
4139 | goto bread_err2; | ||
4140 | |||
4141 | rhead = (xlog_rec_header_t *)offset; | ||
4142 | error = xlog_valid_rec_header(log, rhead, blk_no); | ||
4143 | if (error) | ||
4144 | goto bread_err2; | ||
4145 | |||
4146 | /* blocks in data section */ | ||
4147 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | ||
4148 | error = xlog_bread(log, blk_no + hblks, bblks, dbp, | ||
4149 | &offset); | ||
4150 | if (error) | ||
4151 | goto bread_err2; | ||
4152 | |||
4153 | error = xlog_unpack_data(rhead, offset, log); | ||
4154 | if (error) | ||
4155 | goto bread_err2; | ||
4156 | |||
4157 | error = xlog_recover_process_data(log, | ||
4158 | rhash, rhead, offset, pass); | ||
4159 | if (error) | ||
4160 | goto bread_err2; | ||
4161 | blk_no += bblks + hblks; | ||
4162 | } | ||
4163 | } else { | ||
4164 | /* | 4187 | /* |
4165 | * Perform recovery around the end of the physical log. | 4188 | * Perform recovery around the end of the physical log. |
4166 | * When the head is not on the same cycle number as the tail, | 4189 | * When the head is not on the same cycle number as the tail, |
4167 | * we can't do a sequential recovery as above. | 4190 | * we can't do a sequential recovery. |
4168 | */ | 4191 | */ |
4169 | blk_no = tail_blk; | ||
4170 | while (blk_no < log->l_logBBsize) { | 4192 | while (blk_no < log->l_logBBsize) { |
4171 | /* | 4193 | /* |
4172 | * Check for header wrapping around physical end-of-log | 4194 | * Check for header wrapping around physical end-of-log |
@@ -4280,34 +4302,35 @@ xlog_do_recovery_pass( | |||
4280 | 4302 | ||
4281 | ASSERT(blk_no >= log->l_logBBsize); | 4303 | ASSERT(blk_no >= log->l_logBBsize); |
4282 | blk_no -= log->l_logBBsize; | 4304 | blk_no -= log->l_logBBsize; |
4305 | } | ||
4283 | 4306 | ||
4284 | /* read first part of physical log */ | 4307 | /* read first part of physical log */ |
4285 | while (blk_no < head_blk) { | 4308 | while (blk_no < head_blk) { |
4286 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); | 4309 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); |
4287 | if (error) | 4310 | if (error) |
4288 | goto bread_err2; | 4311 | goto bread_err2; |
4289 | 4312 | ||
4290 | rhead = (xlog_rec_header_t *)offset; | 4313 | rhead = (xlog_rec_header_t *)offset; |
4291 | error = xlog_valid_rec_header(log, rhead, blk_no); | 4314 | error = xlog_valid_rec_header(log, rhead, blk_no); |
4292 | if (error) | 4315 | if (error) |
4293 | goto bread_err2; | 4316 | goto bread_err2; |
4294 | 4317 | ||
4295 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 4318 | /* blocks in data section */ |
4296 | error = xlog_bread(log, blk_no+hblks, bblks, dbp, | 4319 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
4297 | &offset); | 4320 | error = xlog_bread(log, blk_no+hblks, bblks, dbp, |
4298 | if (error) | 4321 | &offset); |
4299 | goto bread_err2; | 4322 | if (error) |
4323 | goto bread_err2; | ||
4300 | 4324 | ||
4301 | error = xlog_unpack_data(rhead, offset, log); | 4325 | error = xlog_unpack_data(rhead, offset, log); |
4302 | if (error) | 4326 | if (error) |
4303 | goto bread_err2; | 4327 | goto bread_err2; |
4304 | 4328 | ||
4305 | error = xlog_recover_process_data(log, rhash, | 4329 | error = xlog_recover_process_data(log, rhash, |
4306 | rhead, offset, pass); | 4330 | rhead, offset, pass); |
4307 | if (error) | 4331 | if (error) |
4308 | goto bread_err2; | 4332 | goto bread_err2; |
4309 | blk_no += bblks + hblks; | 4333 | blk_no += bblks + hblks; |
4310 | } | ||
4311 | } | 4334 | } |
4312 | 4335 | ||
4313 | bread_err2: | 4336 | bread_err2: |
@@ -4427,16 +4450,12 @@ xlog_do_recover( | |||
4427 | XFS_BUF_UNASYNC(bp); | 4450 | XFS_BUF_UNASYNC(bp); |
4428 | bp->b_ops = &xfs_sb_buf_ops; | 4451 | bp->b_ops = &xfs_sb_buf_ops; |
4429 | 4452 | ||
4430 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) { | 4453 | error = xfs_buf_submit_wait(bp); |
4431 | xfs_buf_relse(bp); | ||
4432 | return -EIO; | ||
4433 | } | ||
4434 | |||
4435 | xfs_buf_iorequest(bp); | ||
4436 | error = xfs_buf_iowait(bp); | ||
4437 | if (error) { | 4454 | if (error) { |
4438 | xfs_buf_ioerror_alert(bp, __func__); | 4455 | if (!XFS_FORCED_SHUTDOWN(log->l_mp)) { |
4439 | ASSERT(0); | 4456 | xfs_buf_ioerror_alert(bp, __func__); |
4457 | ASSERT(0); | ||
4458 | } | ||
4440 | xfs_buf_relse(bp); | 4459 | xfs_buf_relse(bp); |
4441 | return error; | 4460 | return error; |
4442 | } | 4461 | } |
@@ -4509,6 +4528,18 @@ xlog_recover( | |||
4509 | return -EINVAL; | 4528 | return -EINVAL; |
4510 | } | 4529 | } |
4511 | 4530 | ||
4531 | /* | ||
4532 | * Delay log recovery if the debug hook is set. This is debug | ||
4533 | * instrumention to coordinate simulation of I/O failures with | ||
4534 | * log recovery. | ||
4535 | */ | ||
4536 | if (xfs_globals.log_recovery_delay) { | ||
4537 | xfs_notice(log->l_mp, | ||
4538 | "Delaying log recovery for %d seconds.", | ||
4539 | xfs_globals.log_recovery_delay); | ||
4540 | msleep(xfs_globals.log_recovery_delay * 1000); | ||
4541 | } | ||
4542 | |||
4512 | xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", | 4543 | xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", |
4513 | log->l_mp->m_logname ? log->l_mp->m_logname | 4544 | log->l_mp->m_logname ? log->l_mp->m_logname |
4514 | : "internal"); | 4545 | : "internal"); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index fbf0384a466f..51435dbce9c4 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -61,8 +61,6 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex); | |||
61 | static int xfs_uuid_table_size; | 61 | static int xfs_uuid_table_size; |
62 | static uuid_t *xfs_uuid_table; | 62 | static uuid_t *xfs_uuid_table; |
63 | 63 | ||
64 | extern struct kset *xfs_kset; | ||
65 | |||
66 | /* | 64 | /* |
67 | * See if the UUID is unique among mounted XFS filesystems. | 65 | * See if the UUID is unique among mounted XFS filesystems. |
68 | * Mount fails if UUID is nil or a FS with the same UUID is already mounted. | 66 | * Mount fails if UUID is nil or a FS with the same UUID is already mounted. |
@@ -302,21 +300,15 @@ xfs_readsb( | |||
302 | * access to the superblock. | 300 | * access to the superblock. |
303 | */ | 301 | */ |
304 | reread: | 302 | reread: |
305 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, | 303 | error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, |
306 | BTOBB(sector_size), 0, buf_ops); | 304 | BTOBB(sector_size), 0, &bp, buf_ops); |
307 | if (!bp) { | 305 | if (error) { |
308 | if (loud) | ||
309 | xfs_warn(mp, "SB buffer read failed"); | ||
310 | return -EIO; | ||
311 | } | ||
312 | if (bp->b_error) { | ||
313 | error = bp->b_error; | ||
314 | if (loud) | 306 | if (loud) |
315 | xfs_warn(mp, "SB validate failed with error %d.", error); | 307 | xfs_warn(mp, "SB validate failed with error %d.", error); |
316 | /* bad CRC means corrupted metadata */ | 308 | /* bad CRC means corrupted metadata */ |
317 | if (error == -EFSBADCRC) | 309 | if (error == -EFSBADCRC) |
318 | error = -EFSCORRUPTED; | 310 | error = -EFSCORRUPTED; |
319 | goto release_buf; | 311 | return error; |
320 | } | 312 | } |
321 | 313 | ||
322 | /* | 314 | /* |
@@ -546,40 +538,43 @@ xfs_set_inoalignment(xfs_mount_t *mp) | |||
546 | * Check that the data (and log if separate) is an ok size. | 538 | * Check that the data (and log if separate) is an ok size. |
547 | */ | 539 | */ |
548 | STATIC int | 540 | STATIC int |
549 | xfs_check_sizes(xfs_mount_t *mp) | 541 | xfs_check_sizes( |
542 | struct xfs_mount *mp) | ||
550 | { | 543 | { |
551 | xfs_buf_t *bp; | 544 | struct xfs_buf *bp; |
552 | xfs_daddr_t d; | 545 | xfs_daddr_t d; |
546 | int error; | ||
553 | 547 | ||
554 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); | 548 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); |
555 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { | 549 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { |
556 | xfs_warn(mp, "filesystem size mismatch detected"); | 550 | xfs_warn(mp, "filesystem size mismatch detected"); |
557 | return -EFBIG; | 551 | return -EFBIG; |
558 | } | 552 | } |
559 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, | 553 | error = xfs_buf_read_uncached(mp->m_ddev_targp, |
560 | d - XFS_FSS_TO_BB(mp, 1), | 554 | d - XFS_FSS_TO_BB(mp, 1), |
561 | XFS_FSS_TO_BB(mp, 1), 0, NULL); | 555 | XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); |
562 | if (!bp) { | 556 | if (error) { |
563 | xfs_warn(mp, "last sector read failed"); | 557 | xfs_warn(mp, "last sector read failed"); |
564 | return -EIO; | 558 | return error; |
565 | } | 559 | } |
566 | xfs_buf_relse(bp); | 560 | xfs_buf_relse(bp); |
567 | 561 | ||
568 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | 562 | if (mp->m_logdev_targp == mp->m_ddev_targp) |
569 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); | 563 | return 0; |
570 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { | 564 | |
571 | xfs_warn(mp, "log size mismatch detected"); | 565 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); |
572 | return -EFBIG; | 566 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { |
573 | } | 567 | xfs_warn(mp, "log size mismatch detected"); |
574 | bp = xfs_buf_read_uncached(mp->m_logdev_targp, | 568 | return -EFBIG; |
569 | } | ||
570 | error = xfs_buf_read_uncached(mp->m_logdev_targp, | ||
575 | d - XFS_FSB_TO_BB(mp, 1), | 571 | d - XFS_FSB_TO_BB(mp, 1), |
576 | XFS_FSB_TO_BB(mp, 1), 0, NULL); | 572 | XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); |
577 | if (!bp) { | 573 | if (error) { |
578 | xfs_warn(mp, "log device read failed"); | 574 | xfs_warn(mp, "log device read failed"); |
579 | return -EIO; | 575 | return error; |
580 | } | ||
581 | xfs_buf_relse(bp); | ||
582 | } | 576 | } |
577 | xfs_buf_relse(bp); | ||
583 | return 0; | 578 | return 0; |
584 | } | 579 | } |
585 | 580 | ||
@@ -729,7 +724,6 @@ xfs_mountfs( | |||
729 | 724 | ||
730 | xfs_set_maxicount(mp); | 725 | xfs_set_maxicount(mp); |
731 | 726 | ||
732 | mp->m_kobj.kobject.kset = xfs_kset; | ||
733 | error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); | 727 | error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); |
734 | if (error) | 728 | if (error) |
735 | goto out; | 729 | goto out; |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 1eb6f3df698c..30ecca3037e3 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -304,7 +304,8 @@ _xfs_mru_cache_reap( | |||
304 | int | 304 | int |
305 | xfs_mru_cache_init(void) | 305 | xfs_mru_cache_init(void) |
306 | { | 306 | { |
307 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); | 307 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", |
308 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 1); | ||
308 | if (!xfs_mru_reap_wq) | 309 | if (!xfs_mru_reap_wq) |
309 | return -ENOMEM; | 310 | return -ENOMEM; |
310 | return 0; | 311 | return 0; |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 10232102b4a6..d68f23021af3 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -434,6 +434,7 @@ xfs_qm_dquot_isolate( | |||
434 | struct list_head *item, | 434 | struct list_head *item, |
435 | spinlock_t *lru_lock, | 435 | spinlock_t *lru_lock, |
436 | void *arg) | 436 | void *arg) |
437 | __releases(lru_lock) __acquires(lru_lock) | ||
437 | { | 438 | { |
438 | struct xfs_dquot *dqp = container_of(item, | 439 | struct xfs_dquot *dqp = container_of(item, |
439 | struct xfs_dquot, q_lru); | 440 | struct xfs_dquot, q_lru); |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 909e143b87ae..e1175ea9b551 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -46,7 +46,7 @@ | |||
46 | * Keeps track of a current summary block, so we don't keep reading | 46 | * Keeps track of a current summary block, so we don't keep reading |
47 | * it from the buffer cache. | 47 | * it from the buffer cache. |
48 | */ | 48 | */ |
49 | STATIC int /* error */ | 49 | static int |
50 | xfs_rtget_summary( | 50 | xfs_rtget_summary( |
51 | xfs_mount_t *mp, /* file system mount structure */ | 51 | xfs_mount_t *mp, /* file system mount structure */ |
52 | xfs_trans_t *tp, /* transaction pointer */ | 52 | xfs_trans_t *tp, /* transaction pointer */ |
@@ -56,60 +56,9 @@ xfs_rtget_summary( | |||
56 | xfs_fsblock_t *rsb, /* in/out: summary block number */ | 56 | xfs_fsblock_t *rsb, /* in/out: summary block number */ |
57 | xfs_suminfo_t *sum) /* out: summary info for this block */ | 57 | xfs_suminfo_t *sum) /* out: summary info for this block */ |
58 | { | 58 | { |
59 | xfs_buf_t *bp; /* buffer for summary block */ | 59 | return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum); |
60 | int error; /* error value */ | ||
61 | xfs_fsblock_t sb; /* summary fsblock */ | ||
62 | int so; /* index into the summary file */ | ||
63 | xfs_suminfo_t *sp; /* pointer to returned data */ | ||
64 | |||
65 | /* | ||
66 | * Compute entry number in the summary file. | ||
67 | */ | ||
68 | so = XFS_SUMOFFS(mp, log, bbno); | ||
69 | /* | ||
70 | * Compute the block number in the summary file. | ||
71 | */ | ||
72 | sb = XFS_SUMOFFSTOBLOCK(mp, so); | ||
73 | /* | ||
74 | * If we have an old buffer, and the block number matches, use that. | ||
75 | */ | ||
76 | if (rbpp && *rbpp && *rsb == sb) | ||
77 | bp = *rbpp; | ||
78 | /* | ||
79 | * Otherwise we have to get the buffer. | ||
80 | */ | ||
81 | else { | ||
82 | /* | ||
83 | * If there was an old one, get rid of it first. | ||
84 | */ | ||
85 | if (rbpp && *rbpp) | ||
86 | xfs_trans_brelse(tp, *rbpp); | ||
87 | error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); | ||
88 | if (error) { | ||
89 | return error; | ||
90 | } | ||
91 | /* | ||
92 | * Remember this buffer and block for the next call. | ||
93 | */ | ||
94 | if (rbpp) { | ||
95 | *rbpp = bp; | ||
96 | *rsb = sb; | ||
97 | } | ||
98 | } | ||
99 | /* | ||
100 | * Point to the summary information & copy it out. | ||
101 | */ | ||
102 | sp = XFS_SUMPTR(mp, bp, so); | ||
103 | *sum = *sp; | ||
104 | /* | ||
105 | * Drop the buffer if we're not asked to remember it. | ||
106 | */ | ||
107 | if (!rbpp) | ||
108 | xfs_trans_brelse(tp, bp); | ||
109 | return 0; | ||
110 | } | 60 | } |
111 | 61 | ||
112 | |||
113 | /* | 62 | /* |
114 | * Return whether there are any free extents in the size range given | 63 | * Return whether there are any free extents in the size range given |
115 | * by low and high, for the bitmap block bbno. | 64 | * by low and high, for the bitmap block bbno. |
@@ -972,16 +921,11 @@ xfs_growfs_rt( | |||
972 | /* | 921 | /* |
973 | * Read in the last block of the device, make sure it exists. | 922 | * Read in the last block of the device, make sure it exists. |
974 | */ | 923 | */ |
975 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, | 924 | error = xfs_buf_read_uncached(mp->m_rtdev_targp, |
976 | XFS_FSB_TO_BB(mp, nrblocks - 1), | 925 | XFS_FSB_TO_BB(mp, nrblocks - 1), |
977 | XFS_FSB_TO_BB(mp, 1), 0, NULL); | 926 | XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); |
978 | if (!bp) | 927 | if (error) |
979 | return -EIO; | ||
980 | if (bp->b_error) { | ||
981 | error = bp->b_error; | ||
982 | xfs_buf_relse(bp); | ||
983 | return error; | 928 | return error; |
984 | } | ||
985 | xfs_buf_relse(bp); | 929 | xfs_buf_relse(bp); |
986 | 930 | ||
987 | /* | 931 | /* |
@@ -1235,11 +1179,12 @@ xfs_rtallocate_extent( | |||
1235 | */ | 1179 | */ |
1236 | int /* error */ | 1180 | int /* error */ |
1237 | xfs_rtmount_init( | 1181 | xfs_rtmount_init( |
1238 | xfs_mount_t *mp) /* file system mount structure */ | 1182 | struct xfs_mount *mp) /* file system mount structure */ |
1239 | { | 1183 | { |
1240 | xfs_buf_t *bp; /* buffer for last block of subvolume */ | 1184 | struct xfs_buf *bp; /* buffer for last block of subvolume */ |
1241 | xfs_daddr_t d; /* address of last block of subvolume */ | 1185 | struct xfs_sb *sbp; /* filesystem superblock copy in mount */ |
1242 | xfs_sb_t *sbp; /* filesystem superblock copy in mount */ | 1186 | xfs_daddr_t d; /* address of last block of subvolume */ |
1187 | int error; | ||
1243 | 1188 | ||
1244 | sbp = &mp->m_sb; | 1189 | sbp = &mp->m_sb; |
1245 | if (sbp->sb_rblocks == 0) | 1190 | if (sbp->sb_rblocks == 0) |
@@ -1265,14 +1210,12 @@ xfs_rtmount_init( | |||
1265 | (unsigned long long) mp->m_sb.sb_rblocks); | 1210 | (unsigned long long) mp->m_sb.sb_rblocks); |
1266 | return -EFBIG; | 1211 | return -EFBIG; |
1267 | } | 1212 | } |
1268 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, | 1213 | error = xfs_buf_read_uncached(mp->m_rtdev_targp, |
1269 | d - XFS_FSB_TO_BB(mp, 1), | 1214 | d - XFS_FSB_TO_BB(mp, 1), |
1270 | XFS_FSB_TO_BB(mp, 1), 0, NULL); | 1215 | XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); |
1271 | if (!bp || bp->b_error) { | 1216 | if (error) { |
1272 | xfs_warn(mp, "realtime device size check failed"); | 1217 | xfs_warn(mp, "realtime device size check failed"); |
1273 | if (bp) | 1218 | return error; |
1274 | xfs_buf_relse(bp); | ||
1275 | return -EIO; | ||
1276 | } | 1219 | } |
1277 | xfs_buf_relse(bp); | 1220 | xfs_buf_relse(bp); |
1278 | return 0; | 1221 | return 0; |
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index c642795324af..76c0a4a9bb17 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h | |||
@@ -111,6 +111,10 @@ int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, | |||
111 | xfs_rtblock_t *rtblock); | 111 | xfs_rtblock_t *rtblock); |
112 | int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, | 112 | int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, |
113 | xfs_rtblock_t start, xfs_extlen_t len, int val); | 113 | xfs_rtblock_t start, xfs_extlen_t len, int val); |
114 | int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, | ||
115 | int log, xfs_rtblock_t bbno, int delta, | ||
116 | xfs_buf_t **rbpp, xfs_fsblock_t *rsb, | ||
117 | xfs_suminfo_t *sum); | ||
114 | int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, | 118 | int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, |
115 | xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, | 119 | xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, |
116 | xfs_fsblock_t *rsb); | 120 | xfs_fsblock_t *rsb); |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b194652033cd..9f622feda6a4 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include "xfs_dinode.h" | 47 | #include "xfs_dinode.h" |
48 | #include "xfs_filestream.h" | 48 | #include "xfs_filestream.h" |
49 | #include "xfs_quota.h" | 49 | #include "xfs_quota.h" |
50 | #include "xfs_sysfs.h" | ||
50 | 51 | ||
51 | #include <linux/namei.h> | 52 | #include <linux/namei.h> |
52 | #include <linux/init.h> | 53 | #include <linux/init.h> |
@@ -61,7 +62,11 @@ | |||
61 | static const struct super_operations xfs_super_operations; | 62 | static const struct super_operations xfs_super_operations; |
62 | static kmem_zone_t *xfs_ioend_zone; | 63 | static kmem_zone_t *xfs_ioend_zone; |
63 | mempool_t *xfs_ioend_pool; | 64 | mempool_t *xfs_ioend_pool; |
64 | struct kset *xfs_kset; | 65 | |
66 | static struct kset *xfs_kset; /* top-level xfs sysfs dir */ | ||
67 | #ifdef DEBUG | ||
68 | static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ | ||
69 | #endif | ||
65 | 70 | ||
66 | #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ | 71 | #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ |
67 | #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ | 72 | #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ |
@@ -838,32 +843,32 @@ xfs_init_mount_workqueues( | |||
838 | struct xfs_mount *mp) | 843 | struct xfs_mount *mp) |
839 | { | 844 | { |
840 | mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", | 845 | mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", |
841 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | 846 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); |
842 | if (!mp->m_data_workqueue) | 847 | if (!mp->m_data_workqueue) |
843 | goto out; | 848 | goto out; |
844 | 849 | ||
845 | mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", | 850 | mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", |
846 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | 851 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); |
847 | if (!mp->m_unwritten_workqueue) | 852 | if (!mp->m_unwritten_workqueue) |
848 | goto out_destroy_data_iodone_queue; | 853 | goto out_destroy_data_iodone_queue; |
849 | 854 | ||
850 | mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", | 855 | mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", |
851 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | 856 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); |
852 | if (!mp->m_cil_workqueue) | 857 | if (!mp->m_cil_workqueue) |
853 | goto out_destroy_unwritten; | 858 | goto out_destroy_unwritten; |
854 | 859 | ||
855 | mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", | 860 | mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", |
856 | 0, 0, mp->m_fsname); | 861 | WQ_FREEZABLE, 0, mp->m_fsname); |
857 | if (!mp->m_reclaim_workqueue) | 862 | if (!mp->m_reclaim_workqueue) |
858 | goto out_destroy_cil; | 863 | goto out_destroy_cil; |
859 | 864 | ||
860 | mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", | 865 | mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", |
861 | 0, 0, mp->m_fsname); | 866 | WQ_FREEZABLE, 0, mp->m_fsname); |
862 | if (!mp->m_log_workqueue) | 867 | if (!mp->m_log_workqueue) |
863 | goto out_destroy_reclaim; | 868 | goto out_destroy_reclaim; |
864 | 869 | ||
865 | mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", | 870 | mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", |
866 | 0, 0, mp->m_fsname); | 871 | WQ_FREEZABLE, 0, mp->m_fsname); |
867 | if (!mp->m_eofblocks_workqueue) | 872 | if (!mp->m_eofblocks_workqueue) |
868 | goto out_destroy_log; | 873 | goto out_destroy_log; |
869 | 874 | ||
@@ -1406,6 +1411,7 @@ xfs_fs_fill_super( | |||
1406 | atomic_set(&mp->m_active_trans, 0); | 1411 | atomic_set(&mp->m_active_trans, 0); |
1407 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); | 1412 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); |
1408 | INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); | 1413 | INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); |
1414 | mp->m_kobj.kobject.kset = xfs_kset; | ||
1409 | 1415 | ||
1410 | mp->m_super = sb; | 1416 | mp->m_super = sb; |
1411 | sb->s_fs_info = mp; | 1417 | sb->s_fs_info = mp; |
@@ -1715,7 +1721,8 @@ xfs_init_workqueues(void) | |||
1715 | * AGs in all the filesystems mounted. Hence use the default large | 1721 | * AGs in all the filesystems mounted. Hence use the default large |
1716 | * max_active value for this workqueue. | 1722 | * max_active value for this workqueue. |
1717 | */ | 1723 | */ |
1718 | xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); | 1724 | xfs_alloc_wq = alloc_workqueue("xfsalloc", |
1725 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0); | ||
1719 | if (!xfs_alloc_wq) | 1726 | if (!xfs_alloc_wq) |
1720 | return -ENOMEM; | 1727 | return -ENOMEM; |
1721 | 1728 | ||
@@ -1768,9 +1775,16 @@ init_xfs_fs(void) | |||
1768 | goto out_sysctl_unregister;; | 1775 | goto out_sysctl_unregister;; |
1769 | } | 1776 | } |
1770 | 1777 | ||
1771 | error = xfs_qm_init(); | 1778 | #ifdef DEBUG |
1779 | xfs_dbg_kobj.kobject.kset = xfs_kset; | ||
1780 | error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); | ||
1772 | if (error) | 1781 | if (error) |
1773 | goto out_kset_unregister; | 1782 | goto out_kset_unregister; |
1783 | #endif | ||
1784 | |||
1785 | error = xfs_qm_init(); | ||
1786 | if (error) | ||
1787 | goto out_remove_kobj; | ||
1774 | 1788 | ||
1775 | error = register_filesystem(&xfs_fs_type); | 1789 | error = register_filesystem(&xfs_fs_type); |
1776 | if (error) | 1790 | if (error) |
@@ -1779,7 +1793,11 @@ init_xfs_fs(void) | |||
1779 | 1793 | ||
1780 | out_qm_exit: | 1794 | out_qm_exit: |
1781 | xfs_qm_exit(); | 1795 | xfs_qm_exit(); |
1796 | out_remove_kobj: | ||
1797 | #ifdef DEBUG | ||
1798 | xfs_sysfs_del(&xfs_dbg_kobj); | ||
1782 | out_kset_unregister: | 1799 | out_kset_unregister: |
1800 | #endif | ||
1783 | kset_unregister(xfs_kset); | 1801 | kset_unregister(xfs_kset); |
1784 | out_sysctl_unregister: | 1802 | out_sysctl_unregister: |
1785 | xfs_sysctl_unregister(); | 1803 | xfs_sysctl_unregister(); |
@@ -1802,6 +1820,9 @@ exit_xfs_fs(void) | |||
1802 | { | 1820 | { |
1803 | xfs_qm_exit(); | 1821 | xfs_qm_exit(); |
1804 | unregister_filesystem(&xfs_fs_type); | 1822 | unregister_filesystem(&xfs_fs_type); |
1823 | #ifdef DEBUG | ||
1824 | xfs_sysfs_del(&xfs_dbg_kobj); | ||
1825 | #endif | ||
1805 | kset_unregister(xfs_kset); | 1826 | kset_unregister(xfs_kset); |
1806 | xfs_sysctl_unregister(); | 1827 | xfs_sysctl_unregister(); |
1807 | xfs_cleanup_procfs(); | 1828 | xfs_cleanup_procfs(); |
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 6a944a2cd36f..02ae62a998e0 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
@@ -269,9 +269,11 @@ xfs_symlink( | |||
269 | /* | 269 | /* |
270 | * Check for ability to enter directory entry, if no space reserved. | 270 | * Check for ability to enter directory entry, if no space reserved. |
271 | */ | 271 | */ |
272 | error = xfs_dir_canenter(tp, dp, link_name, resblks); | 272 | if (!resblks) { |
273 | if (error) | 273 | error = xfs_dir_canenter(tp, dp, link_name); |
274 | goto error_return; | 274 | if (error) |
275 | goto error_return; | ||
276 | } | ||
275 | /* | 277 | /* |
276 | * Initialize the bmap freelist prior to calling either | 278 | * Initialize the bmap freelist prior to calling either |
277 | * bmapi or the directory create code. | 279 | * bmapi or the directory create code. |
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index bd8e157c20ef..ffef45375754 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h | |||
@@ -92,6 +92,11 @@ enum { | |||
92 | 92 | ||
93 | extern xfs_param_t xfs_params; | 93 | extern xfs_param_t xfs_params; |
94 | 94 | ||
95 | struct xfs_globals { | ||
96 | int log_recovery_delay; /* log recovery delay (secs) */ | ||
97 | }; | ||
98 | extern struct xfs_globals xfs_globals; | ||
99 | |||
95 | #ifdef CONFIG_SYSCTL | 100 | #ifdef CONFIG_SYSCTL |
96 | extern int xfs_sysctl_register(void); | 101 | extern int xfs_sysctl_register(void); |
97 | extern void xfs_sysctl_unregister(void); | 102 | extern void xfs_sysctl_unregister(void); |
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 9835139ce1ec..aa03670851d8 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c | |||
@@ -51,6 +51,80 @@ struct kobj_type xfs_mp_ktype = { | |||
51 | .release = xfs_sysfs_release, | 51 | .release = xfs_sysfs_release, |
52 | }; | 52 | }; |
53 | 53 | ||
54 | #ifdef DEBUG | ||
55 | /* debug */ | ||
56 | |||
57 | STATIC ssize_t | ||
58 | log_recovery_delay_store( | ||
59 | const char *buf, | ||
60 | size_t count, | ||
61 | void *data) | ||
62 | { | ||
63 | int ret; | ||
64 | int val; | ||
65 | |||
66 | ret = kstrtoint(buf, 0, &val); | ||
67 | if (ret) | ||
68 | return ret; | ||
69 | |||
70 | if (val < 0 || val > 60) | ||
71 | return -EINVAL; | ||
72 | |||
73 | xfs_globals.log_recovery_delay = val; | ||
74 | |||
75 | return count; | ||
76 | } | ||
77 | |||
78 | STATIC ssize_t | ||
79 | log_recovery_delay_show( | ||
80 | char *buf, | ||
81 | void *data) | ||
82 | { | ||
83 | return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); | ||
84 | } | ||
85 | XFS_SYSFS_ATTR_RW(log_recovery_delay); | ||
86 | |||
87 | static struct attribute *xfs_dbg_attrs[] = { | ||
88 | ATTR_LIST(log_recovery_delay), | ||
89 | NULL, | ||
90 | }; | ||
91 | |||
92 | STATIC ssize_t | ||
93 | xfs_dbg_show( | ||
94 | struct kobject *kobject, | ||
95 | struct attribute *attr, | ||
96 | char *buf) | ||
97 | { | ||
98 | struct xfs_sysfs_attr *xfs_attr = to_attr(attr); | ||
99 | |||
100 | return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; | ||
101 | } | ||
102 | |||
103 | STATIC ssize_t | ||
104 | xfs_dbg_store( | ||
105 | struct kobject *kobject, | ||
106 | struct attribute *attr, | ||
107 | const char *buf, | ||
108 | size_t count) | ||
109 | { | ||
110 | struct xfs_sysfs_attr *xfs_attr = to_attr(attr); | ||
111 | |||
112 | return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0; | ||
113 | } | ||
114 | |||
115 | static struct sysfs_ops xfs_dbg_ops = { | ||
116 | .show = xfs_dbg_show, | ||
117 | .store = xfs_dbg_store, | ||
118 | }; | ||
119 | |||
120 | struct kobj_type xfs_dbg_ktype = { | ||
121 | .release = xfs_sysfs_release, | ||
122 | .sysfs_ops = &xfs_dbg_ops, | ||
123 | .default_attrs = xfs_dbg_attrs, | ||
124 | }; | ||
125 | |||
126 | #endif /* DEBUG */ | ||
127 | |||
54 | /* xlog */ | 128 | /* xlog */ |
55 | 129 | ||
56 | STATIC ssize_t | 130 | STATIC ssize_t |
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 54a2091183c0..240eee35f342 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define __XFS_SYSFS_H__ | 20 | #define __XFS_SYSFS_H__ |
21 | 21 | ||
22 | extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ | 22 | extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ |
23 | extern struct kobj_type xfs_dbg_ktype; /* debug */ | ||
23 | extern struct kobj_type xfs_log_ktype; /* xlog */ | 24 | extern struct kobj_type xfs_log_ktype; /* xlog */ |
24 | 25 | ||
25 | static inline struct xfs_kobj * | 26 | static inline struct xfs_kobj * |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 152f82782630..51372e34d988 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free); | |||
349 | DEFINE_BUF_EVENT(xfs_buf_hold); | 349 | DEFINE_BUF_EVENT(xfs_buf_hold); |
350 | DEFINE_BUF_EVENT(xfs_buf_rele); | 350 | DEFINE_BUF_EVENT(xfs_buf_rele); |
351 | DEFINE_BUF_EVENT(xfs_buf_iodone); | 351 | DEFINE_BUF_EVENT(xfs_buf_iodone); |
352 | DEFINE_BUF_EVENT(xfs_buf_iorequest); | 352 | DEFINE_BUF_EVENT(xfs_buf_submit); |
353 | DEFINE_BUF_EVENT(xfs_buf_submit_wait); | ||
353 | DEFINE_BUF_EVENT(xfs_buf_bawrite); | 354 | DEFINE_BUF_EVENT(xfs_buf_bawrite); |
354 | DEFINE_BUF_EVENT(xfs_buf_lock); | 355 | DEFINE_BUF_EVENT(xfs_buf_lock); |
355 | DEFINE_BUF_EVENT(xfs_buf_lock_done); | 356 | DEFINE_BUF_EVENT(xfs_buf_lock_done); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 96c898e7ac9a..e2b2216b1635 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -318,20 +318,10 @@ xfs_trans_read_buf_map( | |||
318 | XFS_BUF_READ(bp); | 318 | XFS_BUF_READ(bp); |
319 | bp->b_ops = ops; | 319 | bp->b_ops = ops; |
320 | 320 | ||
321 | /* | 321 | error = xfs_buf_submit_wait(bp); |
322 | * XXX(hch): clean up the error handling here to be less | ||
323 | * of a mess.. | ||
324 | */ | ||
325 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
326 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | ||
327 | xfs_bioerror_relse(bp); | ||
328 | } else { | ||
329 | xfs_buf_iorequest(bp); | ||
330 | } | ||
331 | |||
332 | error = xfs_buf_iowait(bp); | ||
333 | if (error) { | 322 | if (error) { |
334 | xfs_buf_ioerror_alert(bp, __func__); | 323 | if (!XFS_FORCED_SHUTDOWN(mp)) |
324 | xfs_buf_ioerror_alert(bp, __func__); | ||
335 | xfs_buf_relse(bp); | 325 | xfs_buf_relse(bp); |
336 | /* | 326 | /* |
337 | * We can gracefully recover from most read | 327 | * We can gracefully recover from most read |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 50c3f5614288..cdb4d86520e1 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -70,7 +70,7 @@ xfs_trans_ichgtime( | |||
70 | int flags) | 70 | int flags) |
71 | { | 71 | { |
72 | struct inode *inode = VFS_I(ip); | 72 | struct inode *inode = VFS_I(ip); |
73 | timespec_t tv; | 73 | struct timespec tv; |
74 | 74 | ||
75 | ASSERT(tp); | 75 | ASSERT(tp); |
76 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 76 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |