author     Linus Torvalds <torvalds@linux-foundation.org>  2014-10-13 06:06:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-10-13 06:06:54 -0400
commit     5ff0b9e1a1da58b584aa4b8ea234be20b5a1164b (patch)
tree       4849a305c073d4add184c1474a6c000a847285e7
parent     77c688ac87183537ed0fb84ec2cb8fa8ec97c458 (diff)
parent     6889e783cd68b79f8330ad4d10a2571c67c3f7df (diff)
Merge tag 'xfs-for-linus-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs update from Dave Chinner:
 "This update contains:

   - various cleanups

   - log recovery debug hooks

   - seek hole/data implementation merge

   - extent shift rework to fix collapse range bugs

   - various sparse warning fixes

   - log recovery transaction processing rework to fix use after free
     bugs

   - metadata buffer IO infrastructure rework to ensure all buffers
     under IO have valid reference counts

   - various fixes for ondisk flags, writeback and zero range corner
     cases"

* tag 'xfs-for-linus-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (56 commits)
  xfs: fix agno increment in xfs_inumbers() loop
  xfs: xfs_iflush_done checks the wrong log item callback
  xfs: flush the range before zero range conversion
  xfs: restore buffer_head unwritten bit on ioend cancel
  xfs: check for null dquot in xfs_quota_calc_throttle()
  xfs: fix crc field handling in xfs_sb_to/from_disk
  xfs: don't send null bp to xfs_trans_brelse()
  xfs: check for inode size overflow in xfs_new_eof()
  xfs: only set extent size hint when asked
  xfs: project id inheritance is a directory only flag
  xfs: kill time.h
  xfs: compat_xfs_bstat does not have forkoff
  xfs: simplify xfs_zero_remaining_bytes
  xfs: check xfs_buf_read_uncached returns correctly
  xfs: introduce xfs_buf_submit[_wait]
  xfs: kill xfs_bioerror_relse
  xfs: xfs_bioerror can die.
  xfs: kill xfs_bdstrat_cb
  xfs: rework xfs_buf_bio_endio error handling
  xfs: xfs_buf_ioend and xfs_buf_iodone_work duplicate functionality
  ...
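The buffer IO rework called out above replaces the old xfs_buf_iorequest()/xfs_buf_iowait() pairing with xfs_buf_submit() and xfs_buf_submit_wait(), both defined in the xfs_buf.c hunks at the end of this diff. A rough sketch of the two caller patterns that result (not a literal call site from this series; error handling trimmed):

    /* asynchronous: the IO owns the buffer lock and reference after
     * submission, so the submitter must not touch bp again without
     * holding an extra reference of its own */
    bp->b_flags |= XBF_ASYNC;
    xfs_buf_submit(bp);

    /* synchronous: submit and wait in one call; the caller keeps the
     * lock and the reference and releases the buffer itself */
    error = xfs_buf_submit_wait(bp);
    if (error)
            xfs_buf_ioerror_alert(bp, __func__);
    xfs_buf_relse(bp);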
-rw-r--r--  fs/xfs/kmem.c                   1
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c       4
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c      365
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.h        7
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c    3
-rw-r--r--  fs/xfs/libxfs/xfs_da_format.c   1
-rw-r--r--  fs/xfs/libxfs/xfs_dir2.c       67
-rw-r--r--  fs/xfs/libxfs/xfs_dir2.h        2
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c      7
-rw-r--r--  fs/xfs/libxfs/xfs_rtbitmap.c   49
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c          7
-rw-r--r--  fs/xfs/time.h                  36
-rw-r--r--  fs/xfs/xfs_aops.c              23
-rw-r--r--  fs/xfs/xfs_bmap_util.c        126
-rw-r--r--  fs/xfs/xfs_buf.c              355
-rw-r--r--  fs/xfs/xfs_buf.h               15
-rw-r--r--  fs/xfs/xfs_buf_item.c          10
-rw-r--r--  fs/xfs/xfs_file.c             178
-rw-r--r--  fs/xfs/xfs_fsops.c             11
-rw-r--r--  fs/xfs/xfs_globals.c            4
-rw-r--r--  fs/xfs/xfs_icache.c             1
-rw-r--r--  fs/xfs/xfs_inode.c             34
-rw-r--r--  fs/xfs/xfs_inode.h              2
-rw-r--r--  fs/xfs/xfs_inode_item.c         2
-rw-r--r--  fs/xfs/xfs_ioctl.c             28
-rw-r--r--  fs/xfs/xfs_ioctl32.c            2
-rw-r--r--  fs/xfs/xfs_ioctl32.h            3
-rw-r--r--  fs/xfs/xfs_iomap.c              4
-rw-r--r--  fs/xfs/xfs_iops.c              30
-rw-r--r--  fs/xfs/xfs_itable.c             3
-rw-r--r--  fs/xfs/xfs_linux.h              6
-rw-r--r--  fs/xfs/xfs_log.c               59
-rw-r--r--  fs/xfs/xfs_log_cil.c           47
-rw-r--r--  fs/xfs/xfs_log_recover.c      689
-rw-r--r--  fs/xfs/xfs_mount.c             58
-rw-r--r--  fs/xfs/xfs_mru_cache.c          3
-rw-r--r--  fs/xfs/xfs_qm.c                 1
-rw-r--r--  fs/xfs/xfs_rtalloc.c           85
-rw-r--r--  fs/xfs/xfs_rtalloc.h            4
-rw-r--r--  fs/xfs/xfs_super.c             39
-rw-r--r--  fs/xfs/xfs_symlink.c            8
-rw-r--r--  fs/xfs/xfs_sysctl.h             5
-rw-r--r--  fs/xfs/xfs_sysfs.c             74
-rw-r--r--  fs/xfs/xfs_sysfs.h              1
-rw-r--r--  fs/xfs/xfs_trace.h              3
-rw-r--r--  fs/xfs/xfs_trans_buf.c         16
-rw-r--r--  fs/xfs/xfs_trans_inode.c        2
47 files changed, 1304 insertions, 1176 deletions
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 844e288b9576..53e95b2a1369 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -21,7 +21,6 @@
 #include <linux/swap.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
-#include "time.h"
 #include "kmem.h"
 #include "xfs_message.h"
 
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 4bffffe038a1..eff34218f405 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2209,6 +2209,10 @@ xfs_agf_verify(
 	    be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
 		return false;
 
+	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
+	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
+		return false;
+
 	/*
 	 * during growfs operations, the perag is not fully initialised,
 	 * so we can't use it for any useful checking. growfs ensures we can't
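The AGF check added here, like the matching AGI check in the xfs_ialloc.c hunk further down, bounds the on-disk btree height fields before anything walks the trees. A simplified, self-contained restatement of the pattern (MAX_BTREE_LEVELS is a hypothetical stand-in for the kernel's XFS_BTREE_MAXLEVELS):

    #include <stdbool.h>
    #include <stdint.h>

    #define MAX_BTREE_LEVELS	9	/* stand-in for XFS_BTREE_MAXLEVELS */

    struct agf_levels {
    	uint32_t	bno_level;	/* by-bno btree height */
    	uint32_t	cnt_level;	/* by-size btree height */
    };

    /* Reject a header whose recorded btree heights exceed the format
     * maximum; a corrupt height would otherwise drive out-of-bounds
     * accesses in the btree walking code. */
    static bool
    agf_levels_ok(const struct agf_levels *l)
    {
    	return l->bno_level <= MAX_BTREE_LEVELS &&
    	       l->cnt_level <= MAX_BTREE_LEVELS;
    }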
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 86df952d3e24..79c981984dca 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5404,22 +5404,223 @@ error0:
 }
 
 /*
+ * Determine whether an extent shift can be accomplished by a merge with the
+ * extent that precedes the target hole of the shift.
+ */
+STATIC bool
+xfs_bmse_can_merge(
+	struct xfs_bmbt_irec	*left,	/* preceding extent */
+	struct xfs_bmbt_irec	*got,	/* current extent to shift */
+	xfs_fileoff_t		shift)	/* shift fsb */
+{
+	xfs_fileoff_t		startoff;
+
+	startoff = got->br_startoff - shift;
+
+	/*
+	 * The extent, once shifted, must be adjacent in-file and on-disk with
+	 * the preceding extent.
+	 */
+	if ((left->br_startoff + left->br_blockcount != startoff) ||
+	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
+	    (left->br_state != got->br_state) ||
+	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
+		return false;
+
+	return true;
+}
+
+/*
+ * A bmap extent shift adjusts the file offset of an extent to fill a preceding
+ * hole in the file. If an extent shift would result in the extent being fully
+ * adjacent to the extent that currently precedes the hole, we can merge with
+ * the preceding extent rather than do the shift.
+ *
+ * This function assumes the caller has verified a shift-by-merge is possible
+ * with the provided extents via xfs_bmse_can_merge().
+ */
+STATIC int
+xfs_bmse_merge(
+	struct xfs_inode		*ip,
+	int				whichfork,
+	xfs_fileoff_t			shift,		/* shift fsb */
+	int				current_ext,	/* idx of gotp */
+	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
+	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
+	struct xfs_btree_cur		*cur,
+	int				*logflags)	/* output */
+{
+	struct xfs_ifork		*ifp;
+	struct xfs_bmbt_irec		got;
+	struct xfs_bmbt_irec		left;
+	xfs_filblks_t			blockcount;
+	int				error, i;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	xfs_bmbt_get_all(gotp, &got);
+	xfs_bmbt_get_all(leftp, &left);
+	blockcount = left.br_blockcount + got.br_blockcount;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(xfs_bmse_can_merge(&left, &got, shift));
+
+	/*
+	 * Merge the in-core extents. Note that the host record pointers and
+	 * current_ext index are invalid once the extent has been removed via
+	 * xfs_iext_remove().
+	 */
+	xfs_bmbt_set_blockcount(leftp, blockcount);
+	xfs_iext_remove(ip, current_ext, 1, 0);
+
+	/*
+	 * Update the on-disk extent count, the btree if necessary and log the
+	 * inode.
+	 */
+	XFS_IFORK_NEXT_SET(ip, whichfork,
+			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+	*logflags |= XFS_ILOG_CORE;
+	if (!cur) {
+		*logflags |= XFS_ILOG_DEXT;
+		return 0;
+	}
+
+	/* lookup and remove the extent to merge */
+	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
+				   got.br_blockcount, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	error = xfs_btree_delete(cur, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	/* lookup and update size of the previous extent */
+	error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
+				   left.br_blockcount, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	left.br_blockcount = blockcount;
+
+	error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
+				left.br_blockcount, left.br_state);
+	if (error)
+		goto out_error;
+
+	return 0;
+
+out_error:
+	return error;
+}
+
+/*
+ * Shift a single extent.
+ */
+STATIC int
+xfs_bmse_shift_one(
+	struct xfs_inode		*ip,
+	int				whichfork,
+	xfs_fileoff_t			offset_shift_fsb,
+	int				*current_ext,
+	struct xfs_bmbt_rec_host	*gotp,
+	struct xfs_btree_cur		*cur,
+	int				*logflags)
+{
+	struct xfs_ifork		*ifp;
+	xfs_fileoff_t			startoff;
+	struct xfs_bmbt_rec_host	*leftp;
+	struct xfs_bmbt_irec		got;
+	struct xfs_bmbt_irec		left;
+	int				error;
+	int				i;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+
+	xfs_bmbt_get_all(gotp, &got);
+	startoff = got.br_startoff - offset_shift_fsb;
+
+	/* delalloc extents should be prevented by caller */
+	XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock),
+				out_error);
+
+	/*
+	 * If this is the first extent in the file, make sure there's enough
+	 * room at the start of the file and jump right to the shift as there's
+	 * no left extent to merge.
+	 */
+	if (*current_ext == 0) {
+		if (got.br_startoff < offset_shift_fsb)
+			return -EINVAL;
+		goto shift_extent;
+	}
+
+	/* grab the left extent and check for a large enough hole */
+	leftp = xfs_iext_get_ext(ifp, *current_ext - 1);
+	xfs_bmbt_get_all(leftp, &left);
+
+	if (startoff < left.br_startoff + left.br_blockcount)
+		return -EINVAL;
+
+	/* check whether to merge the extent or shift it down */
+	if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb))
+		goto shift_extent;
+
+	return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext,
+			      gotp, leftp, cur, logflags);
+
+shift_extent:
+	/*
+	 * Increment the extent index for the next iteration, update the start
+	 * offset of the in-core extent and update the btree if applicable.
+	 */
+	(*current_ext)++;
+	xfs_bmbt_set_startoff(gotp, startoff);
+	*logflags |= XFS_ILOG_CORE;
+	if (!cur) {
+		*logflags |= XFS_ILOG_DEXT;
+		return 0;
+	}
+
+	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
+				   got.br_blockcount, &i);
+	if (error)
+		return error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	got.br_startoff = startoff;
+	error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
+				got.br_blockcount, got.br_state);
+	if (error)
+		return error;
+
+	return 0;
+
+out_error:
+	return error;
+}
+
+/*
  * Shift extent records to the left to cover a hole.
  *
- * The maximum number of extents to be shifted in a single operation
- * is @num_exts, and @current_ext keeps track of the current extent
- * index we have shifted. @offset_shift_fsb is the length by which each
- * extent is shifted. If there is no hole to shift the extents
- * into, this will be considered invalid operation and we abort immediately.
+ * The maximum number of extents to be shifted in a single operation is
+ * @num_exts. @start_fsb specifies the file offset to start the shift and the
+ * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
+ * is the length by which each extent is shifted. If there is no hole to shift
+ * the extents into, this will be considered invalid operation and we abort
+ * immediately.
  */
 int
 xfs_bmap_shift_extents(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
-	int			*done,
 	xfs_fileoff_t		start_fsb,
 	xfs_fileoff_t		offset_shift_fsb,
-	xfs_extnum_t		*current_ext,
+	int			*done,
+	xfs_fileoff_t		*next_fsb,
 	xfs_fsblock_t		*firstblock,
 	struct xfs_bmap_free	*flist,
 	int			num_exts)
@@ -5427,16 +5628,13 @@ xfs_bmap_shift_extents(
 	struct xfs_btree_cur		*cur = NULL;
 	struct xfs_bmbt_rec_host	*gotp;
 	struct xfs_bmbt_irec		got;
-	struct xfs_bmbt_irec		left;
 	struct xfs_mount		*mp = ip->i_mount;
 	struct xfs_ifork		*ifp;
 	xfs_extnum_t			nexts = 0;
-	xfs_fileoff_t			startoff;
+	xfs_extnum_t			current_ext;
 	int				error = 0;
-	int				i;
 	int				whichfork = XFS_DATA_FORK;
 	int				logflags = 0;
-	xfs_filblks_t			blockcount = 0;
 	int				total_extents;
 
 	if (unlikely(XFS_TEST_ERROR(
@@ -5451,7 +5649,8 @@ xfs_bmap_shift_extents(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	ASSERT(current_ext != NULL);
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5461,23 +5660,6 @@ xfs_bmap_shift_extents(
 			return error;
 	}
 
-	/*
-	 * If *current_ext is 0, we would need to lookup the extent
-	 * from where we would start shifting and store it in gotp.
-	 */
-	if (!*current_ext) {
-		gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
-		/*
-		 * gotp can be null in 2 cases: 1) if there are no extents
-		 * or 2) start_fsb lies in a hole beyond which there are
-		 * no extents. Either way, we are done.
-		 */
-		if (!gotp) {
-			*done = 1;
-			return 0;
-		}
-	}
-
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
@@ -5486,112 +5668,46 @@ xfs_bmap_shift_extents(
 	}
 
 	/*
+	 * Look up the extent index for the fsb where we start shifting. We can
+	 * henceforth iterate with current_ext as extent list changes are locked
+	 * out via ilock.
+	 *
+	 * gotp can be null in 2 cases: 1) if there are no extents or 2)
+	 * start_fsb lies in a hole beyond which there are no extents. Either
+	 * way, we are done.
+	 */
+	gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext);
+	if (!gotp) {
+		*done = 1;
+		goto del_cursor;
+	}
+
+	/*
 	 * There may be delalloc extents in the data fork before the range we
-	 * are collapsing out, so we cannot
-	 * use the count of real extents here. Instead we have to calculate it
-	 * from the incore fork.
+	 * are collapsing out, so we cannot use the count of real extents here.
+	 * Instead we have to calculate it from the incore fork.
 	 */
 	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-	while (nexts++ < num_exts && *current_ext < total_extents) {
-
-		gotp = xfs_iext_get_ext(ifp, *current_ext);
-		xfs_bmbt_get_all(gotp, &got);
-		startoff = got.br_startoff - offset_shift_fsb;
-
-		/*
-		 * Before shifting extent into hole, make sure that the hole
-		 * is large enough to accomodate the shift.
-		 */
-		if (*current_ext) {
-			xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
-					*current_ext - 1), &left);
-
-			if (startoff < left.br_startoff + left.br_blockcount)
-				error = -EINVAL;
-		} else if (offset_shift_fsb > got.br_startoff) {
-			/*
-			 * When first extent is shifted, offset_shift_fsb
-			 * should be less than the stating offset of
-			 * the first extent.
-			 */
-			error = -EINVAL;
-		}
-
+	while (nexts++ < num_exts && current_ext < total_extents) {
+		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
+					   &current_ext, gotp, cur, &logflags);
 		if (error)
 			goto del_cursor;
 
-		if (cur) {
-			error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-						   got.br_startblock,
-						   got.br_blockcount,
-						   &i);
-			if (error)
-				goto del_cursor;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-		}
-
-		/* Check if we can merge 2 adjacent extents */
-		if (*current_ext &&
-		    left.br_startoff + left.br_blockcount == startoff &&
-		    left.br_startblock + left.br_blockcount ==
-				got.br_startblock &&
-		    left.br_state == got.br_state &&
-		    left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
-			blockcount = left.br_blockcount +
-				got.br_blockcount;
-			xfs_iext_remove(ip, *current_ext, 1, 0);
-			logflags |= XFS_ILOG_CORE;
-			if (cur) {
-				error = xfs_btree_delete(cur, &i);
-				if (error)
-					goto del_cursor;
-				XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-			} else {
-				logflags |= XFS_ILOG_DEXT;
-			}
-			XFS_IFORK_NEXT_SET(ip, whichfork,
-				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
-			gotp = xfs_iext_get_ext(ifp, --*current_ext);
-			xfs_bmbt_get_all(gotp, &got);
-
-			/* Make cursor point to the extent we will update */
-			if (cur) {
-				error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-							   got.br_startblock,
-							   got.br_blockcount,
-							   &i);
-				if (error)
-					goto del_cursor;
-				XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-			}
-
-			xfs_bmbt_set_blockcount(gotp, blockcount);
-			got.br_blockcount = blockcount;
-		} else {
-			/* We have to update the startoff */
-			xfs_bmbt_set_startoff(gotp, startoff);
-			got.br_startoff = startoff;
-		}
-
-		logflags |= XFS_ILOG_CORE;
-		if (cur) {
-			error = xfs_bmbt_update(cur, got.br_startoff,
-						got.br_startblock,
-						got.br_blockcount,
-						got.br_state);
-			if (error)
-				goto del_cursor;
-		} else {
-			logflags |= XFS_ILOG_DEXT;
-		}
-
-		(*current_ext)++;
+		/* update total extent count and grab the next record */
 		total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+		if (current_ext >= total_extents)
+			break;
+		gotp = xfs_iext_get_ext(ifp, current_ext);
 	}
 
 	/* Check if we are done */
-	if (*current_ext == total_extents)
+	if (current_ext == total_extents) {
 		*done = 1;
+	} else if (next_fsb) {
+		xfs_bmbt_get_all(gotp, &got);
+		*next_fsb = got.br_startoff;
+	}
 
 del_cursor:
 	if (cur)
@@ -5600,5 +5716,6 @@ del_cursor:
 
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
+
 	return error;
 }
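The core of the new xfs_bmse_can_merge() above is a pure adjacency test. A minimal standalone restatement, assuming the same three-field extent record and a hypothetical length cap in place of MAXEXTLEN:

    #include <stdbool.h>
    #include <stdint.h>

    #define MAX_EXT_LEN	(1ULL << 21)	/* hypothetical, role of MAXEXTLEN */

    struct extent {
    	uint64_t	startoff;	/* file offset, in fs blocks */
    	uint64_t	startblock;	/* disk block */
    	uint64_t	blockcount;	/* length, in fs blocks */
    	int		state;		/* written/unwritten */
    };

    /* After shifting 'got' left by 'shift' blocks it must land exactly at
     * the end of 'left', both in the file and on disk, match its state,
     * and the combined extent must not overflow the length cap. */
    static bool
    can_merge(const struct extent *left, const struct extent *got,
    	      uint64_t shift)
    {
    	uint64_t startoff = got->startoff - shift;

    	return left->startoff + left->blockcount == startoff &&
    	       left->startblock + left->blockcount == got->startblock &&
    	       left->state == got->state &&
    	       left->blockcount + got->blockcount <= MAX_EXT_LEN;
    }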
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b879ca56a64c..44db6db86402 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -178,9 +178,8 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
 		xfs_extnum_t num);
 uint	xfs_default_attroffset(struct xfs_inode *ip);
 int	xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-		int *done, xfs_fileoff_t start_fsb,
-		xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
-		xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
-		int num_exts);
+		xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb,
+		int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock,
+		struct xfs_bmap_free *flist, int num_exts);
 
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 2c42ae28d027..fd827530afec 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2563,7 +2563,8 @@ xfs_da_get_buf(
 					mapp, nmap, 0);
 	error = bp ? bp->b_error : -EIO;
 	if (error) {
-		xfs_trans_brelse(trans, bp);
+		if (bp)
+			xfs_trans_brelse(trans, bp);
 		goto out_free;
 	}
 
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index c9aee52a37e2..7e42fdfd2f1d 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -270,7 +270,6 @@ xfs_dir3_data_get_ftype(
 {
 	__uint8_t	ftype = dep->name[dep->namelen];
 
-	ASSERT(ftype < XFS_DIR3_FT_MAX);
 	if (ftype >= XFS_DIR3_FT_MAX)
 		return XFS_DIR3_FT_UNKNOWN;
 	return ftype;
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 6cef22152fd6..7075aaf131f4 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -237,7 +237,8 @@ xfs_dir_init(
 }
 
 /*
- Enter a name in a directory.
+ * Enter a name in a directory, or check for available space.
+ * If inum is 0, only the available space test is performed.
  */
 int
 xfs_dir_createname(
@@ -254,10 +255,12 @@ xfs_dir_createname(
 	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
-	rval = xfs_dir_ino_validate(tp->t_mountp, inum);
-	if (rval)
-		return rval;
-	XFS_STATS_INC(xs_dir_create);
+	if (inum) {
+		rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+		if (rval)
+			return rval;
+		XFS_STATS_INC(xs_dir_create);
+	}
 
 	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
 	if (!args)
@@ -276,6 +279,8 @@ xfs_dir_createname(
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
 	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+	if (!inum)
+		args->op_flags |= XFS_DA_OP_JUSTCHECK;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_addname(args);
@@ -535,62 +540,14 @@ out_free:
 
 /*
  * See if this entry can be added to the directory without allocating space.
- * First checks that the caller couldn't reserve enough space (resblks = 0).
  */
 int
 xfs_dir_canenter(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*dp,
-	struct xfs_name	*name,		/* name of entry to add */
-	uint		resblks)
+	struct xfs_name	*name)		/* name of entry to add */
 {
-	struct xfs_da_args	*args;
-	int			rval;
-	int			v;		/* type-checking value */
-
-	if (resblks)
-		return 0;
-
-	ASSERT(S_ISDIR(dp->i_d.di_mode));
-
-	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-	if (!args)
-		return -ENOMEM;
-
-	args->geo = dp->i_mount->m_dir_geo;
-	args->name = name->name;
-	args->namelen = name->len;
-	args->filetype = name->type;
-	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-	args->dp = dp;
-	args->whichfork = XFS_DATA_FORK;
-	args->trans = tp;
-	args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
-							XFS_DA_OP_OKNOENT;
-
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		rval = xfs_dir2_sf_addname(args);
-		goto out_free;
-	}
-
-	rval = xfs_dir2_isblock(args, &v);
-	if (rval)
-		goto out_free;
-	if (v) {
-		rval = xfs_dir2_block_addname(args);
-		goto out_free;
-	}
-
-	rval = xfs_dir2_isleaf(args, &v);
-	if (rval)
-		goto out_free;
-	if (v)
-		rval = xfs_dir2_leaf_addname(args);
-	else
-		rval = xfs_dir2_node_addname(args);
-out_free:
-	kmem_free(args);
-	return rval;
+	return xfs_dir_createname(tp, dp, name, 0, NULL, NULL, 0);
 }
 
 /*
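With this change, passing inum == 0 to xfs_dir_createname() turns the call into a pure free-space probe (XFS_DA_OP_JUSTCHECK) and xfs_dir_canenter() becomes the one-line wrapper above. The resblks short-circuit that used to live here moves out to the callers; a hedged sketch of that caller-side shape (dir_check_space is a hypothetical helper, not a function from this series):

    static int
    dir_check_space(struct xfs_trans *tp, struct xfs_inode *dp,
    		struct xfs_name *name, uint resblks)
    {
    	/* with a block reservation in hand the entry add cannot fail
    	 * for lack of space, so only probe when resblks == 0 */
    	if (resblks)
    		return 0;
    	return xfs_dir_canenter(tp, dp, name);
    }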
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index c8e86b0b5e99..4dff261e6ed5 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -136,7 +136,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
 				xfs_fsblock_t *first,
 				struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
-				struct xfs_name *name, uint resblks);
+				struct xfs_name *name);
 
 /*
  * Direct call from the bmap code, bypassing the generic directory layer.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index b62771f1f4b5..23dcb72fc5e6 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1076,8 +1076,8 @@ xfs_dialloc_ag_finobt_newino(
 	int			i;
 
 	if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
-		error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ,
-					 &i);
+		error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+					 XFS_LOOKUP_EQ, &i);
 		if (error)
 			return error;
 		if (i == 1) {
@@ -1085,7 +1085,6 @@ xfs_dialloc_ag_finobt_newino(
 			if (error)
 				return error;
 			XFS_WANT_CORRUPTED_RETURN(i == 1);
-
 			return 0;
 		}
 	}
@@ -2051,6 +2050,8 @@ xfs_agi_verify(
 	if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
 		return false;
 
+	if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
+		return false;
 	/*
 	 * during growfs operations, the perag is not fully initialised,
 	 * so we can't use it for any useful checking. growfs ensures we can't
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index f4dd697cac08..7c818f1e4484 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -424,20 +424,24 @@ xfs_rtfind_forw(
 }
 
 /*
- * Read and modify the summary information for a given extent size,
+ * Read and/or modify the summary information for a given extent size,
  * bitmap block combination.
  * Keeps track of a current summary block, so we don't keep reading
  * it from the buffer cache.
+ *
+ * Summary information is returned in *sum if specified.
+ * If no delta is specified, returns summary only.
  */
 int
-xfs_rtmodify_summary(
-	xfs_mount_t	*mp,		/* file system mount point */
+xfs_rtmodify_summary_int(
+	xfs_mount_t	*mp,		/* file system mount structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
 	int		log,		/* log2 of extent size */
 	xfs_rtblock_t	bbno,		/* bitmap block number */
 	int		delta,		/* change to make to summary info */
 	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
-	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
+	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
+	xfs_suminfo_t	*sum)		/* out: summary info for this block */
 {
 	xfs_buf_t	*bp;		/* buffer for the summary block */
 	int		error;		/* error value */
@@ -456,7 +460,7 @@ xfs_rtmodify_summary(
 	/*
 	 * If we have an old buffer, and the block number matches, use that.
 	 */
-	if (rbpp && *rbpp && *rsb == sb)
+	if (*rbpp && *rsb == sb)
 		bp = *rbpp;
 	/*
 	 * Otherwise we have to get the buffer.
@@ -465,7 +469,7 @@ xfs_rtmodify_summary(
 		/*
 		 * If there was an old one, get rid of it first.
 		 */
-		if (rbpp && *rbpp)
+		if (*rbpp)
 			xfs_trans_brelse(tp, *rbpp);
 		error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
 		if (error) {
@@ -474,21 +478,38 @@ xfs_rtmodify_summary(
 		/*
 		 * Remember this buffer and block for the next call.
 		 */
-		if (rbpp) {
-			*rbpp = bp;
-			*rsb = sb;
-		}
+		*rbpp = bp;
+		*rsb = sb;
 	}
 	/*
-	 * Point to the summary information, modify and log it.
+	 * Point to the summary information, modify/log it, and/or copy it out.
 	 */
 	sp = XFS_SUMPTR(mp, bp, so);
-	*sp += delta;
-	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
-		(uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
+	if (delta) {
+		uint first = (uint)((char *)sp - (char *)bp->b_addr);
+
+		*sp += delta;
+		xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
+	}
+	if (sum)
+		*sum = *sp;
 	return 0;
 }
 
+int
+xfs_rtmodify_summary(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	int		log,		/* log2 of extent size */
+	xfs_rtblock_t	bbno,		/* bitmap block number */
+	int		delta,		/* change to make to summary info */
+	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
+{
+	return xfs_rtmodify_summary_int(mp, tp, log, bbno,
+					delta, rbpp, rsb, NULL);
+}
+
 /*
  * Set the given range of bitmap bits to the given value.
  * Do whatever I/O and logging is required.
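Because a zero delta now skips the modify/log step entirely, xfs_rtmodify_summary_int() doubles as a cached read of the summary counter. A hedged usage sketch (rt_summary_read is a hypothetical wrapper, shown only to illustrate the calling convention):

    static int
    rt_summary_read(
    	xfs_mount_t	*mp,		/* file system mount structure */
    	xfs_trans_t	*tp,		/* transaction pointer */
    	int		log,		/* log2 of extent size */
    	xfs_rtblock_t	bbno,		/* bitmap block number */
    	xfs_buf_t	**rbpp,		/* in/out: cached summary buffer */
    	xfs_fsblock_t	*rsb,		/* in/out: cached summary block */
    	xfs_suminfo_t	*sum)		/* out: summary count */
    {
    	/* delta == 0: nothing is modified or logged, *sum is filled in */
    	return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0,
    					rbpp, rsb, sum);
    }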
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index ad525a5623a4..5f902fa7913f 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -279,11 +279,13 @@ xfs_mount_validate_sb(
 	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
 	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
 	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
+	    sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG			||
 	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
 	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
 	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
 	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
 	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
+	    sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE			||
 	    sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
 	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
@@ -443,6 +445,8 @@ __xfs_sb_from_disk(
 	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
 	to->sb_features_log_incompat =
 		be32_to_cpu(from->sb_features_log_incompat);
+	/* crc is only used on disk, not in memory; just init to 0 here. */
+	to->sb_crc = 0;
 	to->sb_pad = 0;
 	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
 	to->sb_lsn = be64_to_cpu(from->sb_lsn);
@@ -548,6 +552,9 @@ xfs_sb_to_disk(
 	if (!fields)
 		return;
 
+	/* We should never write the crc here, it's updated in the IO path */
+	fields &= ~XFS_SB_CRC;
+
 	xfs_sb_quota_to_disk(to, from, &fields);
 	while (fields) {
 		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
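Both superblock hunks enforce the same rule from opposite directions: the CRC is never carried through the field-by-field in-core/on-disk converters, because it is computed over the finished buffer in the write path. A compact restatement of the masking idiom (SB_FIELD_CRC is a hypothetical bit standing in for XFS_SB_CRC):

    #include <stdint.h>

    #define SB_FIELD_CRC	(1ULL << 40)	/* stand-in for XFS_SB_CRC */

    /* Drop the CRC from the set of fields the formatter copies out; the
     * IO path stamps the real checksum over the completed buffer. */
    static uint64_t
    sb_fields_to_write(uint64_t fields)
    {
    	return fields & ~SB_FIELD_CRC;
    }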
diff --git a/fs/xfs/time.h b/fs/xfs/time.h
deleted file mode 100644
index 387e695a184c..000000000000
--- a/fs/xfs/time.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
-	schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
-	*tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index b984647c24db..f5b2453a43b2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -434,10 +434,22 @@ xfs_start_page_writeback(
 {
 	ASSERT(PageLocked(page));
 	ASSERT(!PageWriteback(page));
-	if (clear_dirty)
+
+	/*
+	 * if the page was not fully cleaned, we need to ensure that the higher
+	 * layers come back to it correctly. That means we need to keep the page
+	 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
+	 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
+	 * write this page in this writeback sweep will be made.
+	 */
+	if (clear_dirty) {
 		clear_page_dirty_for_io(page);
-	set_page_writeback(page);
+		set_page_writeback(page);
+	} else
+		set_page_writeback_keepwrite(page);
+
 	unlock_page(page);
+
 	/* If no buffers on the page are to be written, finish it here */
 	if (!buffers)
 		end_page_writeback(page);
@@ -548,6 +560,13 @@ xfs_cancel_ioend(
 	do {
 		next_bh = bh->b_private;
 		clear_buffer_async_write(bh);
+		/*
+		 * The unwritten flag is cleared when added to the
+		 * ioend. We're not submitting for I/O so mark the
+		 * buffer unwritten again for next time around.
+		 */
+		if (ioend->io_type == XFS_IO_UNWRITTEN)
+			set_buffer_unwritten(bh);
 		unlock_buffer(bh);
 	} while ((bh = next_bh) != NULL);
 
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 1707980f9a4b..92e8f99a5857 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1122,14 +1122,6 @@ xfs_zero_remaining_bytes(
 	if (endoff > XFS_ISIZE(ip))
 		endoff = XFS_ISIZE(ip);
 
-	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
-					mp->m_rtdev_targp : mp->m_ddev_targp,
-				  BTOBB(mp->m_sb.sb_blocksize), 0);
-	if (!bp)
-		return -ENOMEM;
-
-	xfs_buf_unlock(bp);
-
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		uint lock_mode;
 
@@ -1152,42 +1144,24 @@ xfs_zero_remaining_bytes(
 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
 		if (imap.br_state == XFS_EXT_UNWRITTEN)
 			continue;
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNWRITE(bp);
-		XFS_BUF_READ(bp);
-		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
 
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			error = -EIO;
-			break;
-		}
-		xfs_buf_iorequest(bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(read)");
-			break;
-		}
+		error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
+				mp->m_rtdev_targp : mp->m_ddev_targp,
+				xfs_fsb_to_db(ip, imap.br_startblock),
+				BTOBB(mp->m_sb.sb_blocksize),
+				0, &bp, NULL);
+		if (error)
+			return error;
+
 		memset(bp->b_addr +
 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
 			0, lastoffset - offset + 1);
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNREAD(bp);
-		XFS_BUF_WRITE(bp);
-
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			error = -EIO;
-			break;
-		}
-		xfs_buf_iorequest(bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(write)");
-			break;
-		}
+
+		error = xfs_bwrite(bp);
+		xfs_buf_relse(bp);
+		if (error)
+			return error;
 	}
-	xfs_buf_free(bp);
 	return error;
 }
 
@@ -1205,6 +1179,7 @@ xfs_free_file_space(
 	xfs_bmap_free_t		free_list;
 	xfs_bmbt_irec_t		imap;
 	xfs_off_t		ioffset;
+	xfs_off_t		iendoffset;
 	xfs_extlen_t		mod=0;
 	xfs_mount_t		*mp;
 	int			nimap;
@@ -1233,12 +1208,13 @@ xfs_free_file_space(
 	inode_dio_wait(VFS_I(ip));
 
 	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
-	ioffset = offset & ~(rounding - 1);
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-					      ioffset, -1);
+	ioffset = round_down(offset, rounding);
+	iendoffset = round_up(offset + len, rounding) - 1;
+	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
+					     iendoffset);
 	if (error)
 		goto out;
-	truncate_pagecache_range(VFS_I(ip), ioffset, -1);
+	truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset);
 
 	/*
 	 * Need to zero the stuff we're not freeing, on disk.
@@ -1392,14 +1368,14 @@ xfs_zero_file_space(
 
 	if (start_boundary < end_boundary - 1) {
 		/*
-		 * punch out delayed allocation blocks and the page cache over
-		 * the conversion range
+		 * Writeback the range to ensure any inode size updates due to
+		 * appending writes make it to disk (otherwise we could just
+		 * punch out the delalloc blocks).
 		 */
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmap_punch_delalloc_range(ip,
-				XFS_B_TO_FSBT(mp, start_boundary),
-				XFS_B_TO_FSB(mp, end_boundary - start_boundary));
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+				start_boundary, end_boundary - 1);
+		if (error)
+			goto out;
 		truncate_pagecache_range(VFS_I(ip), start_boundary,
 					 end_boundary - 1);
 
@@ -1456,41 +1432,47 @@ xfs_collapse_file_space(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
-	xfs_extnum_t		current_ext = 0;
 	struct xfs_bmap_free	free_list;
 	xfs_fsblock_t		first_block;
 	int			committed;
 	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		next_fsb;
 	xfs_fileoff_t		shift_fsb;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 
 	trace_xfs_collapse_file_space(ip);
 
-	start_fsb = XFS_B_TO_FSB(mp, offset + len);
+	next_fsb = XFS_B_TO_FSB(mp, offset + len);
 	shift_fsb = XFS_B_TO_FSB(mp, len);
 
-	/*
-	 * Writeback the entire file and force remove any post-eof blocks. The
-	 * writeback prevents changes to the extent list via concurrent
-	 * writeback and the eofblocks trim prevents the extent shift algorithm
-	 * from running into a post-eof delalloc extent.
-	 *
-	 * XXX: This is a temporary fix until the extent shift loop below is
-	 * converted to use offsets and lookups within the ILOCK rather than
-	 * carrying around the index into the extent list for the next
-	 * iteration.
-	 */
-	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+	error = xfs_free_file_space(ip, offset, len);
 	if (error)
 		return error;
 
+	/*
+	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
+	 * into the accessible region of the file.
+	 */
 	if (xfs_can_free_eofblocks(ip, true)) {
 		error = xfs_free_eofblocks(mp, ip, false);
 		if (error)
 			return error;
 	}
 
-	error = xfs_free_file_space(ip, offset, len);
+	/*
+	 * Writeback and invalidate cache for the remainder of the file as we're
+	 * about to shift down every extent from the collapse range to EOF. The
+	 * free of the collapse range above might have already done some of
+	 * this, but we shouldn't rely on it to do anything outside of the range
+	 * that was freed.
+	 */
+	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+					     offset + len, -1);
+	if (error)
+		return error;
+	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+					(offset + len) >> PAGE_CACHE_SHIFT, -1);
 	if (error)
 		return error;
 
@@ -1525,10 +1507,10 @@ xfs_collapse_file_space(
 		 * We are using the write transaction in which max 2 bmbt
 		 * updates are allowed
 		 */
-		error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
-				shift_fsb, &current_ext,
-				&first_block, &free_list,
+		start_fsb = next_fsb;
+		error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb,
+				&done, &next_fsb, &first_block, &free_list,
 				XFS_BMAP_MAX_SHIFT_EXTENTS);
 		if (error)
 			goto out;
 
@@ -1638,7 +1620,7 @@ xfs_swap_extents_check_format(
 	return 0;
 }
 
-int
+static int
 xfs_swap_extent_flush(
 	struct xfs_inode *ip)
 {
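The hole-punch hunk above swaps an open-coded mask for round_down()/round_up() so that only the affected byte range is flushed and truncated rather than everything to EOF. A standalone check of the boundary arithmetic, with the rounding helpers restated for the power-of-two case (which holds here, since rounding is max(block size, PAGE_CACHE_SIZE)):

    #include <assert.h>
    #include <stdint.h>

    /* power-of-two rounding, as the kernel helpers behave */
    #define round_down(x, y)	((x) & ~((uint64_t)(y) - 1))
    #define round_up(x, y)	((((x) - 1) | ((uint64_t)(y) - 1)) + 1)

    int main(void)
    {
    	uint64_t rounding = 4096;	/* max(blocksize, page size) */
    	uint64_t offset = 3000, len = 5000;

    	uint64_t ioffset = round_down(offset, rounding);
    	uint64_t iendoffset = round_up(offset + len, rounding) - 1;

    	/* flush/invalidate [0, 8191] instead of [0, -1] (to EOF) */
    	assert(ioffset == 0);
    	assert(iendoffset == 8191);
    	return 0;
    }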
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cd7b8ca9b064..017b6afe340b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -623,10 +623,11 @@ _xfs_buf_read(
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
-	xfs_buf_iorequest(bp);
-	if (flags & XBF_ASYNC)
+	if (flags & XBF_ASYNC) {
+		xfs_buf_submit(bp);
 		return 0;
-	return xfs_buf_iowait(bp);
+	}
+	return xfs_buf_submit_wait(bp);
 }
 
 xfs_buf_t *
@@ -687,34 +688,39 @@ xfs_buf_readahead_map(
  * Read an uncached buffer from disk. Allocates and returns a locked
  * buffer containing the disk contents or nothing.
  */
-struct xfs_buf *
+int
 xfs_buf_read_uncached(
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		daddr,
 	size_t			numblks,
 	int			flags,
+	struct xfs_buf		**bpp,
 	const struct xfs_buf_ops *ops)
 {
 	struct xfs_buf		*bp;
 
+	*bpp = NULL;
+
 	bp = xfs_buf_get_uncached(target, numblks, flags);
 	if (!bp)
-		return NULL;
+		return -ENOMEM;
 
 	/* set up the buffer for a read IO */
 	ASSERT(bp->b_map_count == 1);
-	bp->b_bn = daddr;
+	bp->b_bn = XFS_BUF_DADDR_NULL;  /* always null for uncached buffers */
 	bp->b_maps[0].bm_bn = daddr;
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = ops;
 
-	if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+	xfs_buf_submit_wait(bp);
+	if (bp->b_error) {
+		int	error = bp->b_error;
 		xfs_buf_relse(bp);
-		return NULL;
+		return error;
 	}
-	xfs_buf_iorequest(bp);
-	xfs_buf_iowait(bp);
-	return bp;
+
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -998,53 +1004,56 @@ xfs_buf_wait_unpin(
  * Buffer Utility Routines
  */
 
-STATIC void
-xfs_buf_iodone_work(
-	struct work_struct	*work)
+void
+xfs_buf_ioend(
+	struct xfs_buf	*bp)
 {
-	struct xfs_buf		*bp =
-		container_of(work, xfs_buf_t, b_iodone_work);
-	bool			read = !!(bp->b_flags & XBF_READ);
+	bool		read = bp->b_flags & XBF_READ;
+
+	trace_xfs_buf_iodone(bp, _RET_IP_);
 
 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
 
-	/* only validate buffers that were read without errors */
-	if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
+	/*
+	 * Pull in IO completion errors now. We are guaranteed to be running
+	 * single threaded, so we don't need the lock to read b_io_error.
+	 */
+	if (!bp->b_error && bp->b_io_error)
+		xfs_buf_ioerror(bp, bp->b_io_error);
+
+	/* Only validate buffers that were read without errors */
+	if (read && !bp->b_error && bp->b_ops) {
+		ASSERT(!bp->b_iodone);
 		bp->b_ops->verify_read(bp);
+	}
+
+	if (!bp->b_error)
+		bp->b_flags |= XBF_DONE;
 
 	if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
 	else if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
-	else {
-		ASSERT(read && bp->b_ops);
+	else
 		complete(&bp->b_iowait);
-	}
 }
 
-void
-xfs_buf_ioend(
-	struct xfs_buf	*bp,
-	int		schedule)
+static void
+xfs_buf_ioend_work(
+	struct work_struct	*work)
 {
-	bool		read = !!(bp->b_flags & XBF_READ);
-
-	trace_xfs_buf_iodone(bp, _RET_IP_);
+	struct xfs_buf		*bp =
+		container_of(work, xfs_buf_t, b_iodone_work);
 
-	if (bp->b_error == 0)
-		bp->b_flags |= XBF_DONE;
+	xfs_buf_ioend(bp);
+}
 
-	if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
-		if (schedule) {
-			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-		} else {
-			xfs_buf_iodone_work(&bp->b_iodone_work);
-		}
-	} else {
-		bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-		complete(&bp->b_iowait);
-	}
+void
+xfs_buf_ioend_async(
+	struct xfs_buf	*bp)
+{
+	INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
+	queue_work(xfslogd_workqueue, &bp->b_iodone_work);
 }
 
 void
@@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert(
 		(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
 }
 
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-	xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-	/*
-	 * No need to wait until the buffer is unpinned, we aren't flushing it.
-	 */
-	xfs_buf_ioerror(bp, -EIO);
-
-	/*
-	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDONE(bp);
-	xfs_buf_stale(bp);
-
-	xfs_buf_ioend(bp, 0);
-
-	return -EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-int
-xfs_bioerror_relse(
-	struct xfs_buf	*bp)
-{
-	int64_t		fl = bp->b_flags;
-	/*
-	 * No need to wait until the buffer is unpinned.
-	 * We aren't flushing it.
-	 *
-	 * chunkhold expects B_DONE to be set, whether
-	 * we actually finish the I/O or not. We don't want to
-	 * change that interface.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_DONE(bp);
-	xfs_buf_stale(bp);
-	bp->b_iodone = NULL;
-	if (!(fl & XBF_ASYNC)) {
-		/*
-		 * Mark b_error and B_ERROR _both_.
-		 * Lot's of chunkcache code assumes that.
-		 * There's no reason to mark error for
-		 * ASYNC buffers.
-		 */
-		xfs_buf_ioerror(bp, -EIO);
-		complete(&bp->b_iowait);
-	} else {
-		xfs_buf_relse(bp);
-	}
-
-	return -EIO;
-}
-
-STATIC int
-xfs_bdstrat_cb(
-	struct xfs_buf	*bp)
-{
-	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		/*
-		 * Metadata write that didn't get logged but
-		 * written delayed anyway. These aren't associated
-		 * with a transaction, and can be ignored.
-		 */
-		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-			return xfs_bioerror_relse(bp);
-		else
-			return xfs_bioerror(bp);
-	}
-
-	xfs_buf_iorequest(bp);
-	return 0;
-}
-
 int
 xfs_bwrite(
 	struct xfs_buf	*bp)
@@ -1166,11 +1085,10 @@ xfs_bwrite(
 	ASSERT(xfs_buf_islocked(bp));
 
 	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
+			 XBF_WRITE_FAIL | XBF_DONE);
 
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
+	error = xfs_buf_submit_wait(bp);
 	if (error) {
 		xfs_force_shutdown(bp->b_target->bt_mount,
 				   SHUTDOWN_META_IO_ERROR);
@@ -1179,15 +1097,6 @@ xfs_bwrite(
 }
 
 STATIC void
-_xfs_buf_ioend(
-	xfs_buf_t	*bp,
-	int		schedule)
-{
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-		xfs_buf_ioend(bp, schedule);
-}
-
-STATIC void
 xfs_buf_bio_end_io(
 	struct bio		*bio,
 	int			error)
@@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io(
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (!bp->b_error)
-		xfs_buf_ioerror(bp, error);
+	if (error) {
+		spin_lock(&bp->b_lock);
+		if (!bp->b_io_error)
+			bp->b_io_error = error;
+		spin_unlock(&bp->b_lock);
+	}
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
 
-	_xfs_buf_ioend(bp, 1);
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend_async(bp);
 	bio_put(bio);
 }
 
@@ -1283,7 +1197,7 @@ next_chunk:
 	} else {
 		/*
 		 * This is guaranteed not to be the last io reference count
-		 * because the caller (xfs_buf_iorequest) holds a count itself.
+		 * because the caller (xfs_buf_submit) holds a count itself.
 		 */
 		atomic_dec(&bp->b_io_remaining);
 		xfs_buf_ioerror(bp, -EIO);
@@ -1373,53 +1287,131 @@ _xfs_buf_ioapply(
1373 blk_finish_plug(&plug); 1287 blk_finish_plug(&plug);
1374} 1288}
1375 1289
1290/*
1291 * Asynchronous IO submission path. This transfers the buffer lock ownership and
1292 * the current reference to the IO. It is not safe to reference the buffer after
1293 * a call to this function unless the caller holds an additional reference
1294 * itself.
1295 */
1376void 1296void
1377xfs_buf_iorequest( 1297xfs_buf_submit(
1378 xfs_buf_t *bp) 1298 struct xfs_buf *bp)
1379{ 1299{
1380 trace_xfs_buf_iorequest(bp, _RET_IP_); 1300 trace_xfs_buf_submit(bp, _RET_IP_);
1381 1301
1382 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 1302 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
1303 ASSERT(bp->b_flags & XBF_ASYNC);
1304
1305 /* on shutdown we stale and complete the buffer immediately */
1306 if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
1307 xfs_buf_ioerror(bp, -EIO);
1308 bp->b_flags &= ~XBF_DONE;
1309 xfs_buf_stale(bp);
1310 xfs_buf_ioend(bp);
1311 return;
1312 }
1383 1313
1384 if (bp->b_flags & XBF_WRITE) 1314 if (bp->b_flags & XBF_WRITE)
1385 xfs_buf_wait_unpin(bp); 1315 xfs_buf_wait_unpin(bp);
1316
1317 /* clear the internal error state to avoid spurious errors */
1318 bp->b_io_error = 0;
1319
1320 /*
1321 * The caller's reference is released during I/O completion.
1322 * This occurs some time after the last b_io_remaining reference is
 1323 * released, so after we drop our IO reference we must hold some
1324 * other reference to ensure the buffer doesn't go away from underneath
1325 * us. Take a direct reference to ensure we have safe access to the
1326 * buffer until we are finished with it.
1327 */
1386 xfs_buf_hold(bp); 1328 xfs_buf_hold(bp);
1387 1329
1388 /* 1330 /*
 1389 * Set the count to 1 initially, this will stop an I/O 1331 * Set the count to 1 initially; this stops an I/O completion
1390 * completion callout which happens before we have started 1332 * callout which happens before we have started all the I/O from calling
1391 * all the I/O from calling xfs_buf_ioend too early. 1333 * xfs_buf_ioend too early.
1392 */ 1334 */
1393 atomic_set(&bp->b_io_remaining, 1); 1335 atomic_set(&bp->b_io_remaining, 1);
1394 _xfs_buf_ioapply(bp); 1336 _xfs_buf_ioapply(bp);
1337
1395 /* 1338 /*
1396 * If _xfs_buf_ioapply failed, we'll get back here with 1339 * If _xfs_buf_ioapply failed, we can get back here with only the IO
1397 * only the reference we took above. _xfs_buf_ioend will 1340 * reference we took above. If we drop it to zero, run completion so
1398 * drop it to zero, so we'd better not queue it for later, 1341 * that we don't return to the caller with completion still pending.
1399 * or we'll free it before it's done.
1400 */ 1342 */
1401 _xfs_buf_ioend(bp, bp->b_error ? 0 : 1); 1343 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1344 if (bp->b_error)
1345 xfs_buf_ioend(bp);
1346 else
1347 xfs_buf_ioend_async(bp);
1348 }
1402 1349
1403 xfs_buf_rele(bp); 1350 xfs_buf_rele(bp);
1351 /* Note: it is not safe to reference bp now we've dropped our ref */
1404} 1352}
1405 1353
1406/* 1354/*
1407 * Waits for I/O to complete on the buffer supplied. It returns immediately if 1355 * Synchronous buffer IO submission path, read or write.
1408 * no I/O is pending or there is already a pending error on the buffer, in which
1409 * case nothing will ever complete. It returns the I/O error code, if any, or
1410 * 0 if there was no error.
1411 */ 1356 */
1412int 1357int
1413xfs_buf_iowait( 1358xfs_buf_submit_wait(
1414 xfs_buf_t *bp) 1359 struct xfs_buf *bp)
1415{ 1360{
1416 trace_xfs_buf_iowait(bp, _RET_IP_); 1361 int error;
1417 1362
1418 if (!bp->b_error) 1363 trace_xfs_buf_submit_wait(bp, _RET_IP_);
1419 wait_for_completion(&bp->b_iowait); 1364
1365 ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
1366
1367 if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
1368 xfs_buf_ioerror(bp, -EIO);
1369 xfs_buf_stale(bp);
1370 bp->b_flags &= ~XBF_DONE;
1371 return -EIO;
1372 }
1373
1374 if (bp->b_flags & XBF_WRITE)
1375 xfs_buf_wait_unpin(bp);
1376
1377 /* clear the internal error state to avoid spurious errors */
1378 bp->b_io_error = 0;
1379
1380 /*
 1381 * For synchronous IO, the IO does not inherit the submitter's reference
1382 * count, nor the buffer lock. Hence we cannot release the reference we
1383 * are about to take until we've waited for all IO completion to occur,
1384 * including any xfs_buf_ioend_async() work that may be pending.
1385 */
1386 xfs_buf_hold(bp);
1387
1388 /*
 1389 * Set the count to 1 initially; this stops an I/O completion
1390 * callout which happens before we have started all the I/O from calling
1391 * xfs_buf_ioend too early.
1392 */
1393 atomic_set(&bp->b_io_remaining, 1);
1394 _xfs_buf_ioapply(bp);
1395
1396 /*
1397 * make sure we run completion synchronously if it raced with us and is
1398 * already complete.
1399 */
1400 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1401 xfs_buf_ioend(bp);
1420 1402
1403 /* wait for completion before gathering the error from the buffer */
1404 trace_xfs_buf_iowait(bp, _RET_IP_);
1405 wait_for_completion(&bp->b_iowait);
1421 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1406 trace_xfs_buf_iowait_done(bp, _RET_IP_);
1422 return bp->b_error; 1407 error = bp->b_error;
1408
1409 /*
1410 * all done now, we can release the hold that keeps the buffer
1411 * referenced for the entire IO.
1412 */
1413 xfs_buf_rele(bp);
1414 return error;
1423} 1415}
1424 1416
1425xfs_caddr_t 1417xfs_caddr_t
@@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit(
1813 blk_start_plug(&plug); 1805 blk_start_plug(&plug);
1814 list_for_each_entry_safe(bp, n, io_list, b_list) { 1806 list_for_each_entry_safe(bp, n, io_list, b_list) {
1815 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); 1807 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
1816 bp->b_flags |= XBF_WRITE; 1808 bp->b_flags |= XBF_WRITE | XBF_ASYNC;
1817 1809
1818 if (!wait) { 1810 /*
 1819 bp->b_flags |= XBF_ASYNC; 1811 * we do all IO submission async. This means if we need to wait
1812 * for IO completion we need to take an extra reference so the
1813 * buffer is still valid on the other side.
1814 */
1815 if (wait)
1816 xfs_buf_hold(bp);
1817 else
1820 list_del_init(&bp->b_list); 1818 list_del_init(&bp->b_list);
1821 } 1819
1822 xfs_bdstrat_cb(bp); 1820 xfs_buf_submit(bp);
1823 } 1821 }
1824 blk_finish_plug(&plug); 1822 blk_finish_plug(&plug);
1825 1823
@@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit(
1866 bp = list_first_entry(&io_list, struct xfs_buf, b_list); 1864 bp = list_first_entry(&io_list, struct xfs_buf, b_list);
1867 1865
1868 list_del_init(&bp->b_list); 1866 list_del_init(&bp->b_list);
1869 error2 = xfs_buf_iowait(bp); 1867
1868 /* locking the buffer will wait for async IO completion. */
1869 xfs_buf_lock(bp);
1870 error2 = bp->b_error;
1870 xfs_buf_relse(bp); 1871 xfs_buf_relse(bp);
1871 if (!error) 1872 if (!error)
1872 error = error2; 1873 error = error2;
@@ -1884,7 +1885,7 @@ xfs_buf_init(void)
1884 goto out; 1885 goto out;
1885 1886
1886 xfslogd_workqueue = alloc_workqueue("xfslogd", 1887 xfslogd_workqueue = alloc_workqueue("xfslogd",
1887 WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); 1888 WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1);
1888 if (!xfslogd_workqueue) 1889 if (!xfslogd_workqueue)
1889 goto out_free_buf_zone; 1890 goto out_free_buf_zone;
1890 1891
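
The net effect of the xfs_buf.c changes above is a clean ownership contract: xfs_buf_submit() hands both the buffer lock and the caller's reference over to the IO, while xfs_buf_submit_wait() keeps them with the caller and only takes a temporary hold across the wait. A minimal userspace sketch of the async half of that contract, using stand-in names (buf, hold/rele, submit_async are illustrative, not the kernel API):

	#include <stdatomic.h>

	struct buf {
		atomic_int refcount;		/* lifetime references (b_hold) */
		atomic_int io_remaining;	/* in-flight IOs + one submit ref */
		int error;
	};

	static void hold(struct buf *bp) { atomic_fetch_add(&bp->refcount, 1); }
	static void rele(struct buf *bp) { atomic_fetch_sub(&bp->refcount, 1); }
	static void start_io(struct buf *bp) { (void)bp; /* would queue bios */ }
	static void ioend(struct buf *bp)    { (void)bp; /* completion work */ }

	/* Async path: the lock and the caller's reference now belong to the IO. */
	static void submit_async(struct buf *bp)
	{
		hold(bp);			/* keep bp alive past the rele below */
		atomic_store(&bp->io_remaining, 1); /* block premature completion */
		start_io(bp);
		if (atomic_fetch_sub(&bp->io_remaining, 1) == 1)
			ioend(bp);		/* IO never started: complete here */
		rele(bp);
		/* bp may already be freed by completion: do not touch it again */
	}

The sync variant differs only in that the hold is released after wait_for_completion(), which is why the hunk above takes the extra reference before applying the IO.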
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c753183900b3..82002c00af90 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -158,6 +158,7 @@ typedef struct xfs_buf {
158 struct list_head b_lru; /* lru list */ 158 struct list_head b_lru; /* lru list */
159 spinlock_t b_lock; /* internal state lock */ 159 spinlock_t b_lock; /* internal state lock */
160 unsigned int b_state; /* internal state flags */ 160 unsigned int b_state; /* internal state flags */
161 int b_io_error; /* internal IO error state */
161 wait_queue_head_t b_waiters; /* unpin waiters */ 162 wait_queue_head_t b_waiters; /* unpin waiters */
162 struct list_head b_list; 163 struct list_head b_list;
163 struct xfs_perag *b_pag; /* contains rbtree root */ 164 struct xfs_perag *b_pag; /* contains rbtree root */
@@ -268,9 +269,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
268 269
269struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, 270struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
270 int flags); 271 int flags);
271struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, 272int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
272 xfs_daddr_t daddr, size_t numblks, int flags, 273 size_t numblks, int flags, struct xfs_buf **bpp,
273 const struct xfs_buf_ops *ops); 274 const struct xfs_buf_ops *ops);
274void xfs_buf_hold(struct xfs_buf *bp); 275void xfs_buf_hold(struct xfs_buf *bp);
275 276
276/* Releasing Buffers */ 277/* Releasing Buffers */
@@ -286,18 +287,16 @@ extern void xfs_buf_unlock(xfs_buf_t *);
286 287
287/* Buffer Read and Write Routines */ 288/* Buffer Read and Write Routines */
288extern int xfs_bwrite(struct xfs_buf *bp); 289extern int xfs_bwrite(struct xfs_buf *bp);
289extern void xfs_buf_ioend(xfs_buf_t *, int); 290extern void xfs_buf_ioend(struct xfs_buf *bp);
290extern void xfs_buf_ioerror(xfs_buf_t *, int); 291extern void xfs_buf_ioerror(xfs_buf_t *, int);
291extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); 292extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
292extern void xfs_buf_iorequest(xfs_buf_t *); 293extern void xfs_buf_submit(struct xfs_buf *bp);
293extern int xfs_buf_iowait(xfs_buf_t *); 294extern int xfs_buf_submit_wait(struct xfs_buf *bp);
294extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, 295extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
295 xfs_buf_rw_t); 296 xfs_buf_rw_t);
296#define xfs_buf_zero(bp, off, len) \ 297#define xfs_buf_zero(bp, off, len) \
297 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 298 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
298 299
299extern int xfs_bioerror_relse(struct xfs_buf *);
300
301/* Buffer Utility Routines */ 300/* Buffer Utility Routines */
302extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); 301extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
303 302
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 76007deed31f..f15969543326 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -491,7 +491,7 @@ xfs_buf_item_unpin(
491 xfs_buf_ioerror(bp, -EIO); 491 xfs_buf_ioerror(bp, -EIO);
492 XFS_BUF_UNDONE(bp); 492 XFS_BUF_UNDONE(bp);
493 xfs_buf_stale(bp); 493 xfs_buf_stale(bp);
494 xfs_buf_ioend(bp, 0); 494 xfs_buf_ioend(bp);
495 } 495 }
496} 496}
497 497
@@ -501,7 +501,7 @@ xfs_buf_item_unpin(
501 * buffer being bad.. 501 * buffer being bad..
502 */ 502 */
503 503
504DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); 504static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
505 505
506STATIC uint 506STATIC uint
507xfs_buf_item_push( 507xfs_buf_item_push(
@@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks(
1081 * a way to shut the filesystem down if the writes keep failing. 1081 * a way to shut the filesystem down if the writes keep failing.
1082 * 1082 *
1083 * In practice we'll shut the filesystem down soon as non-transient 1083 * In practice we'll shut the filesystem down soon as non-transient
1084 * erorrs tend to affect the whole device and a failing log write 1084 * errors tend to affect the whole device and a failing log write
1085 * will make us give up. But we really ought to do better here. 1085 * will make us give up. But we really ought to do better here.
1086 */ 1086 */
1087 if (XFS_BUF_ISASYNC(bp)) { 1087 if (XFS_BUF_ISASYNC(bp)) {
@@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks(
1094 if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { 1094 if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
1095 bp->b_flags |= XBF_WRITE | XBF_ASYNC | 1095 bp->b_flags |= XBF_WRITE | XBF_ASYNC |
1096 XBF_DONE | XBF_WRITE_FAIL; 1096 XBF_DONE | XBF_WRITE_FAIL;
1097 xfs_buf_iorequest(bp); 1097 xfs_buf_submit(bp);
1098 } else { 1098 } else {
1099 xfs_buf_relse(bp); 1099 xfs_buf_relse(bp);
1100 } 1100 }
@@ -1115,7 +1115,7 @@ do_callbacks:
1115 xfs_buf_do_callbacks(bp); 1115 xfs_buf_do_callbacks(bp);
1116 bp->b_fspriv = NULL; 1116 bp->b_fspriv = NULL;
1117 bp->b_iodone = NULL; 1117 bp->b_iodone = NULL;
1118 xfs_buf_ioend(bp, 0); 1118 xfs_buf_ioend(bp);
1119} 1119}
1120 1120
1121/* 1121/*
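
The retry logic visible in the xfs_buf_item.c hunks is worth spelling out: a failed async metadata write is resubmitted exactly once, tagged XBF_WRITE_FAIL, and only a second failure gives up on the buffer. A condensed sketch of that one-shot retry, with stand-in flag values and types rather than the kernel definitions:

	enum {
		B_WRITE      = 1 << 0,
		B_ASYNC      = 1 << 1,
		B_DONE       = 1 << 2,
		B_WRITE_FAIL = 1 << 3,	/* set on the first failed attempt */
		B_STALE      = 1 << 4,
	};

	struct buf { unsigned int flags; int error; };

	static void submit(struct buf *bp)  { (void)bp; /* requeue the IO */ }
	static void release(struct buf *bp) { (void)bp; /* unlock, drop ref */ }

	static void iodone_write_retry(struct buf *bp)
	{
		if (!(bp->flags & (B_STALE | B_WRITE_FAIL))) {
			/* first failure: mark it and resubmit the write once */
			bp->flags |= B_WRITE | B_ASYNC | B_DONE | B_WRITE_FAIL;
			submit(bp);
		} else {
			/* second failure: leave it for the AIL to deal with */
			release(bp);
		}
	}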
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index de5368c803f9..eb596b419942 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -983,7 +983,7 @@ xfs_vm_page_mkwrite(
983 983
984/* 984/*
985 * This type is designed to indicate the type of offset we would like 985 * This type is designed to indicate the type of offset we would like
986 * to search from page cache for either xfs_seek_data() or xfs_seek_hole(). 986 * to search from page cache for xfs_seek_hole_data().
987 */ 987 */
988enum { 988enum {
989 HOLE_OFF = 0, 989 HOLE_OFF = 0,
@@ -1040,7 +1040,7 @@ xfs_lookup_buffer_offset(
1040/* 1040/*
1041 * This routine is called to find out and return a data or hole offset 1041 * This routine is called to find out and return a data or hole offset
1042 * from the page cache for unwritten extents according to the desired 1042 * from the page cache for unwritten extents according to the desired
1043 * type for xfs_seek_data() or xfs_seek_hole(). 1043 * type for xfs_seek_hole_data().
1044 * 1044 *
1045 * The argument offset is used to tell where we start to search from the 1045 * The argument offset is used to tell where we start to search from the
1046 * page cache. Map is used to figure out the end points of the range to 1046 * page cache. Map is used to figure out the end points of the range to
@@ -1200,9 +1200,10 @@ out:
1200} 1200}
1201 1201
1202STATIC loff_t 1202STATIC loff_t
1203xfs_seek_data( 1203xfs_seek_hole_data(
1204 struct file *file, 1204 struct file *file,
1205 loff_t start) 1205 loff_t start,
1206 int whence)
1206{ 1207{
1207 struct inode *inode = file->f_mapping->host; 1208 struct inode *inode = file->f_mapping->host;
1208 struct xfs_inode *ip = XFS_I(inode); 1209 struct xfs_inode *ip = XFS_I(inode);
@@ -1214,6 +1215,9 @@ xfs_seek_data(
1214 uint lock; 1215 uint lock;
1215 int error; 1216 int error;
1216 1217
1218 if (XFS_FORCED_SHUTDOWN(mp))
1219 return -EIO;
1220
1217 lock = xfs_ilock_data_map_shared(ip); 1221 lock = xfs_ilock_data_map_shared(ip);
1218 1222
1219 isize = i_size_read(inode); 1223 isize = i_size_read(inode);
@@ -1228,6 +1232,7 @@ xfs_seek_data(
1228 */ 1232 */
1229 fsbno = XFS_B_TO_FSBT(mp, start); 1233 fsbno = XFS_B_TO_FSBT(mp, start);
1230 end = XFS_B_TO_FSB(mp, isize); 1234 end = XFS_B_TO_FSB(mp, isize);
1235
1231 for (;;) { 1236 for (;;) {
1232 struct xfs_bmbt_irec map[2]; 1237 struct xfs_bmbt_irec map[2];
1233 int nmap = 2; 1238 int nmap = 2;
@@ -1248,29 +1253,48 @@ xfs_seek_data(
1248 offset = max_t(loff_t, start, 1253 offset = max_t(loff_t, start,
1249 XFS_FSB_TO_B(mp, map[i].br_startoff)); 1254 XFS_FSB_TO_B(mp, map[i].br_startoff));
1250 1255
1251 /* Landed in a data extent */ 1256 /* Landed in the hole we wanted? */
1252 if (map[i].br_startblock == DELAYSTARTBLOCK || 1257 if (whence == SEEK_HOLE &&
1253 (map[i].br_state == XFS_EXT_NORM && 1258 map[i].br_startblock == HOLESTARTBLOCK)
1254 !isnullstartblock(map[i].br_startblock))) 1259 goto out;
1260
1261 /* Landed in the data extent we wanted? */
1262 if (whence == SEEK_DATA &&
1263 (map[i].br_startblock == DELAYSTARTBLOCK ||
1264 (map[i].br_state == XFS_EXT_NORM &&
1265 !isnullstartblock(map[i].br_startblock))))
1255 goto out; 1266 goto out;
1256 1267
1257 /* 1268 /*
1258 * Landed in an unwritten extent, try to search data 1269 * Landed in an unwritten extent, try to search
 1259 * from page cache. 1270 * for a hole or data in the page cache.
1260 */ 1271 */
1261 if (map[i].br_state == XFS_EXT_UNWRITTEN) { 1272 if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1262 if (xfs_find_get_desired_pgoff(inode, &map[i], 1273 if (xfs_find_get_desired_pgoff(inode, &map[i],
1263 DATA_OFF, &offset)) 1274 whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
1275 &offset))
1264 goto out; 1276 goto out;
1265 } 1277 }
1266 } 1278 }
1267 1279
1268 /* 1280 /*
1269 * map[0] is hole or its an unwritten extent but 1281 * We only received one extent out of the two requested. This
1270 * without data in page cache. Probably means that 1282 * means we've hit EOF and didn't find what we are looking for.
1271 * we are reading after EOF if nothing in map[1].
1272 */ 1283 */
1273 if (nmap == 1) { 1284 if (nmap == 1) {
1285 /*
1286 * If we were looking for a hole, set offset to
1287 * the end of the file (i.e., there is an implicit
1288 * hole at the end of any file).
1289 */
1290 if (whence == SEEK_HOLE) {
1291 offset = isize;
1292 break;
1293 }
1294 /*
1295 * If we were looking for data, it's nowhere to be found
1296 */
1297 ASSERT(whence == SEEK_DATA);
1274 error = -ENXIO; 1298 error = -ENXIO;
1275 goto out_unlock; 1299 goto out_unlock;
1276 } 1300 }
@@ -1279,125 +1303,30 @@ xfs_seek_data(
1279 1303
1280 /* 1304 /*
1281 * Nothing was found, proceed to the next round of search 1305 * Nothing was found, proceed to the next round of search
1282 * if reading offset not beyond or hit EOF. 1306 * if the next reading offset is not at or beyond EOF.
1283 */ 1307 */
1284 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; 1308 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1285 start = XFS_FSB_TO_B(mp, fsbno); 1309 start = XFS_FSB_TO_B(mp, fsbno);
1286 if (start >= isize) { 1310 if (start >= isize) {
1311 if (whence == SEEK_HOLE) {
1312 offset = isize;
1313 break;
1314 }
1315 ASSERT(whence == SEEK_DATA);
1287 error = -ENXIO; 1316 error = -ENXIO;
1288 goto out_unlock; 1317 goto out_unlock;
1289 } 1318 }
1290 } 1319 }
1291 1320
1292out: 1321out:
1293 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
1294
1295out_unlock:
1296 xfs_iunlock(ip, lock);
1297
1298 if (error)
1299 return error;
1300 return offset;
1301}
1302
1303STATIC loff_t
1304xfs_seek_hole(
1305 struct file *file,
1306 loff_t start)
1307{
1308 struct inode *inode = file->f_mapping->host;
1309 struct xfs_inode *ip = XFS_I(inode);
1310 struct xfs_mount *mp = ip->i_mount;
1311 loff_t uninitialized_var(offset);
1312 xfs_fsize_t isize;
1313 xfs_fileoff_t fsbno;
1314 xfs_filblks_t end;
1315 uint lock;
1316 int error;
1317
1318 if (XFS_FORCED_SHUTDOWN(mp))
1319 return -EIO;
1320
1321 lock = xfs_ilock_data_map_shared(ip);
1322
1323 isize = i_size_read(inode);
1324 if (start >= isize) {
1325 error = -ENXIO;
1326 goto out_unlock;
1327 }
1328
1329 fsbno = XFS_B_TO_FSBT(mp, start);
1330 end = XFS_B_TO_FSB(mp, isize);
1331
1332 for (;;) {
1333 struct xfs_bmbt_irec map[2];
1334 int nmap = 2;
1335 unsigned int i;
1336
1337 error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
1338 XFS_BMAPI_ENTIRE);
1339 if (error)
1340 goto out_unlock;
1341
1342 /* No extents at given offset, must be beyond EOF */
1343 if (nmap == 0) {
1344 error = -ENXIO;
1345 goto out_unlock;
1346 }
1347
1348 for (i = 0; i < nmap; i++) {
1349 offset = max_t(loff_t, start,
1350 XFS_FSB_TO_B(mp, map[i].br_startoff));
1351
1352 /* Landed in a hole */
1353 if (map[i].br_startblock == HOLESTARTBLOCK)
1354 goto out;
1355
1356 /*
1357 * Landed in an unwritten extent, try to search hole
1358 * from page cache.
1359 */
1360 if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1361 if (xfs_find_get_desired_pgoff(inode, &map[i],
1362 HOLE_OFF, &offset))
1363 goto out;
1364 }
1365 }
1366
1367 /*
1368 * map[0] contains data or its unwritten but contains
1369 * data in page cache, probably means that we are
1370 * reading after EOF. We should fix offset to point
1371 * to the end of the file(i.e., there is an implicit
1372 * hole at the end of any file).
1373 */
1374 if (nmap == 1) {
1375 offset = isize;
1376 break;
1377 }
1378
1379 ASSERT(i > 1);
1380
1381 /*
1382 * Both mappings contains data, proceed to the next round of
1383 * search if the current reading offset not beyond or hit EOF.
1384 */
1385 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1386 start = XFS_FSB_TO_B(mp, fsbno);
1387 if (start >= isize) {
1388 offset = isize;
1389 break;
1390 }
1391 }
1392
1393out:
1394 /* 1322 /*
1395 * At this point, we must have found a hole. However, the returned 1323 * If at this point we have found the hole we wanted, the returned
1396 * offset may be bigger than the file size as it may be aligned to 1324 * offset may be bigger than the file size as it may be aligned to
 1397 * page boundary for unwritten extents, we need to deal with this 1325 * a page boundary for unwritten extents. We need to deal with this
1398 * situation in particular. 1326 * situation in particular.
1399 */ 1327 */
1400 offset = min_t(loff_t, offset, isize); 1328 if (whence == SEEK_HOLE)
1329 offset = min_t(loff_t, offset, isize);
1401 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 1330 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
1402 1331
1403out_unlock: 1332out_unlock:
@@ -1412,17 +1341,16 @@ STATIC loff_t
1412xfs_file_llseek( 1341xfs_file_llseek(
1413 struct file *file, 1342 struct file *file,
1414 loff_t offset, 1343 loff_t offset,
1415 int origin) 1344 int whence)
1416{ 1345{
1417 switch (origin) { 1346 switch (whence) {
1418 case SEEK_END: 1347 case SEEK_END:
1419 case SEEK_CUR: 1348 case SEEK_CUR:
1420 case SEEK_SET: 1349 case SEEK_SET:
1421 return generic_file_llseek(file, offset, origin); 1350 return generic_file_llseek(file, offset, whence);
1422 case SEEK_DATA:
1423 return xfs_seek_data(file, offset);
1424 case SEEK_HOLE: 1351 case SEEK_HOLE:
1425 return xfs_seek_hole(file, offset); 1352 case SEEK_DATA:
1353 return xfs_seek_hole_data(file, offset, whence);
1426 default: 1354 default:
1427 return -EINVAL; 1355 return -EINVAL;
1428 } 1356 }
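
Folding xfs_seek_data() and xfs_seek_hole() into one whence-driven routine also means both directions now share the implicit-hole-at-EOF rule. A small userspace program exercising exactly those semantics through lseek(2); it assumes a sparse test file named sparse.dat exists:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("sparse.dat", O_RDONLY);
		if (fd < 0)
			return 1;

		off_t data = lseek(fd, 0, SEEK_DATA);	/* first data extent */
		if (data < 0) {				/* ENXIO: no data at all */
			perror("SEEK_DATA");
			return 1;
		}
		/* every file ends in an implicit hole, so this always succeeds */
		off_t hole = lseek(fd, data, SEEK_HOLE);
		printf("data at %lld, next hole at %lld\n",
		       (long long)data, (long long)hole);
		close(fd);
		return 0;
	}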
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index f91de1ef05e1..c05ac8b70fa9 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -172,16 +172,11 @@ xfs_growfs_data_private(
172 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) 172 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
173 return error; 173 return error;
174 dpct = pct - mp->m_sb.sb_imax_pct; 174 dpct = pct - mp->m_sb.sb_imax_pct;
175 bp = xfs_buf_read_uncached(mp->m_ddev_targp, 175 error = xfs_buf_read_uncached(mp->m_ddev_targp,
176 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 176 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
177 XFS_FSS_TO_BB(mp, 1), 0, NULL); 177 XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
178 if (!bp) 178 if (error)
179 return -EIO;
180 if (bp->b_error) {
181 error = bp->b_error;
182 xfs_buf_relse(bp);
183 return error; 179 return error;
184 }
185 xfs_buf_relse(bp); 180 xfs_buf_relse(bp);
186 181
187 new = nb; /* use new as a temporary here */ 182 new = nb; /* use new as a temporary here */
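
The xfs_buf_read_uncached() conversion shown here swaps a "return the buffer, caller checks b_error" convention for "return an int, set the out parameter only on success", which is what lets the caller above collapse to a single error test. A generic sketch of the pattern, with hypothetical names rather than the kernel helpers:

	#include <errno.h>
	#include <stdlib.h>

	struct buf { int error; /* ... IO state ... */ };

	static int read_uncached(struct buf **bpp)
	{
		struct buf *bp = calloc(1, sizeof(*bp));

		if (!bp)
			return -ENOMEM;
		/* ...issue the read and wait; failures land in bp->error... */
		if (bp->error) {
			int error = bp->error;

			free(bp);	/* caller never sees a half-valid buffer */
			return error;
		}
		*bpp = bp;		/* set the out parameter on success only */
		return 0;
	}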
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 5399ef222dd7..4d41b241298f 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -43,3 +43,7 @@ xfs_param_t xfs_params = {
43 .fstrm_timer = { 1, 30*100, 3600*100}, 43 .fstrm_timer = { 1, 30*100, 3600*100},
44 .eofb_timer = { 1, 300, 3600*24}, 44 .eofb_timer = { 1, 300, 3600*24},
45}; 45};
46
47struct xfs_globals xfs_globals = {
48 .log_recovery_delay = 0, /* no delay by default */
49};
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 981b2cf51985..b45f7b27b5df 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,7 +33,6 @@
33#include "xfs_trace.h" 33#include "xfs_trace.h"
34#include "xfs_icache.h" 34#include "xfs_icache.h"
35#include "xfs_bmap_util.h" 35#include "xfs_bmap_util.h"
36#include "xfs_quota.h"
37#include "xfs_dquot_item.h" 36#include "xfs_dquot_item.h"
38#include "xfs_dquot.h" 37#include "xfs_dquot.h"
39 38
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index fea3c92fb3f0..8ed049d1e332 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -654,7 +654,7 @@ xfs_ialloc(
654 xfs_inode_t *ip; 654 xfs_inode_t *ip;
655 uint flags; 655 uint flags;
656 int error; 656 int error;
657 timespec_t tv; 657 struct timespec tv;
658 658
659 /* 659 /*
660 * Call the space management code to pick 660 * Call the space management code to pick
@@ -720,7 +720,7 @@ xfs_ialloc(
720 ip->i_d.di_nextents = 0; 720 ip->i_d.di_nextents = 0;
721 ASSERT(ip->i_d.di_nblocks == 0); 721 ASSERT(ip->i_d.di_nblocks == 0);
722 722
723 nanotime(&tv); 723 tv = current_fs_time(mp->m_super);
724 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 724 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
725 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 725 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
726 ip->i_d.di_atime = ip->i_d.di_mtime; 726 ip->i_d.di_atime = ip->i_d.di_mtime;
@@ -769,6 +769,8 @@ xfs_ialloc(
769 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 769 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
770 ip->i_d.di_extsize = pip->i_d.di_extsize; 770 ip->i_d.di_extsize = pip->i_d.di_extsize;
771 } 771 }
772 if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
773 di_flags |= XFS_DIFLAG_PROJINHERIT;
772 } else if (S_ISREG(mode)) { 774 } else if (S_ISREG(mode)) {
773 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 775 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
774 di_flags |= XFS_DIFLAG_REALTIME; 776 di_flags |= XFS_DIFLAG_REALTIME;
@@ -789,8 +791,6 @@ xfs_ialloc(
789 if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 791 if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
790 xfs_inherit_nosymlinks) 792 xfs_inherit_nosymlinks)
791 di_flags |= XFS_DIFLAG_NOSYMLINKS; 793 di_flags |= XFS_DIFLAG_NOSYMLINKS;
792 if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
793 di_flags |= XFS_DIFLAG_PROJINHERIT;
794 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 794 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
795 xfs_inherit_nodefrag) 795 xfs_inherit_nodefrag)
796 di_flags |= XFS_DIFLAG_NODEFRAG; 796 di_flags |= XFS_DIFLAG_NODEFRAG;
@@ -1153,9 +1153,11 @@ xfs_create(
1153 if (error) 1153 if (error)
1154 goto out_trans_cancel; 1154 goto out_trans_cancel;
1155 1155
1156 error = xfs_dir_canenter(tp, dp, name, resblks); 1156 if (!resblks) {
1157 if (error) 1157 error = xfs_dir_canenter(tp, dp, name);
1158 goto out_trans_cancel; 1158 if (error)
1159 goto out_trans_cancel;
1160 }
1159 1161
1160 /* 1162 /*
1161 * A newly created regular or special file just has one directory 1163 * A newly created regular or special file just has one directory
@@ -1421,9 +1423,11 @@ xfs_link(
1421 goto error_return; 1423 goto error_return;
1422 } 1424 }
1423 1425
1424 error = xfs_dir_canenter(tp, tdp, target_name, resblks); 1426 if (!resblks) {
1425 if (error) 1427 error = xfs_dir_canenter(tp, tdp, target_name);
1426 goto error_return; 1428 if (error)
1429 goto error_return;
1430 }
1427 1431
1428 xfs_bmap_init(&free_list, &first_block); 1432 xfs_bmap_init(&free_list, &first_block);
1429 1433
@@ -2759,9 +2763,11 @@ xfs_rename(
2759 * If there's no space reservation, check the entry will 2763 * If there's no space reservation, check the entry will
2760 * fit before actually inserting it. 2764 * fit before actually inserting it.
2761 */ 2765 */
2762 error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); 2766 if (!spaceres) {
2763 if (error) 2767 error = xfs_dir_canenter(tp, target_dp, target_name);
2764 goto error_return; 2768 if (error)
2769 goto error_return;
2770 }
2765 /* 2771 /*
2766 * If target does not exist and the rename crosses 2772 * If target does not exist and the rename crosses
2767 * directories, adjust the target directory link count 2773 * directories, adjust the target directory link count
@@ -3056,7 +3062,7 @@ cluster_corrupt_out:
3056 XFS_BUF_UNDONE(bp); 3062 XFS_BUF_UNDONE(bp);
3057 xfs_buf_stale(bp); 3063 xfs_buf_stale(bp);
3058 xfs_buf_ioerror(bp, -EIO); 3064 xfs_buf_ioerror(bp, -EIO);
3059 xfs_buf_ioend(bp, 0); 3065 xfs_buf_ioend(bp);
3060 } else { 3066 } else {
3061 xfs_buf_stale(bp); 3067 xfs_buf_stale(bp);
3062 xfs_buf_relse(bp); 3068 xfs_buf_relse(bp);
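
Three call sites above (create, link, rename) gain the same guard: xfs_dir_canenter() is a free-space probe, and it is only meaningful when the transaction carries no block reservation, because a reservation already guarantees room for the new entry. The shape of that shortcut, with illustrative names:

	static int dir_canenter(void)	/* stand-in for the free-space probe */
	{
		return 0;
	}

	static int add_dir_entry(unsigned int resblks)
	{
		if (!resblks) {
			/* unreserved: fail early, not ENOSPC mid-transaction */
			int error = dir_canenter();

			if (error)
				return error;
		}
		/* ...reserved or probed OK: insert the entry... */
		return 0;
	}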
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c10e3fadd9af..9af2882e1f4c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -102,7 +102,7 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
102{ 102{
103 xfs_fsize_t i_size = i_size_read(VFS_I(ip)); 103 xfs_fsize_t i_size = i_size_read(VFS_I(ip));
104 104
105 if (new_size > i_size) 105 if (new_size > i_size || new_size < 0)
106 new_size = i_size; 106 new_size = i_size;
107 return new_size > ip->i_d.di_size ? new_size : 0; 107 return new_size > ip->i_d.di_size ? new_size : 0;
108} 108}
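
The extra "new_size < 0" test in xfs_new_eof() guards against a signed overflow: xfs_fsize_t is signed, so an offset-plus-count sum near the type maximum wraps negative and would otherwise slip past the size comparison. A compilable demonstration; the wrap is computed through unsigned arithmetic to keep the example itself well defined:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int64_t i_size = 4096;			/* in-core inode size */
		int64_t offset = INT64_MAX - 10;	/* write near the maximum */
		int64_t count = 100;

		/* offset + count wraps; go via uint64_t to avoid UB here */
		int64_t new_size = (int64_t)((uint64_t)offset + (uint64_t)count);

		printf("new_size = %lld (wrapped negative)\n", (long long)new_size);
		if (new_size > i_size || new_size < 0)	/* the added guard */
			new_size = i_size;		/* overflow clamped */
		printf("clamped  = %lld\n", (long long)new_size);
		return 0;
	}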
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index de5a7be36e60..63de0b0acc32 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -615,7 +615,7 @@ xfs_iflush_done(
615 blip = bp->b_fspriv; 615 blip = bp->b_fspriv;
616 prev = NULL; 616 prev = NULL;
617 while (blip != NULL) { 617 while (blip != NULL) {
618 if (lip->li_cb != xfs_iflush_done) { 618 if (blip->li_cb != xfs_iflush_done) {
619 prev = blip; 619 prev = blip;
620 blip = blip->li_bio_list; 620 blip = blip->li_bio_list;
621 continue; 621 continue;
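
The one-character xfs_iflush_done fix above is the classic cursor-versus-parameter list-walk bug: the filter must test the walk cursor (blip), not the item the caller passed in (lip), or every iteration evaluates the same condition. Reduced to a generic list walk over stand-in types:

	#include <stddef.h>

	struct item {
		void (*cb)(struct item *);
		struct item *next;
	};

	static void done_cb(struct item *ip) { (void)ip; }

	/* return the first list entry whose callback is done_cb */
	static struct item *find_done(struct item *head, struct item *lip)
	{
		struct item *blip;

		(void)lip;	/* testing lip->cb made every pass identical */
		for (blip = head; blip; blip = blip->next) {
			if (blip->cb != done_cb)	/* test the cursor */
				continue;
			return blip;
		}
		return NULL;
	}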
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3799695b9249..24c926b6fe85 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -968,8 +968,6 @@ xfs_set_diflags(
968 di_flags |= XFS_DIFLAG_NOATIME; 968 di_flags |= XFS_DIFLAG_NOATIME;
969 if (xflags & XFS_XFLAG_NODUMP) 969 if (xflags & XFS_XFLAG_NODUMP)
970 di_flags |= XFS_DIFLAG_NODUMP; 970 di_flags |= XFS_DIFLAG_NODUMP;
971 if (xflags & XFS_XFLAG_PROJINHERIT)
972 di_flags |= XFS_DIFLAG_PROJINHERIT;
973 if (xflags & XFS_XFLAG_NODEFRAG) 971 if (xflags & XFS_XFLAG_NODEFRAG)
974 di_flags |= XFS_DIFLAG_NODEFRAG; 972 di_flags |= XFS_DIFLAG_NODEFRAG;
975 if (xflags & XFS_XFLAG_FILESTREAM) 973 if (xflags & XFS_XFLAG_FILESTREAM)
@@ -981,6 +979,8 @@ xfs_set_diflags(
981 di_flags |= XFS_DIFLAG_NOSYMLINKS; 979 di_flags |= XFS_DIFLAG_NOSYMLINKS;
982 if (xflags & XFS_XFLAG_EXTSZINHERIT) 980 if (xflags & XFS_XFLAG_EXTSZINHERIT)
983 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 981 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
982 if (xflags & XFS_XFLAG_PROJINHERIT)
983 di_flags |= XFS_DIFLAG_PROJINHERIT;
984 } else if (S_ISREG(ip->i_d.di_mode)) { 984 } else if (S_ISREG(ip->i_d.di_mode)) {
985 if (xflags & XFS_XFLAG_REALTIME) 985 if (xflags & XFS_XFLAG_REALTIME)
986 di_flags |= XFS_DIFLAG_REALTIME; 986 di_flags |= XFS_DIFLAG_REALTIME;
@@ -1231,13 +1231,25 @@ xfs_ioctl_setattr(
1231 1231
1232 } 1232 }
1233 1233
1234 if (mask & FSX_EXTSIZE)
1235 ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
1236 if (mask & FSX_XFLAGS) { 1234 if (mask & FSX_XFLAGS) {
1237 xfs_set_diflags(ip, fa->fsx_xflags); 1235 xfs_set_diflags(ip, fa->fsx_xflags);
1238 xfs_diflags_to_linux(ip); 1236 xfs_diflags_to_linux(ip);
1239 } 1237 }
1240 1238
1239 /*
1240 * Only set the extent size hint if we've already determined that the
1241 * extent size hint should be set on the inode. If no extent size flags
1242 * are set on the inode then unconditionally clear the extent size hint.
1243 */
1244 if (mask & FSX_EXTSIZE) {
1245 int extsize = 0;
1246
1247 if (ip->i_d.di_flags &
1248 (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT))
1249 extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
1250 ip->i_d.di_extsize = extsize;
1251 }
1252
1241 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1253 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1242 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1254 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1243 1255
@@ -1349,7 +1361,7 @@ xfs_ioc_setxflags(
1349STATIC int 1361STATIC int
1350xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) 1362xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
1351{ 1363{
1352 struct getbmap __user *base = *ap; 1364 struct getbmap __user *base = (struct getbmap __user *)*ap;
1353 1365
1354 /* copy only getbmap portion (not getbmapx) */ 1366 /* copy only getbmap portion (not getbmapx) */
1355 if (copy_to_user(base, bmv, sizeof(struct getbmap))) 1367 if (copy_to_user(base, bmv, sizeof(struct getbmap)))
@@ -1380,7 +1392,7 @@ xfs_ioc_getbmap(
1380 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; 1392 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
1381 1393
1382 error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, 1394 error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
1383 (struct getbmap *)arg+1); 1395 (__force struct getbmap *)arg+1);
1384 if (error) 1396 if (error)
1385 return error; 1397 return error;
1386 1398
@@ -1393,7 +1405,7 @@ xfs_ioc_getbmap(
1393STATIC int 1405STATIC int
1394xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) 1406xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
1395{ 1407{
1396 struct getbmapx __user *base = *ap; 1408 struct getbmapx __user *base = (struct getbmapx __user *)*ap;
1397 1409
1398 if (copy_to_user(base, bmv, sizeof(struct getbmapx))) 1410 if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
1399 return -EFAULT; 1411 return -EFAULT;
@@ -1420,7 +1432,7 @@ xfs_ioc_getbmapx(
1420 return -EINVAL; 1432 return -EINVAL;
1421 1433
1422 error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, 1434 error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
1423 (struct getbmapx *)arg+1); 1435 (__force struct getbmapx *)arg+1);
1424 if (error) 1436 if (error)
1425 return error; 1437 return error;
1426 1438
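
The reordered xfs_ioctl_setattr() logic encodes a simple rule: the extent size hint is persisted only when an extent size flag justifies it, and is cleared unconditionally otherwise, instead of being written before the flags were even updated. As a pure function, with illustrative flag values:

	#define DIFLAG_EXTSIZE		(1u << 0)	/* illustrative values */
	#define DIFLAG_EXTSZINHERIT	(1u << 1)

	static unsigned int extsize_to_store(unsigned int di_flags,
					     unsigned int requested_fsbs)
	{
		if (di_flags & (DIFLAG_EXTSIZE | DIFLAG_EXTSZINHERIT))
			return requested_fsbs;	/* a flag justifies the hint */
		return 0;			/* no flag: clear stale hints */
	}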
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index a554646ff141..94ce027e28e3 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -160,6 +160,7 @@ xfs_ioctl32_bstat_copyin(
160 get_user(bstat->bs_gen, &bstat32->bs_gen) || 160 get_user(bstat->bs_gen, &bstat32->bs_gen) ||
161 get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || 161 get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
162 get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || 162 get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
163 get_user(bstat->bs_forkoff, &bstat32->bs_forkoff) ||
163 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || 164 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
164 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || 165 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
165 get_user(bstat->bs_aextents, &bstat32->bs_aextents)) 166 get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -214,6 +215,7 @@ xfs_bulkstat_one_fmt_compat(
214 put_user(buffer->bs_gen, &p32->bs_gen) || 215 put_user(buffer->bs_gen, &p32->bs_gen) ||
215 put_user(buffer->bs_projid, &p32->bs_projid) || 216 put_user(buffer->bs_projid, &p32->bs_projid) ||
216 put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || 217 put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) ||
218 put_user(buffer->bs_forkoff, &p32->bs_forkoff) ||
217 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || 219 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
218 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 220 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
219 put_user(buffer->bs_aextents, &p32->bs_aextents)) 221 put_user(buffer->bs_aextents, &p32->bs_aextents))
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index 80f4060e8970..b1bb45444df8 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -67,8 +67,9 @@ typedef struct compat_xfs_bstat {
67 __u32 bs_gen; /* generation count */ 67 __u32 bs_gen; /* generation count */
68 __u16 bs_projid_lo; /* lower part of project id */ 68 __u16 bs_projid_lo; /* lower part of project id */
69#define bs_projid bs_projid_lo /* (previously just bs_projid) */ 69#define bs_projid bs_projid_lo /* (previously just bs_projid) */
70 __u16 bs_forkoff; /* inode fork offset in bytes */
70 __u16 bs_projid_hi; /* high part of project id */ 71 __u16 bs_projid_hi; /* high part of project id */
71 unsigned char bs_pad[12]; /* pad space, unused */ 72 unsigned char bs_pad[10]; /* pad space, unused */
72 __u32 bs_dmevmask; /* DMIG event mask */ 73 __u32 bs_dmevmask; /* DMIG event mask */
73 __u16 bs_dmstate; /* DMIG state info */ 74 __u16 bs_dmstate; /* DMIG state info */
74 __u16 bs_aextents; /* attribute number of extents */ 75 __u16 bs_aextents; /* attribute number of extents */
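
The compat_xfs_bstat edit is an ABI-preserving field insertion: the two bytes for bs_forkoff come straight out of bs_pad, so the structure size is unchanged and existing 32-bit userspace keeps working. A reduced model of just the affected tail, with a C11 compile-time check:

	#include <assert.h>
	#include <stdint.h>

	struct tail_old {	/* before: forkoff missing, 12 pad bytes */
		uint16_t projid_lo;
		uint16_t projid_hi;
		unsigned char pad[12];
	};

	struct tail_new {	/* after: forkoff carved out of the pad */
		uint16_t projid_lo;
		uint16_t forkoff;
		uint16_t projid_hi;
		unsigned char pad[10];
	};

	static_assert(sizeof(struct tail_old) == sizeof(struct tail_new),
		      "bs_forkoff must not change the compat structure size");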
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index e9c47b6f5e5a..afcf3c926565 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -404,8 +404,8 @@ xfs_quota_calc_throttle(
404 int shift = 0; 404 int shift = 0;
405 struct xfs_dquot *dq = xfs_inode_dquot(ip, type); 405 struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
406 406
407 /* over hi wmark, squash the prealloc completely */ 407 /* no dq, or over hi wmark, squash the prealloc completely */
408 if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { 408 if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
409 *qblocks = 0; 409 *qblocks = 0;
410 *qfreesp = 0; 410 *qfreesp = 0;
411 return; 411 return;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 72129493e9d3..ec6dcdc181ee 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -849,6 +849,36 @@ xfs_setattr_size(
849 return error; 849 return error;
850 truncate_setsize(inode, newsize); 850 truncate_setsize(inode, newsize);
851 851
852 /*
853 * The "we can't serialise against page faults" pain gets worse.
854 *
855 * If the file is mapped then we have to clean the page at the old EOF
 856 * when extending the file. Extending the file can expose changes to the
857 * underlying page mapping (e.g. from beyond EOF to a hole or
858 * unwritten), and so on the next attempt to write to that page we need
859 * to remap it for write. i.e. we need .page_mkwrite() to be called.
860 * Hence we need to clean the page to clean the pte and so a new write
861 * fault will be triggered appropriately.
862 *
863 * If we do it before we change the inode size, then we can race with a
864 * page fault that maps the page with exactly the same problem. If we do
865 * it after we change the file size, then a new page fault can come in
866 * and allocate space before we've run the rest of the truncate
 867 * transaction. That's kinda grotesque, but it's better than having data
868 * over a hole, and so that's the lesser evil that has been chosen here.
869 *
870 * The real solution, however, is to have some mechanism for locking out
871 * page faults while a truncate is in progress.
872 */
873 if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
874 error = filemap_write_and_wait_range(
875 VFS_I(ip)->i_mapping,
876 round_down(oldsize, PAGE_CACHE_SIZE),
877 round_up(oldsize, PAGE_CACHE_SIZE) - 1);
878 if (error)
879 return error;
880 }
881
852 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 882 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
853 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 883 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
854 if (error) 884 if (error)
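
The long comment above boils down to one action: before growing i_size on a mapped file, write back the page straddling the old EOF so its pte goes clean and the next store takes a fresh write fault. The rounding is the delicate part; a sketch of just that computation, where write_and_wait_range stands in for filemap_write_and_wait_range():

	#define PAGE_SZ 4096ULL	/* stand-in for PAGE_CACHE_SIZE */

	static int write_and_wait_range(unsigned long long start,
					unsigned long long end)
	{
		(void)start; (void)end;	/* would push dirty pagecache to disk */
		return 0;
	}

	static int clean_old_eof_page(unsigned long long oldsize,
				      unsigned long long newsize, int mapped)
	{
		if (newsize > oldsize && mapped) {
			unsigned long long start = oldsize & ~(PAGE_SZ - 1);
			unsigned long long end =
				((oldsize + PAGE_SZ - 1) & ~(PAGE_SZ - 1)) - 1;

			/* round_down(old)..round_up(old)-1: the EOF page */
			return write_and_wait_range(start, end);
		}
		return 0;
	}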
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f71be9c68017..f1deb961a296 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -639,7 +639,8 @@ next_ag:
639 xfs_buf_relse(agbp); 639 xfs_buf_relse(agbp);
640 agbp = NULL; 640 agbp = NULL;
641 agino = 0; 641 agino = 0;
642 } while (++agno < mp->m_sb.sb_agcount); 642 agno++;
643 } while (agno < mp->m_sb.sb_agcount);
643 644
644 if (!error) { 645 if (!error) {
645 if (bufidx) { 646 if (bufidx) {
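
The xfs_inumbers() hunk moves the allocation-group increment out of the loop condition onto its own line, next to the per-AG state resets it belongs with, which is what made the increment bug visible at all. The resulting control flow, in miniature:

	#include <stdio.h>

	int main(void)
	{
		unsigned int agno = 0, agcount = 4;

		do {
			printf("scan AG %u\n", agno);	/* per-AG work + reset */
			agno++;			/* advance explicitly... */
		} while (agno < agcount);	/* ...keep the test side-effect free */
		return 0;
	}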
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index d10dc8f397c9..6a51619d8690 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -56,7 +56,6 @@ typedef __uint64_t __psunsigned_t;
56 56
57#include "kmem.h" 57#include "kmem.h"
58#include "mrlock.h" 58#include "mrlock.h"
59#include "time.h"
60#include "uuid.h" 59#include "uuid.h"
61 60
62#include <linux/semaphore.h> 61#include <linux/semaphore.h>
@@ -179,6 +178,11 @@ typedef __uint64_t __psunsigned_t;
179#define MAX(a,b) (max(a,b)) 178#define MAX(a,b) (max(a,b))
180#define howmany(x, y) (((x)+((y)-1))/(y)) 179#define howmany(x, y) (((x)+((y)-1))/(y))
181 180
181static inline void delay(long ticks)
182{
183 schedule_timeout_uninterruptible(ticks);
184}
185
182/* 186/*
183 * XFS wrapper structure for sysfs support. It depends on external data 187 * XFS wrapper structure for sysfs support. It depends on external data
184 * structures and is embedded in various internal data structures to implement 188 * structures and is embedded in various internal data structures to implement
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ca4fd5bd8522..fe88ef67f93a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1678,7 +1678,7 @@ xlog_bdstrat(
1678 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1678 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1679 xfs_buf_ioerror(bp, -EIO); 1679 xfs_buf_ioerror(bp, -EIO);
1680 xfs_buf_stale(bp); 1680 xfs_buf_stale(bp);
1681 xfs_buf_ioend(bp, 0); 1681 xfs_buf_ioend(bp);
1682 /* 1682 /*
1683 * It would seem logical to return EIO here, but we rely on 1683 * It would seem logical to return EIO here, but we rely on
1684 * the log state machine to propagate I/O errors instead of 1684 * the log state machine to propagate I/O errors instead of
@@ -1688,7 +1688,7 @@ xlog_bdstrat(
1688 return 0; 1688 return 0;
1689 } 1689 }
1690 1690
1691 xfs_buf_iorequest(bp); 1691 xfs_buf_submit(bp);
1692 return 0; 1692 return 0;
1693} 1693}
1694 1694
@@ -3867,18 +3867,17 @@ xlog_state_ioerror(
3867 * This is called from xfs_force_shutdown, when we're forcibly 3867 * This is called from xfs_force_shutdown, when we're forcibly
3868 * shutting down the filesystem, typically because of an IO error. 3868 * shutting down the filesystem, typically because of an IO error.
3869 * Our main objectives here are to make sure that: 3869 * Our main objectives here are to make sure that:
3870 * a. the filesystem gets marked 'SHUTDOWN' for all interested 3870 * a. if !logerror, flush the logs to disk. Anything modified
3871 * after this is ignored.
3872 * b. the filesystem gets marked 'SHUTDOWN' for all interested
3871 * parties to find out, 'atomically'. 3873 * parties to find out, 'atomically'.
3872 * b. those who're sleeping on log reservations, pinned objects and 3874 * c. those who're sleeping on log reservations, pinned objects and
 3873 * other resources get woken up and told the bad news. 3875 * other resources get woken up and told the bad news.
3874 * c. nothing new gets queued up after (a) and (b) are done. 3876 * d. nothing new gets queued up after (b) and (c) are done.
3875 * d. if !logerror, flush the iclogs to disk, then seal them off
3876 * for business.
3877 * 3877 *
3878 * Note: for delayed logging the !logerror case needs to flush the regions 3878 * Note: for the !logerror case we need to flush the regions held in memory out
3879 * held in memory out to the iclogs before flushing them to disk. This needs 3879 * to disk first. This needs to be done before the log is marked as shutdown,
3880 * to be done before the log is marked as shutdown, otherwise the flush to the 3880 * otherwise the iclog writes will fail.
3881 * iclogs will fail.
3882 */ 3881 */
3883int 3882int
3884xfs_log_force_umount( 3883xfs_log_force_umount(
@@ -3910,16 +3909,16 @@ xfs_log_force_umount(
3910 ASSERT(XLOG_FORCED_SHUTDOWN(log)); 3909 ASSERT(XLOG_FORCED_SHUTDOWN(log));
3911 return 1; 3910 return 1;
3912 } 3911 }
3913 retval = 0;
3914 3912
3915 /* 3913 /*
 3916 * Flush the in memory commit item list before marking the log as 3914 * Flush all the completed transactions to disk before marking the log as
3917 * being shut down. We need to do it in this order to ensure all the 3915 * being shut down. We need to do it in this order to ensure that
3918 * completed transactions are flushed to disk with the xfs_log_force() 3916 * completed operations are safely on disk before we shut down, and that
3919 * call below. 3917 * we don't have to issue any buffer IO after the shutdown flags are set
3918 * to guarantee this.
3920 */ 3919 */
3921 if (!logerror) 3920 if (!logerror)
3922 xlog_cil_force(log); 3921 _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
3923 3922
3924 /* 3923 /*
 3925 * mark the filesystem and the log as in a shutdown state and wake 3924 * mark the filesystem and the log as in a shutdown state and wake
@@ -3931,18 +3930,11 @@ xfs_log_force_umount(
3931 XFS_BUF_DONE(mp->m_sb_bp); 3930 XFS_BUF_DONE(mp->m_sb_bp);
3932 3931
3933 /* 3932 /*
3934 * This flag is sort of redundant because of the mount flag, but 3933 * Mark the log and the iclogs with IO error flags to prevent any
3935 * it's good to maintain the separation between the log and the rest 3934 * further log IO from being issued or completed.
3936 * of XFS.
3937 */ 3935 */
3938 log->l_flags |= XLOG_IO_ERROR; 3936 log->l_flags |= XLOG_IO_ERROR;
3939 3937 retval = xlog_state_ioerror(log);
3940 /*
3941 * If we hit a log error, we want to mark all the iclogs IOERROR
3942 * while we're still holding the loglock.
3943 */
3944 if (logerror)
3945 retval = xlog_state_ioerror(log);
3946 spin_unlock(&log->l_icloglock); 3938 spin_unlock(&log->l_icloglock);
3947 3939
3948 /* 3940 /*
@@ -3955,19 +3947,6 @@ xfs_log_force_umount(
3955 xlog_grant_head_wake_all(&log->l_reserve_head); 3947 xlog_grant_head_wake_all(&log->l_reserve_head);
3956 xlog_grant_head_wake_all(&log->l_write_head); 3948 xlog_grant_head_wake_all(&log->l_write_head);
3957 3949
3958 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3959 ASSERT(!logerror);
3960 /*
3961 * Force the incore logs to disk before shutting the
3962 * log down completely.
3963 */
3964 _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
3965
3966 spin_lock(&log->l_icloglock);
3967 retval = xlog_state_ioerror(log);
3968 spin_unlock(&log->l_icloglock);
3969 }
3970
3971 /* 3950 /*
3972 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first 3951 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
3973 * as if the log writes were completed. The abort handling in the log 3952 * as if the log writes were completed. The abort handling in the log
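
Taken together, the xfs_log.c changes serialize shutdown into the fixed order the rewritten a/b/c/d comment describes: flush first while log IO still works, then set the shutdown state, then error out the iclogs, then wake everyone. As a linear sketch with placeholder function names, not the XFS entry points:

	static void flush_log_to_disk(void)  { /* (a) _xfs_log_force stand-in */ }
	static void set_shutdown_flags(void) { /* (b) mark mount + log SHUTDOWN */ }
	static void error_out_iclogs(void)   { /* stop further log IO */ }
	static void wake_all_waiters(void)   { /* (c) reservations, forces, AIL */ }

	static void force_umount(int logerror)
	{
		if (!logerror)
			flush_log_to_disk();	/* only safe before the flags go up */
		set_shutdown_flags();
		error_out_iclogs();
		wake_all_waiters();
		/* (d) nothing new can queue once the flags are set */
	}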
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f6b79e5325dd..f506c457011e 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -463,12 +463,40 @@ xlog_cil_push(
463 spin_unlock(&cil->xc_push_lock); 463 spin_unlock(&cil->xc_push_lock);
464 goto out_skip; 464 goto out_skip;
465 } 465 }
466 spin_unlock(&cil->xc_push_lock);
467 466
468 467
 469 /* check for a previously pushed sequence */ 468 /* check for a previously pushed sequence */
470 if (push_seq < cil->xc_ctx->sequence) 469 if (push_seq < cil->xc_ctx->sequence) {
470 spin_unlock(&cil->xc_push_lock);
471 goto out_skip; 471 goto out_skip;
472 }
473
474 /*
475 * We are now going to push this context, so add it to the committing
476 * list before we do anything else. This ensures that anyone waiting on
477 * this push can easily detect the difference between a "push in
478 * progress" and "CIL is empty, nothing to do".
479 *
480 * IOWs, a wait loop can now check for:
481 * the current sequence not being found on the committing list;
482 * an empty CIL; and
483 * an unchanged sequence number
484 * to detect a push that had nothing to do and therefore does not need
485 * waiting on. If the CIL is not empty, we get put on the committing
486 * list before emptying the CIL and bumping the sequence number. Hence
487 * an empty CIL and an unchanged sequence number means we jumped out
488 * above after doing nothing.
489 *
490 * Hence the waiter will either find the commit sequence on the
491 * committing list or the sequence number will be unchanged and the CIL
492 * still dirty. In that latter case, the push has not yet started, and
493 * so the waiter will have to continue trying to check the CIL
494 * committing list until it is found. In extreme cases of delay, the
495 * sequence may fully commit between the attempts the wait makes to wait
496 * on the commit sequence.
497 */
498 list_add(&ctx->committing, &cil->xc_committing);
499 spin_unlock(&cil->xc_push_lock);
472 500
473 /* 501 /*
474 * pull all the log vectors off the items in the CIL, and 502 * pull all the log vectors off the items in the CIL, and
@@ -532,7 +560,6 @@ xlog_cil_push(
532 */ 560 */
533 spin_lock(&cil->xc_push_lock); 561 spin_lock(&cil->xc_push_lock);
534 cil->xc_current_sequence = new_ctx->sequence; 562 cil->xc_current_sequence = new_ctx->sequence;
535 list_add(&ctx->committing, &cil->xc_committing);
536 spin_unlock(&cil->xc_push_lock); 563 spin_unlock(&cil->xc_push_lock);
537 up_write(&cil->xc_ctx_lock); 564 up_write(&cil->xc_ctx_lock);
538 565
@@ -855,13 +882,15 @@ restart:
 855 * Hence by the time we have got here, our sequence may not have been 882 * Hence by the time we have got here, our sequence may not have been
856 * pushed yet. This is true if the current sequence still matches the 883 * pushed yet. This is true if the current sequence still matches the
857 * push sequence after the above wait loop and the CIL still contains 884 * push sequence after the above wait loop and the CIL still contains
858 * dirty objects. 885 * dirty objects. This is guaranteed by the push code first adding the
886 * context to the committing list before emptying the CIL.
859 * 887 *
860 * When the push occurs, it will empty the CIL and atomically increment 888 * Hence if we don't find the context in the committing list and the
861 * the currect sequence past the push sequence and move it into the 889 * current sequence number is unchanged then the CIL contents are
 862 * committing list. Of course, if the CIL is clean at the time of the 890 * significant. If the CIL is empty, it means there was nothing to push
863 * push, it won't have pushed the CIL at all, so in that case we should 891 * and that means there is nothing to wait for. If the CIL is not empty,
864 * try the push for this sequence again from the start just in case. 892 * it means we haven't yet started the push, because if it had started
893 * we would have found the context on the committing list.
865 */ 894 */
866 if (sequence == cil->xc_current_sequence && 895 if (sequence == cil->xc_current_sequence &&
867 !list_empty(&cil->xc_cil)) { 896 !list_empty(&cil->xc_cil)) {
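
The invariant the xfs_log_cil.c reordering establishes is exactly what the waiter relies on: a push sequence is "not yet started" only if it is absent from the committing list and the CIL is still dirty at an unchanged sequence number. That waiter-side test, as a sketch over stand-in types:

	#include <stdbool.h>

	struct ctx { unsigned long long sequence; struct ctx *next; };

	static bool on_committing_list(const struct ctx *head,
				       unsigned long long seq)
	{
		for (const struct ctx *c = head; c; c = c->next)
			if (c->sequence == seq)
				return true;
		return false;
	}

	/* true only when the waiter must keep retrying the push */
	static bool push_not_started(const struct ctx *committing, bool cil_empty,
				     unsigned long long current_seq,
				     unsigned long long wait_seq)
	{
		if (on_committing_list(committing, wait_seq))
			return false;	/* push in progress or already done */
		/* unchanged sequence + dirty CIL: the push has not begun yet */
		return current_seq == wait_seq && !cil_empty;
	}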
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1fd5787add99..00cd7f3a8f59 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -193,12 +193,8 @@ xlog_bread_noalign(
193 bp->b_io_length = nbblks; 193 bp->b_io_length = nbblks;
194 bp->b_error = 0; 194 bp->b_error = 0;
195 195
196 if (XFS_FORCED_SHUTDOWN(log->l_mp)) 196 error = xfs_buf_submit_wait(bp);
197 return -EIO; 197 if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
198
199 xfs_buf_iorequest(bp);
200 error = xfs_buf_iowait(bp);
201 if (error)
202 xfs_buf_ioerror_alert(bp, __func__); 198 xfs_buf_ioerror_alert(bp, __func__);
203 return error; 199 return error;
204} 200}
@@ -378,12 +374,14 @@ xlog_recover_iodone(
378 * We're not going to bother about retrying 374 * We're not going to bother about retrying
379 * this during recovery. One strike! 375 * this during recovery. One strike!
380 */ 376 */
381 xfs_buf_ioerror_alert(bp, __func__); 377 if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
382 xfs_force_shutdown(bp->b_target->bt_mount, 378 xfs_buf_ioerror_alert(bp, __func__);
383 SHUTDOWN_META_IO_ERROR); 379 xfs_force_shutdown(bp->b_target->bt_mount,
380 SHUTDOWN_META_IO_ERROR);
381 }
384 } 382 }
385 bp->b_iodone = NULL; 383 bp->b_iodone = NULL;
386 xfs_buf_ioend(bp, 0); 384 xfs_buf_ioend(bp);
387} 385}
388 386
389/* 387/*
@@ -1445,160 +1443,6 @@ xlog_clear_stale_blocks(
1445 ****************************************************************************** 1443 ******************************************************************************
1446 */ 1444 */
1447 1445
1448STATIC xlog_recover_t *
1449xlog_recover_find_tid(
1450 struct hlist_head *head,
1451 xlog_tid_t tid)
1452{
1453 xlog_recover_t *trans;
1454
1455 hlist_for_each_entry(trans, head, r_list) {
1456 if (trans->r_log_tid == tid)
1457 return trans;
1458 }
1459 return NULL;
1460}
1461
1462STATIC void
1463xlog_recover_new_tid(
1464 struct hlist_head *head,
1465 xlog_tid_t tid,
1466 xfs_lsn_t lsn)
1467{
1468 xlog_recover_t *trans;
1469
1470 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1471 trans->r_log_tid = tid;
1472 trans->r_lsn = lsn;
1473 INIT_LIST_HEAD(&trans->r_itemq);
1474
1475 INIT_HLIST_NODE(&trans->r_list);
1476 hlist_add_head(&trans->r_list, head);
1477}
1478
1479STATIC void
1480xlog_recover_add_item(
1481 struct list_head *head)
1482{
1483 xlog_recover_item_t *item;
1484
1485 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
1486 INIT_LIST_HEAD(&item->ri_list);
1487 list_add_tail(&item->ri_list, head);
1488}
1489
1490STATIC int
1491xlog_recover_add_to_cont_trans(
1492 struct xlog *log,
1493 struct xlog_recover *trans,
1494 xfs_caddr_t dp,
1495 int len)
1496{
1497 xlog_recover_item_t *item;
1498 xfs_caddr_t ptr, old_ptr;
1499 int old_len;
1500
1501 if (list_empty(&trans->r_itemq)) {
1502 /* finish copying rest of trans header */
1503 xlog_recover_add_item(&trans->r_itemq);
1504 ptr = (xfs_caddr_t) &trans->r_theader +
1505 sizeof(xfs_trans_header_t) - len;
1506 memcpy(ptr, dp, len); /* d, s, l */
1507 return 0;
1508 }
1509 /* take the tail entry */
1510 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1511
1512 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1513 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1514
1515 ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
1516 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1517 item->ri_buf[item->ri_cnt-1].i_len += len;
1518 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1519 trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
1520 return 0;
1521}
1522
1523/*
1524 * The next region to add is the start of a new region. It could be
1525 * a whole region or it could be the first part of a new region. Because
1526 * of this, the assumption here is that the type and size fields of all
1527 * format structures fit into the first 32 bits of the structure.
1528 *
1529 * This works because all regions must be 32 bit aligned. Therefore, we
1530 * either have both fields or we have neither field. In the case we have
1531 * neither field, the data part of the region is zero length. We only have
1532 * a log_op_header and can throw away the header since a new one will appear
1533 * later. If we have at least 4 bytes, then we can determine how many regions
1534 * will appear in the current log item.
1535 */
1536STATIC int
1537xlog_recover_add_to_trans(
1538 struct xlog *log,
1539 struct xlog_recover *trans,
1540 xfs_caddr_t dp,
1541 int len)
1542{
1543 xfs_inode_log_format_t *in_f; /* any will do */
1544 xlog_recover_item_t *item;
1545 xfs_caddr_t ptr;
1546
1547 if (!len)
1548 return 0;
1549 if (list_empty(&trans->r_itemq)) {
1550 /* we need to catch log corruptions here */
1551 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1552 xfs_warn(log->l_mp, "%s: bad header magic number",
1553 __func__);
1554 ASSERT(0);
1555 return -EIO;
1556 }
1557 if (len == sizeof(xfs_trans_header_t))
1558 xlog_recover_add_item(&trans->r_itemq);
1559 memcpy(&trans->r_theader, dp, len); /* d, s, l */
1560 return 0;
1561 }
1562
1563 ptr = kmem_alloc(len, KM_SLEEP);
1564 memcpy(ptr, dp, len);
1565 in_f = (xfs_inode_log_format_t *)ptr;
1566
1567 /* take the tail entry */
1568 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1569 if (item->ri_total != 0 &&
1570 item->ri_total == item->ri_cnt) {
1571 /* tail item is in use, get a new one */
1572 xlog_recover_add_item(&trans->r_itemq);
1573 item = list_entry(trans->r_itemq.prev,
1574 xlog_recover_item_t, ri_list);
1575 }
1576
1577 if (item->ri_total == 0) { /* first region to be added */
1578 if (in_f->ilf_size == 0 ||
1579 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
1580 xfs_warn(log->l_mp,
1581 "bad number of regions (%d) in inode log format",
1582 in_f->ilf_size);
1583 ASSERT(0);
1584 kmem_free(ptr);
1585 return -EIO;
1586 }
1587
1588 item->ri_total = in_f->ilf_size;
1589 item->ri_buf =
1590 kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
1591 KM_SLEEP);
1592 }
1593 ASSERT(item->ri_total > item->ri_cnt);
1594 /* Description region is ri_buf[0] */
1595 item->ri_buf[item->ri_cnt].i_addr = ptr;
1596 item->ri_buf[item->ri_cnt].i_len = len;
1597 item->ri_cnt++;
1598 trace_xfs_log_recover_item_add(log, trans, item, 0);
1599 return 0;
1600}
1601
1602/* 1446/*
1603 * Sort the log items in the transaction. 1447 * Sort the log items in the transaction.
1604 * 1448 *
@@ -3254,31 +3098,6 @@ xlog_recover_do_icreate_pass2(
3254 return 0; 3098 return 0;
3255} 3099}
3256 3100
3257/*
3258 * Free up any resources allocated by the transaction
3259 *
3260 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
3261 */
3262STATIC void
3263xlog_recover_free_trans(
3264 struct xlog_recover *trans)
3265{
3266 xlog_recover_item_t *item, *n;
3267 int i;
3268
3269 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
3270 /* Free the regions in the item. */
3271 list_del(&item->ri_list);
3272 for (i = 0; i < item->ri_cnt; i++)
3273 kmem_free(item->ri_buf[i].i_addr);
3274 /* Free the item itself */
3275 kmem_free(item->ri_buf);
3276 kmem_free(item);
3277 }
3278 /* Free the transaction recover structure */
3279 kmem_free(trans);
3280}
3281
3282STATIC void 3101STATIC void
3283xlog_recover_buffer_ra_pass2( 3102xlog_recover_buffer_ra_pass2(
3284 struct xlog *log, 3103 struct xlog *log,
@@ -3528,22 +3347,309 @@ out:
3528 if (!list_empty(&done_list)) 3347 if (!list_empty(&done_list))
3529 list_splice_init(&done_list, &trans->r_itemq); 3348 list_splice_init(&done_list, &trans->r_itemq);
3530 3349
3531 xlog_recover_free_trans(trans);
3532
3533 error2 = xfs_buf_delwri_submit(&buffer_list); 3350 error2 = xfs_buf_delwri_submit(&buffer_list);
3534 return error ? error : error2; 3351 return error ? error : error2;
3535} 3352}
3536 3353
3354STATIC void
3355xlog_recover_add_item(
3356 struct list_head *head)
3357{
3358 xlog_recover_item_t *item;
3359
3360 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
3361 INIT_LIST_HEAD(&item->ri_list);
3362 list_add_tail(&item->ri_list, head);
3363}
3364
3537STATIC int 3365STATIC int
3538xlog_recover_unmount_trans( 3366xlog_recover_add_to_cont_trans(
3539 struct xlog *log) 3367 struct xlog *log,
3368 struct xlog_recover *trans,
3369 xfs_caddr_t dp,
3370 int len)
3540{ 3371{
3541 /* Do nothing now */ 3372 xlog_recover_item_t *item;
3542 xfs_warn(log->l_mp, "%s: Unmount LR", __func__); 3373 xfs_caddr_t ptr, old_ptr;
3374 int old_len;
3375
3376 if (list_empty(&trans->r_itemq)) {
3377 /* finish copying rest of trans header */
3378 xlog_recover_add_item(&trans->r_itemq);
3379 ptr = (xfs_caddr_t) &trans->r_theader +
3380 sizeof(xfs_trans_header_t) - len;
3381 memcpy(ptr, dp, len);
3382 return 0;
3383 }
3384 /* take the tail entry */
3385 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
3386
3387 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
3388 old_len = item->ri_buf[item->ri_cnt-1].i_len;
3389
3390 ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
3391 memcpy(&ptr[old_len], dp, len);
3392 item->ri_buf[item->ri_cnt-1].i_len += len;
3393 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
3394 trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
3395 return 0;
3396}
3397
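The continuation path above simply grows the last region and splices the new bytes in at the old length. A minimal userspace model of that stitch, with plain realloc standing in for kmem_realloc and hypothetical names:

#include <stdlib.h>
#include <string.h>

/* append a continuation fragment to the end of an existing region */
static char *append_fragment(char *old_ptr, int old_len,
			     const char *dp, int len)
{
	char *ptr = realloc(old_ptr, old_len + len);

	if (!ptr)
		return NULL;
	memcpy(&ptr[old_len], dp, len);	/* splice at the old end */
	return ptr;
}

int main(void)
{
	/* realloc(NULL, ...) acts as malloc for the first fragment */
	char *region = append_fragment(NULL, 0, "abcd", 4);

	region = append_fragment(region, 4, "efgh", 4);
	/* region now holds the contiguous 8-byte payload "abcdefgh" */
	free(region);
	return 0;
}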
3398/*
3399 * The next region to add is the start of a new region. It could be
3400 * a whole region or only the first part of one. Because of this, the
3401 * assumption here is that the type and size fields of all format
3402 * structures fit into the first 32 bits of the structure.
3403 *
3404 * This works because all regions must be 32-bit aligned. Therefore, we
3405 * either have both fields or neither. If we have neither field, the
3406 * data part of the region is zero length; we only have
3407 * a log_op_header and can throw away the header since a new one will appear
3408 * later. If we have at least 4 bytes, then we can determine how many regions
3409 * will appear in the current log item.
3410 */
3411STATIC int
3412xlog_recover_add_to_trans(
3413 struct xlog *log,
3414 struct xlog_recover *trans,
3415 xfs_caddr_t dp,
3416 int len)
3417{
3418 xfs_inode_log_format_t *in_f; /* any will do */
3419 xlog_recover_item_t *item;
3420 xfs_caddr_t ptr;
3421
3422 if (!len)
3423 return 0;
3424 if (list_empty(&trans->r_itemq)) {
3425 /* we need to catch log corruptions here */
3426 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
3427 xfs_warn(log->l_mp, "%s: bad header magic number",
3428 __func__);
3429 ASSERT(0);
3430 return -EIO;
3431 }
3432 if (len == sizeof(xfs_trans_header_t))
3433 xlog_recover_add_item(&trans->r_itemq);
3434 memcpy(&trans->r_theader, dp, len);
3435 return 0;
3436 }
3437
3438 ptr = kmem_alloc(len, KM_SLEEP);
3439 memcpy(ptr, dp, len);
3440 in_f = (xfs_inode_log_format_t *)ptr;
3441
3442 /* take the tail entry */
3443 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
3444 if (item->ri_total != 0 &&
3445 item->ri_total == item->ri_cnt) {
3446 /* tail item is in use, get a new one */
3447 xlog_recover_add_item(&trans->r_itemq);
3448 item = list_entry(trans->r_itemq.prev,
3449 xlog_recover_item_t, ri_list);
3450 }
3451
3452 if (item->ri_total == 0) { /* first region to be added */
3453 if (in_f->ilf_size == 0 ||
3454 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
3455 xfs_warn(log->l_mp,
3456 "bad number of regions (%d) in inode log format",
3457 in_f->ilf_size);
3458 ASSERT(0);
3459 kmem_free(ptr);
3460 return -EIO;
3461 }
3462
3463 item->ri_total = in_f->ilf_size;
3464 item->ri_buf =
3465 kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
3466 KM_SLEEP);
3467 }
3468 ASSERT(item->ri_total > item->ri_cnt);
3469 /* Description region is ri_buf[0] */
3470 item->ri_buf[item->ri_cnt].i_addr = ptr;
3471 item->ri_buf[item->ri_cnt].i_len = len;
3472 item->ri_cnt++;
3473 trace_xfs_log_recover_item_add(log, trans, item, 0);
3543 return 0; 3474 return 0;
3544} 3475}
3545 3476
3546/* 3477/*
3478 * Free up any resources allocated by the transaction
3479 *
3480 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
3481 */
3482STATIC void
3483xlog_recover_free_trans(
3484 struct xlog_recover *trans)
3485{
3486 xlog_recover_item_t *item, *n;
3487 int i;
3488
3489 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
3490 /* Free the regions in the item. */
3491 list_del(&item->ri_list);
3492 for (i = 0; i < item->ri_cnt; i++)
3493 kmem_free(item->ri_buf[i].i_addr);
3494 /* Free the item itself */
3495 kmem_free(item->ri_buf);
3496 kmem_free(item);
3497 }
3498 /* Free the transaction recover structure */
3499 kmem_free(trans);
3500}
3501
3502/*
3503 * On error or completion, trans is freed.
3504 */
3505STATIC int
3506xlog_recovery_process_trans(
3507 struct xlog *log,
3508 struct xlog_recover *trans,
3509 xfs_caddr_t dp,
3510 unsigned int len,
3511 unsigned int flags,
3512 int pass)
3513{
3514 int error = 0;
3515 bool freeit = false;
3516
3517 /* mask off ophdr transaction container flags */
3518 flags &= ~XLOG_END_TRANS;
3519 if (flags & XLOG_WAS_CONT_TRANS)
3520 flags &= ~XLOG_CONTINUE_TRANS;
3521
3522 /*
3523 * Callees must not free the trans structure. We'll decide if we need to
3524 * free it or not based on the operation being done and its result.
3525 */
3526 switch (flags) {
3527 /* expected flag values */
3528 case 0:
3529 case XLOG_CONTINUE_TRANS:
3530 error = xlog_recover_add_to_trans(log, trans, dp, len);
3531 break;
3532 case XLOG_WAS_CONT_TRANS:
3533 error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
3534 break;
3535 case XLOG_COMMIT_TRANS:
3536 error = xlog_recover_commit_trans(log, trans, pass);
3537 /* success or fail, we are now done with this transaction. */
3538 freeit = true;
3539 break;
3540
3541 /* unexpected flag values */
3542 case XLOG_UNMOUNT_TRANS:
3543 /* just skip trans */
3544 xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
3545 freeit = true;
3546 break;
3547 case XLOG_START_TRANS:
3548 default:
3549 xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags);
3550 ASSERT(0);
3551 error = -EIO;
3552 break;
3553 }
3554 if (error || freeit)
3555 xlog_recover_free_trans(trans);
3556 return error;
3557}
3558
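The two statements at the top of xlog_recovery_process_trans() reduce the ophdr flags to a single dispatchable value: the END bit only marks the record container, and WAS_CONT supersedes CONTINUE. A small sketch of that normalisation with illustrative flag values (not the on-disk XLOG_* constants):

#include <stdio.h>

/* illustrative stand-ins for the XLOG_* ophdr flags */
#define OP_START	0x01
#define OP_COMMIT	0x02
#define OP_CONTINUE	0x04
#define OP_WAS_CONT	0x08
#define OP_END		0x10

static unsigned int normalize(unsigned int flags)
{
	flags &= ~OP_END;		/* END only marks the container */
	if (flags & OP_WAS_CONT)	/* WAS_CONT supersedes CONTINUE */
		flags &= ~OP_CONTINUE;
	return flags;
}

int main(void)
{
	/* a continued op that also ends a log record: only WAS_CONT remains */
	printf("0x%x\n", normalize(OP_WAS_CONT | OP_CONTINUE | OP_END));
	return 0;
}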
3559/*
3560 * Look up the transaction recovery structure associated with the ID in the
3561 * current ophdr. If the transaction doesn't exist and the start flag is set in
3562 * the ophdr, then allocate a new transaction for future ID matches to find.
3563 * Either way, return what we found during the lookup - an existing transaction
3564 * or nothing.
3565 */
3566STATIC struct xlog_recover *
3567xlog_recover_ophdr_to_trans(
3568 struct hlist_head rhash[],
3569 struct xlog_rec_header *rhead,
3570 struct xlog_op_header *ohead)
3571{
3572 struct xlog_recover *trans;
3573 xlog_tid_t tid;
3574 struct hlist_head *rhp;
3575
3576 tid = be32_to_cpu(ohead->oh_tid);
3577 rhp = &rhash[XLOG_RHASH(tid)];
3578 hlist_for_each_entry(trans, rhp, r_list) {
3579 if (trans->r_log_tid == tid)
3580 return trans;
3581 }
3582
3583 /*
3584 * skip over non-start transaction headers - we could be
3585 * processing slack space before the next transaction starts
3586 */
3587 if (!(ohead->oh_flags & XLOG_START_TRANS))
3588 return NULL;
3589
3590 ASSERT(be32_to_cpu(ohead->oh_len) == 0);
3591
3592 /*
3593 * This is a new transaction so allocate a new recovery container to
3594 * hold the recovery ops that will follow.
3595 */
3596 trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
3597 trans->r_log_tid = tid;
3598 trans->r_lsn = be64_to_cpu(rhead->h_lsn);
3599 INIT_LIST_HEAD(&trans->r_itemq);
3600 INIT_HLIST_NODE(&trans->r_list);
3601 hlist_add_head(&trans->r_list, rhp);
3602
3603 /*
3604 * Nothing more to do for this ophdr. Items to be added to this new
3605 * transaction will be in subsequent ophdr containers.
3606 */
3607 return NULL;
3608}
3609
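A compact userspace model of the lookup-or-allocate pattern above, using singly linked buckets in place of the kernel hlist and a modulo in place of XLOG_RHASH (all names hypothetical):

#include <stdint.h>
#include <stdlib.h>

#define NBUCKETS 16

struct trans {
	uint32_t	tid;
	struct trans	*next;
};

static struct trans *buckets[NBUCKETS];

/*
 * Return the transaction for tid if one exists.  On a start record,
 * allocate and hash a new one but still return NULL: the start op
 * carries no payload, so the caller has nothing further to do.
 */
static struct trans *lookup_or_create(uint32_t tid, int is_start)
{
	struct trans **head = &buckets[tid % NBUCKETS];
	struct trans *t;

	for (t = *head; t; t = t->next)
		if (t->tid == tid)
			return t;

	if (!is_start)
		return NULL;	/* slack space before the next transaction */

	t = calloc(1, sizeof(*t));
	if (t) {
		t->tid = tid;
		t->next = *head;
		*head = t;
	}
	return NULL;
}

int main(void)
{
	lookup_or_create(42, 1);		/* start: hashed, returns NULL */
	return lookup_or_create(42, 0) ? 0 : 1;	/* found on the next lookup */
}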
3610STATIC int
3611xlog_recover_process_ophdr(
3612 struct xlog *log,
3613 struct hlist_head rhash[],
3614 struct xlog_rec_header *rhead,
3615 struct xlog_op_header *ohead,
3616 xfs_caddr_t dp,
3617 xfs_caddr_t end,
3618 int pass)
3619{
3620 struct xlog_recover *trans;
3621 unsigned int len;
3622
3623 /* Do we understand who wrote this op? */
3624 if (ohead->oh_clientid != XFS_TRANSACTION &&
3625 ohead->oh_clientid != XFS_LOG) {
3626 xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
3627 __func__, ohead->oh_clientid);
3628 ASSERT(0);
3629 return -EIO;
3630 }
3631
3632 /*
3633 * Check that the ophdr contains all the data it is supposed to.
3634 */
3635 len = be32_to_cpu(ohead->oh_len);
3636 if (dp + len > end) {
3637 xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len);
3638 WARN_ON(1);
3639 return -EIO;
3640 }
3641
3642 trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead);
3643 if (!trans) {
3644 /* nothing to do, so skip over this ophdr */
3645 return 0;
3646 }
3647
3648 return xlog_recovery_process_trans(log, trans, dp, len,
3649 ohead->oh_flags, pass);
3650}
3651
3652/*
3547 * There are two valid states of the r_state field. 0 indicates that the 3653 * There are two valid states of the r_state field. 0 indicates that the
3548 * transaction structure is in a normal state. We have either seen the 3654 * transaction structure is in a normal state. We have either seen the
3549 * start of the transaction or the last operation we added was not a partial 3655 * start of the transaction or the last operation we added was not a partial
@@ -3560,86 +3666,30 @@ xlog_recover_process_data(
3560 xfs_caddr_t dp, 3666 xfs_caddr_t dp,
3561 int pass) 3667 int pass)
3562{ 3668{
3563 xfs_caddr_t lp; 3669 struct xlog_op_header *ohead;
3670 xfs_caddr_t end;
3564 int num_logops; 3671 int num_logops;
3565 xlog_op_header_t *ohead;
3566 xlog_recover_t *trans;
3567 xlog_tid_t tid;
3568 int error; 3672 int error;
3569 unsigned long hash;
3570 uint flags;
3571 3673
3572 lp = dp + be32_to_cpu(rhead->h_len); 3674 end = dp + be32_to_cpu(rhead->h_len);
3573 num_logops = be32_to_cpu(rhead->h_num_logops); 3675 num_logops = be32_to_cpu(rhead->h_num_logops);
3574 3676
3575 /* check the log format matches our own - else we can't recover */ 3677 /* check the log format matches our own - else we can't recover */
3576 if (xlog_header_check_recover(log->l_mp, rhead)) 3678 if (xlog_header_check_recover(log->l_mp, rhead))
3577 return -EIO; 3679 return -EIO;
3578 3680
3579 while ((dp < lp) && num_logops) { 3681 while ((dp < end) && num_logops) {
3580 ASSERT(dp + sizeof(xlog_op_header_t) <= lp); 3682
3581 ohead = (xlog_op_header_t *)dp; 3683 ohead = (struct xlog_op_header *)dp;
3582 dp += sizeof(xlog_op_header_t); 3684 dp += sizeof(*ohead);
3583 if (ohead->oh_clientid != XFS_TRANSACTION && 3685 ASSERT(dp <= end);
3584 ohead->oh_clientid != XFS_LOG) { 3686
3585 xfs_warn(log->l_mp, "%s: bad clientid 0x%x", 3687 /* errors will abort recovery */
3586 __func__, ohead->oh_clientid); 3688 error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
3587 ASSERT(0); 3689 dp, end, pass);
3588 return -EIO; 3690 if (error)
3589 } 3691 return error;
3590 tid = be32_to_cpu(ohead->oh_tid); 3692
3591 hash = XLOG_RHASH(tid);
3592 trans = xlog_recover_find_tid(&rhash[hash], tid);
3593 if (trans == NULL) { /* not found; add new tid */
3594 if (ohead->oh_flags & XLOG_START_TRANS)
3595 xlog_recover_new_tid(&rhash[hash], tid,
3596 be64_to_cpu(rhead->h_lsn));
3597 } else {
3598 if (dp + be32_to_cpu(ohead->oh_len) > lp) {
3599 xfs_warn(log->l_mp, "%s: bad length 0x%x",
3600 __func__, be32_to_cpu(ohead->oh_len));
3601 WARN_ON(1);
3602 return -EIO;
3603 }
3604 flags = ohead->oh_flags & ~XLOG_END_TRANS;
3605 if (flags & XLOG_WAS_CONT_TRANS)
3606 flags &= ~XLOG_CONTINUE_TRANS;
3607 switch (flags) {
3608 case XLOG_COMMIT_TRANS:
3609 error = xlog_recover_commit_trans(log,
3610 trans, pass);
3611 break;
3612 case XLOG_UNMOUNT_TRANS:
3613 error = xlog_recover_unmount_trans(log);
3614 break;
3615 case XLOG_WAS_CONT_TRANS:
3616 error = xlog_recover_add_to_cont_trans(log,
3617 trans, dp,
3618 be32_to_cpu(ohead->oh_len));
3619 break;
3620 case XLOG_START_TRANS:
3621 xfs_warn(log->l_mp, "%s: bad transaction",
3622 __func__);
3623 ASSERT(0);
3624 error = -EIO;
3625 break;
3626 case 0:
3627 case XLOG_CONTINUE_TRANS:
3628 error = xlog_recover_add_to_trans(log, trans,
3629 dp, be32_to_cpu(ohead->oh_len));
3630 break;
3631 default:
3632 xfs_warn(log->l_mp, "%s: bad flag 0x%x",
3633 __func__, flags);
3634 ASSERT(0);
3635 error = -EIO;
3636 break;
3637 }
3638 if (error) {
3639 xlog_recover_free_trans(trans);
3640 return error;
3641 }
3642 }
3643 dp += be32_to_cpu(ohead->oh_len); 3693 dp += be32_to_cpu(ohead->oh_len);
3644 num_logops--; 3694 num_logops--;
3645 } 3695 }
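The rewritten loop is now a plain cursor walk over the record payload: step over each op header, validate the advertised length against the end of the record, process, and advance. A userspace sketch of the same walk (a toy header with a native-endian 32-bit length; the kernel's xlog_op_header and be32 conversions are elided):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct op_head {		/* toy stand-in for xlog_op_header */
	uint32_t len;		/* payload bytes following the header */
};

static int walk_ops(const char *dp, const char *end, int num_ops)
{
	while (dp < end && num_ops) {
		struct op_head oh;

		memcpy(&oh, dp, sizeof(oh));
		dp += sizeof(oh);	/* step over the header... */
		if (dp + oh.len > end)
			return -1;	/* bad length: abort recovery */
		/* ...process oh.len payload bytes here... */
		dp += oh.len;		/* ...then over the payload */
		num_ops--;
	}
	return 0;
}

int main(void)
{
	char rec[sizeof(struct op_head) + 4] = { 0 };
	struct op_head oh = { .len = 4 };

	memcpy(rec, &oh, sizeof(oh));
	printf("%d\n", walk_ops(rec, rec + sizeof(rec), 1)); /* prints 0 */
	return 0;
}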
@@ -4132,41 +4182,13 @@ xlog_do_recovery_pass(
4132 } 4182 }
4133 4183
4134 memset(rhash, 0, sizeof(rhash)); 4184 memset(rhash, 0, sizeof(rhash));
4135 if (tail_blk <= head_blk) { 4185 blk_no = tail_blk;
4136 for (blk_no = tail_blk; blk_no < head_blk; ) { 4186 if (tail_blk > head_blk) {
4137 error = xlog_bread(log, blk_no, hblks, hbp, &offset);
4138 if (error)
4139 goto bread_err2;
4140
4141 rhead = (xlog_rec_header_t *)offset;
4142 error = xlog_valid_rec_header(log, rhead, blk_no);
4143 if (error)
4144 goto bread_err2;
4145
4146 /* blocks in data section */
4147 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
4148 error = xlog_bread(log, blk_no + hblks, bblks, dbp,
4149 &offset);
4150 if (error)
4151 goto bread_err2;
4152
4153 error = xlog_unpack_data(rhead, offset, log);
4154 if (error)
4155 goto bread_err2;
4156
4157 error = xlog_recover_process_data(log,
4158 rhash, rhead, offset, pass);
4159 if (error)
4160 goto bread_err2;
4161 blk_no += bblks + hblks;
4162 }
4163 } else {
4164 /* 4187 /*
4165 * Perform recovery around the end of the physical log. 4188 * Perform recovery around the end of the physical log.
4166 * When the head is not on the same cycle number as the tail, 4189 * When the head is not on the same cycle number as the tail,
4167 * we can't do a sequential recovery as above. 4190 * we can't do a sequential recovery.
4168 */ 4191 */
4169 blk_no = tail_blk;
4170 while (blk_no < log->l_logBBsize) { 4192 while (blk_no < log->l_logBBsize) {
4171 /* 4193 /*
4172 * Check for header wrapping around physical end-of-log 4194 * Check for header wrapping around physical end-of-log
@@ -4280,34 +4302,35 @@ xlog_do_recovery_pass(
4280 4302
4281 ASSERT(blk_no >= log->l_logBBsize); 4303 ASSERT(blk_no >= log->l_logBBsize);
4282 blk_no -= log->l_logBBsize; 4304 blk_no -= log->l_logBBsize;
4305 }
4283 4306
4284 /* read first part of physical log */ 4307 /* read first part of physical log */
4285 while (blk_no < head_blk) { 4308 while (blk_no < head_blk) {
4286 error = xlog_bread(log, blk_no, hblks, hbp, &offset); 4309 error = xlog_bread(log, blk_no, hblks, hbp, &offset);
4287 if (error) 4310 if (error)
4288 goto bread_err2; 4311 goto bread_err2;
4289 4312
4290 rhead = (xlog_rec_header_t *)offset; 4313 rhead = (xlog_rec_header_t *)offset;
4291 error = xlog_valid_rec_header(log, rhead, blk_no); 4314 error = xlog_valid_rec_header(log, rhead, blk_no);
4292 if (error) 4315 if (error)
4293 goto bread_err2; 4316 goto bread_err2;
4294 4317
4295 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 4318 /* blocks in data section */
4296 error = xlog_bread(log, blk_no+hblks, bblks, dbp, 4319 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
4297 &offset); 4320 error = xlog_bread(log, blk_no+hblks, bblks, dbp,
4298 if (error) 4321 &offset);
4299 goto bread_err2; 4322 if (error)
4323 goto bread_err2;
4300 4324
4301 error = xlog_unpack_data(rhead, offset, log); 4325 error = xlog_unpack_data(rhead, offset, log);
4302 if (error) 4326 if (error)
4303 goto bread_err2; 4327 goto bread_err2;
4304 4328
4305 error = xlog_recover_process_data(log, rhash, 4329 error = xlog_recover_process_data(log, rhash,
4306 rhead, offset, pass); 4330 rhead, offset, pass);
4307 if (error) 4331 if (error)
4308 goto bread_err2; 4332 goto bread_err2;
4309 blk_no += bblks + hblks; 4333 blk_no += bblks + hblks;
4310 }
4311 } 4334 }
4312 4335
4313 bread_err2: 4336 bread_err2:
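The restructured pass folds the wrapped and unwrapped cases together: start at the tail, and only if the tail sits beyond the head run to the physical end of the log, subtract the log size, then fall into the common tail-to-head loop. A sketch of the wrap arithmetic under that reading (hypothetical helper; xfs_daddr_t modelled as int64_t):

#include <stdint.h>
#include <stdio.h>

/* advance a block cursor past one record, wrapping at the physical log end */
static int64_t advance(int64_t blk_no, int hblks, int bblks, int64_t log_size)
{
	blk_no += hblks + bblks;
	if (blk_no >= log_size)
		blk_no -= log_size;	/* wrapped: continue from block 0 */
	return blk_no;
}

int main(void)
{
	/* a record of 1 header + 7 data blocks starting 4 blocks before the
	 * end of a 1024-block log continues at block 4 of the next pass */
	printf("%lld\n", (long long)advance(1020, 1, 7, 1024));
	return 0;
}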
@@ -4427,16 +4450,12 @@ xlog_do_recover(
4427 XFS_BUF_UNASYNC(bp); 4450 XFS_BUF_UNASYNC(bp);
4428 bp->b_ops = &xfs_sb_buf_ops; 4451 bp->b_ops = &xfs_sb_buf_ops;
4429 4452
4430 if (XFS_FORCED_SHUTDOWN(log->l_mp)) { 4453 error = xfs_buf_submit_wait(bp);
4431 xfs_buf_relse(bp);
4432 return -EIO;
4433 }
4434
4435 xfs_buf_iorequest(bp);
4436 error = xfs_buf_iowait(bp);
4437 if (error) { 4454 if (error) {
4438 xfs_buf_ioerror_alert(bp, __func__); 4455 if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
4439 ASSERT(0); 4456 xfs_buf_ioerror_alert(bp, __func__);
4457 ASSERT(0);
4458 }
4440 xfs_buf_relse(bp); 4459 xfs_buf_relse(bp);
4441 return error; 4460 return error;
4442 } 4461 }
@@ -4509,6 +4528,18 @@ xlog_recover(
4509 return -EINVAL; 4528 return -EINVAL;
4510 } 4529 }
4511 4530
4531 /*
4532 * Delay log recovery if the debug hook is set. This is debug
4533 * instrumentation to coordinate simulation of I/O failures with
4534 * log recovery.
4535 */
4536 if (xfs_globals.log_recovery_delay) {
4537 xfs_notice(log->l_mp,
4538 "Delaying log recovery for %d seconds.",
4539 xfs_globals.log_recovery_delay);
4540 msleep(xfs_globals.log_recovery_delay * 1000);
4541 }
4542
4512 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", 4543 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
4513 log->l_mp->m_logname ? log->l_mp->m_logname 4544 log->l_mp->m_logname ? log->l_mp->m_logname
4514 : "internal"); 4545 : "internal");
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fbf0384a466f..51435dbce9c4 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -61,8 +61,6 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
61static int xfs_uuid_table_size; 61static int xfs_uuid_table_size;
62static uuid_t *xfs_uuid_table; 62static uuid_t *xfs_uuid_table;
63 63
64extern struct kset *xfs_kset;
65
66/* 64/*
67 * See if the UUID is unique among mounted XFS filesystems. 65 * See if the UUID is unique among mounted XFS filesystems.
68 * Mount fails if UUID is nil or a FS with the same UUID is already mounted. 66 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -302,21 +300,15 @@ xfs_readsb(
302 * access to the superblock. 300 * access to the superblock.
303 */ 301 */
304reread: 302reread:
305 bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, 303 error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
306 BTOBB(sector_size), 0, buf_ops); 304 BTOBB(sector_size), 0, &bp, buf_ops);
307 if (!bp) { 305 if (error) {
308 if (loud)
309 xfs_warn(mp, "SB buffer read failed");
310 return -EIO;
311 }
312 if (bp->b_error) {
313 error = bp->b_error;
314 if (loud) 306 if (loud)
315 xfs_warn(mp, "SB validate failed with error %d.", error); 307 xfs_warn(mp, "SB validate failed with error %d.", error);
316 /* bad CRC means corrupted metadata */ 308 /* bad CRC means corrupted metadata */
317 if (error == -EFSBADCRC) 309 if (error == -EFSBADCRC)
318 error = -EFSCORRUPTED; 310 error = -EFSCORRUPTED;
319 goto release_buf; 311 return error;
320 } 312 }
321 313
322 /* 314 /*
@@ -546,40 +538,43 @@ xfs_set_inoalignment(xfs_mount_t *mp)
546 * Check that the data (and log if separate) is an ok size. 538 * Check that the data (and log if separate) is an ok size.
547 */ 539 */
548STATIC int 540STATIC int
549xfs_check_sizes(xfs_mount_t *mp) 541xfs_check_sizes(
542 struct xfs_mount *mp)
550{ 543{
551 xfs_buf_t *bp; 544 struct xfs_buf *bp;
552 xfs_daddr_t d; 545 xfs_daddr_t d;
546 int error;
553 547
554 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 548 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
555 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 549 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
556 xfs_warn(mp, "filesystem size mismatch detected"); 550 xfs_warn(mp, "filesystem size mismatch detected");
557 return -EFBIG; 551 return -EFBIG;
558 } 552 }
559 bp = xfs_buf_read_uncached(mp->m_ddev_targp, 553 error = xfs_buf_read_uncached(mp->m_ddev_targp,
560 d - XFS_FSS_TO_BB(mp, 1), 554 d - XFS_FSS_TO_BB(mp, 1),
561 XFS_FSS_TO_BB(mp, 1), 0, NULL); 555 XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
562 if (!bp) { 556 if (error) {
563 xfs_warn(mp, "last sector read failed"); 557 xfs_warn(mp, "last sector read failed");
564 return -EIO; 558 return error;
565 } 559 }
566 xfs_buf_relse(bp); 560 xfs_buf_relse(bp);
567 561
568 if (mp->m_logdev_targp != mp->m_ddev_targp) { 562 if (mp->m_logdev_targp == mp->m_ddev_targp)
569 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 563 return 0;
570 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 564
571 xfs_warn(mp, "log size mismatch detected"); 565 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
572 return -EFBIG; 566 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
573 } 567 xfs_warn(mp, "log size mismatch detected");
574 bp = xfs_buf_read_uncached(mp->m_logdev_targp, 568 return -EFBIG;
569 }
570 error = xfs_buf_read_uncached(mp->m_logdev_targp,
575 d - XFS_FSB_TO_BB(mp, 1), 571 d - XFS_FSB_TO_BB(mp, 1),
576 XFS_FSB_TO_BB(mp, 1), 0, NULL); 572 XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
577 if (!bp) { 573 if (error) {
578 xfs_warn(mp, "log device read failed"); 574 xfs_warn(mp, "log device read failed");
579 return -EIO; 575 return error;
580 }
581 xfs_buf_relse(bp);
582 } 576 }
577 xfs_buf_relse(bp);
583 return 0; 578 return 0;
584} 579}
585 580
@@ -729,7 +724,6 @@ xfs_mountfs(
729 724
730 xfs_set_maxicount(mp); 725 xfs_set_maxicount(mp);
731 726
732 mp->m_kobj.kobject.kset = xfs_kset;
733 error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); 727 error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
734 if (error) 728 if (error)
735 goto out; 729 goto out;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 1eb6f3df698c..30ecca3037e3 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -304,7 +304,8 @@ _xfs_mru_cache_reap(
304int 304int
305xfs_mru_cache_init(void) 305xfs_mru_cache_init(void)
306{ 306{
307 xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); 307 xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
308 WQ_MEM_RECLAIM|WQ_FREEZABLE, 1);
308 if (!xfs_mru_reap_wq) 309 if (!xfs_mru_reap_wq)
309 return -ENOMEM; 310 return -ENOMEM;
310 return 0; 311 return 0;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 10232102b4a6..d68f23021af3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -434,6 +434,7 @@ xfs_qm_dquot_isolate(
434 struct list_head *item, 434 struct list_head *item,
435 spinlock_t *lru_lock, 435 spinlock_t *lru_lock,
436 void *arg) 436 void *arg)
437 __releases(lru_lock) __acquires(lru_lock)
437{ 438{
438 struct xfs_dquot *dqp = container_of(item, 439 struct xfs_dquot *dqp = container_of(item,
439 struct xfs_dquot, q_lru); 440 struct xfs_dquot, q_lru);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 909e143b87ae..e1175ea9b551 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -46,7 +46,7 @@
46 * Keeps track of a current summary block, so we don't keep reading 46 * Keeps track of a current summary block, so we don't keep reading
47 * it from the buffer cache. 47 * it from the buffer cache.
48 */ 48 */
49STATIC int /* error */ 49static int
50xfs_rtget_summary( 50xfs_rtget_summary(
51 xfs_mount_t *mp, /* file system mount structure */ 51 xfs_mount_t *mp, /* file system mount structure */
52 xfs_trans_t *tp, /* transaction pointer */ 52 xfs_trans_t *tp, /* transaction pointer */
@@ -56,60 +56,9 @@ xfs_rtget_summary(
56 xfs_fsblock_t *rsb, /* in/out: summary block number */ 56 xfs_fsblock_t *rsb, /* in/out: summary block number */
57 xfs_suminfo_t *sum) /* out: summary info for this block */ 57 xfs_suminfo_t *sum) /* out: summary info for this block */
58{ 58{
59 xfs_buf_t *bp; /* buffer for summary block */ 59 return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum);
60 int error; /* error value */
61 xfs_fsblock_t sb; /* summary fsblock */
62 int so; /* index into the summary file */
63 xfs_suminfo_t *sp; /* pointer to returned data */
64
65 /*
66 * Compute entry number in the summary file.
67 */
68 so = XFS_SUMOFFS(mp, log, bbno);
69 /*
70 * Compute the block number in the summary file.
71 */
72 sb = XFS_SUMOFFSTOBLOCK(mp, so);
73 /*
74 * If we have an old buffer, and the block number matches, use that.
75 */
76 if (rbpp && *rbpp && *rsb == sb)
77 bp = *rbpp;
78 /*
79 * Otherwise we have to get the buffer.
80 */
81 else {
82 /*
83 * If there was an old one, get rid of it first.
84 */
85 if (rbpp && *rbpp)
86 xfs_trans_brelse(tp, *rbpp);
87 error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
88 if (error) {
89 return error;
90 }
91 /*
92 * Remember this buffer and block for the next call.
93 */
94 if (rbpp) {
95 *rbpp = bp;
96 *rsb = sb;
97 }
98 }
99 /*
100 * Point to the summary information & copy it out.
101 */
102 sp = XFS_SUMPTR(mp, bp, so);
103 *sum = *sp;
104 /*
105 * Drop the buffer if we're not asked to remember it.
106 */
107 if (!rbpp)
108 xfs_trans_brelse(tp, bp);
109 return 0;
110} 60}
111 61
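With this change a summary read is just a modify with a delta of 0, so the summary-buffer caching protocol (reuse *rbpp when the block number matches, release and refetch otherwise) presumably lives only in the shared xfs_rtmodify_summary_int(), newly declared in xfs_rtalloc.h below, instead of being duplicated in separate read and write paths.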
112
113/* 62/*
114 * Return whether there are any free extents in the size range given 63 * Return whether there are any free extents in the size range given
115 * by low and high, for the bitmap block bbno. 64 * by low and high, for the bitmap block bbno.
@@ -972,16 +921,11 @@ xfs_growfs_rt(
972 /* 921 /*
973 * Read in the last block of the device, make sure it exists. 922 * Read in the last block of the device, make sure it exists.
974 */ 923 */
975 bp = xfs_buf_read_uncached(mp->m_rtdev_targp, 924 error = xfs_buf_read_uncached(mp->m_rtdev_targp,
976 XFS_FSB_TO_BB(mp, nrblocks - 1), 925 XFS_FSB_TO_BB(mp, nrblocks - 1),
977 XFS_FSB_TO_BB(mp, 1), 0, NULL); 926 XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
978 if (!bp) 927 if (error)
979 return -EIO;
980 if (bp->b_error) {
981 error = bp->b_error;
982 xfs_buf_relse(bp);
983 return error; 928 return error;
984 }
985 xfs_buf_relse(bp); 929 xfs_buf_relse(bp);
986 930
987 /* 931 /*
@@ -1235,11 +1179,12 @@ xfs_rtallocate_extent(
1235 */ 1179 */
1236int /* error */ 1180int /* error */
1237xfs_rtmount_init( 1181xfs_rtmount_init(
1238 xfs_mount_t *mp) /* file system mount structure */ 1182 struct xfs_mount *mp) /* file system mount structure */
1239{ 1183{
1240 xfs_buf_t *bp; /* buffer for last block of subvolume */ 1184 struct xfs_buf *bp; /* buffer for last block of subvolume */
1241 xfs_daddr_t d; /* address of last block of subvolume */ 1185 struct xfs_sb *sbp; /* filesystem superblock copy in mount */
1242 xfs_sb_t *sbp; /* filesystem superblock copy in mount */ 1186 xfs_daddr_t d; /* address of last block of subvolume */
1187 int error;
1243 1188
1244 sbp = &mp->m_sb; 1189 sbp = &mp->m_sb;
1245 if (sbp->sb_rblocks == 0) 1190 if (sbp->sb_rblocks == 0)
@@ -1265,14 +1210,12 @@ xfs_rtmount_init(
1265 (unsigned long long) mp->m_sb.sb_rblocks); 1210 (unsigned long long) mp->m_sb.sb_rblocks);
1266 return -EFBIG; 1211 return -EFBIG;
1267 } 1212 }
1268 bp = xfs_buf_read_uncached(mp->m_rtdev_targp, 1213 error = xfs_buf_read_uncached(mp->m_rtdev_targp,
1269 d - XFS_FSB_TO_BB(mp, 1), 1214 d - XFS_FSB_TO_BB(mp, 1),
1270 XFS_FSB_TO_BB(mp, 1), 0, NULL); 1215 XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
1271 if (!bp || bp->b_error) { 1216 if (error) {
1272 xfs_warn(mp, "realtime device size check failed"); 1217 xfs_warn(mp, "realtime device size check failed");
1273 if (bp) 1218 return error;
1274 xfs_buf_relse(bp);
1275 return -EIO;
1276 } 1219 }
1277 xfs_buf_relse(bp); 1220 xfs_buf_relse(bp);
1278 return 0; 1221 return 0;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index c642795324af..76c0a4a9bb17 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -111,6 +111,10 @@ int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
111 xfs_rtblock_t *rtblock); 111 xfs_rtblock_t *rtblock);
112int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, 112int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
113 xfs_rtblock_t start, xfs_extlen_t len, int val); 113 xfs_rtblock_t start, xfs_extlen_t len, int val);
114int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
115 int log, xfs_rtblock_t bbno, int delta,
116 xfs_buf_t **rbpp, xfs_fsblock_t *rsb,
117 xfs_suminfo_t *sum);
114int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, 118int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
115 xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, 119 xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp,
116 xfs_fsblock_t *rsb); 120 xfs_fsblock_t *rsb);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index b194652033cd..9f622feda6a4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -47,6 +47,7 @@
47#include "xfs_dinode.h" 47#include "xfs_dinode.h"
48#include "xfs_filestream.h" 48#include "xfs_filestream.h"
49#include "xfs_quota.h" 49#include "xfs_quota.h"
50#include "xfs_sysfs.h"
50 51
51#include <linux/namei.h> 52#include <linux/namei.h>
52#include <linux/init.h> 53#include <linux/init.h>
@@ -61,7 +62,11 @@
61static const struct super_operations xfs_super_operations; 62static const struct super_operations xfs_super_operations;
62static kmem_zone_t *xfs_ioend_zone; 63static kmem_zone_t *xfs_ioend_zone;
63mempool_t *xfs_ioend_pool; 64mempool_t *xfs_ioend_pool;
64struct kset *xfs_kset; 65
66static struct kset *xfs_kset; /* top-level xfs sysfs dir */
67#ifdef DEBUG
68static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
69#endif
65 70
66#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ 71#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
67#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ 72#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
@@ -838,32 +843,32 @@ xfs_init_mount_workqueues(
838 struct xfs_mount *mp) 843 struct xfs_mount *mp)
839{ 844{
840 mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", 845 mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
841 WQ_MEM_RECLAIM, 0, mp->m_fsname); 846 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
842 if (!mp->m_data_workqueue) 847 if (!mp->m_data_workqueue)
843 goto out; 848 goto out;
844 849
845 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", 850 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
846 WQ_MEM_RECLAIM, 0, mp->m_fsname); 851 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
847 if (!mp->m_unwritten_workqueue) 852 if (!mp->m_unwritten_workqueue)
848 goto out_destroy_data_iodone_queue; 853 goto out_destroy_data_iodone_queue;
849 854
850 mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", 855 mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
851 WQ_MEM_RECLAIM, 0, mp->m_fsname); 856 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
852 if (!mp->m_cil_workqueue) 857 if (!mp->m_cil_workqueue)
853 goto out_destroy_unwritten; 858 goto out_destroy_unwritten;
854 859
855 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 860 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
856 0, 0, mp->m_fsname); 861 WQ_FREEZABLE, 0, mp->m_fsname);
857 if (!mp->m_reclaim_workqueue) 862 if (!mp->m_reclaim_workqueue)
858 goto out_destroy_cil; 863 goto out_destroy_cil;
859 864
860 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", 865 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
861 0, 0, mp->m_fsname); 866 WQ_FREEZABLE, 0, mp->m_fsname);
862 if (!mp->m_log_workqueue) 867 if (!mp->m_log_workqueue)
863 goto out_destroy_reclaim; 868 goto out_destroy_reclaim;
864 869
865 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", 870 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
866 0, 0, mp->m_fsname); 871 WQ_FREEZABLE, 0, mp->m_fsname);
867 if (!mp->m_eofblocks_workqueue) 872 if (!mp->m_eofblocks_workqueue)
868 goto out_destroy_log; 873 goto out_destroy_log;
869 874
@@ -1406,6 +1411,7 @@ xfs_fs_fill_super(
1406 atomic_set(&mp->m_active_trans, 0); 1411 atomic_set(&mp->m_active_trans, 0);
1407 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 1412 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1408 INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); 1413 INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
1414 mp->m_kobj.kobject.kset = xfs_kset;
1409 1415
1410 mp->m_super = sb; 1416 mp->m_super = sb;
1411 sb->s_fs_info = mp; 1417 sb->s_fs_info = mp;
@@ -1715,7 +1721,8 @@ xfs_init_workqueues(void)
1715 * AGs in all the filesystems mounted. Hence use the default large 1721 * AGs in all the filesystems mounted. Hence use the default large
1716 * max_active value for this workqueue. 1722 * max_active value for this workqueue.
1717 */ 1723 */
1718 xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); 1724 xfs_alloc_wq = alloc_workqueue("xfsalloc",
1725 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
1719 if (!xfs_alloc_wq) 1726 if (!xfs_alloc_wq)
1720 return -ENOMEM; 1727 return -ENOMEM;
1721 1728
@@ -1768,9 +1775,16 @@ init_xfs_fs(void)
1768 goto out_sysctl_unregister; 1775 }
1769 } 1776 }
1770 1777
1771 error = xfs_qm_init(); 1778#ifdef DEBUG
1779 xfs_dbg_kobj.kobject.kset = xfs_kset;
1780 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
1772 if (error) 1781 if (error)
1773 goto out_kset_unregister; 1782 goto out_kset_unregister;
1783#endif
1784
1785 error = xfs_qm_init();
1786 if (error)
1787 goto out_remove_kobj;
1774 1788
1775 error = register_filesystem(&xfs_fs_type); 1789 error = register_filesystem(&xfs_fs_type);
1776 if (error) 1790 if (error)
@@ -1779,7 +1793,11 @@ init_xfs_fs(void)
1779 1793
1780 out_qm_exit: 1794 out_qm_exit:
1781 xfs_qm_exit(); 1795 xfs_qm_exit();
1796 out_remove_kobj:
1797#ifdef DEBUG
1798 xfs_sysfs_del(&xfs_dbg_kobj);
1782 out_kset_unregister: 1799 out_kset_unregister:
1800#endif
1783 kset_unregister(xfs_kset); 1801 kset_unregister(xfs_kset);
1784 out_sysctl_unregister: 1802 out_sysctl_unregister:
1785 xfs_sysctl_unregister(); 1803 xfs_sysctl_unregister();
@@ -1802,6 +1820,9 @@ exit_xfs_fs(void)
1802{ 1820{
1803 xfs_qm_exit(); 1821 xfs_qm_exit();
1804 unregister_filesystem(&xfs_fs_type); 1822 unregister_filesystem(&xfs_fs_type);
1823#ifdef DEBUG
1824 xfs_sysfs_del(&xfs_dbg_kobj);
1825#endif
1805 kset_unregister(xfs_kset); 1826 kset_unregister(xfs_kset);
1806 xfs_sysctl_unregister(); 1827 xfs_sysctl_unregister();
1807 xfs_cleanup_procfs(); 1828 xfs_cleanup_procfs();
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 6a944a2cd36f..02ae62a998e0 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -269,9 +269,11 @@ xfs_symlink(
269 /* 269 /*
270 * Check for ability to enter directory entry, if no space reserved. 270 * Check for ability to enter directory entry, if no space reserved.
271 */ 271 */
272 error = xfs_dir_canenter(tp, dp, link_name, resblks); 272 if (!resblks) {
273 if (error) 273 error = xfs_dir_canenter(tp, dp, link_name);
274 goto error_return; 274 if (error)
275 goto error_return;
276 }
275 /* 277 /*
276 * Initialize the bmap freelist prior to calling either 278 * Initialize the bmap freelist prior to calling either
277 * bmapi or the directory create code. 279 * bmapi or the directory create code.
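The reservation check moves into the caller here: when blocks were reserved (resblks != 0) the directory entry is already guaranteed to fit, so the xfs_dir_canenter() probe is only needed in the no-reservation case, and its now-redundant resblks argument is dropped.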
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index bd8e157c20ef..ffef45375754 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -92,6 +92,11 @@ enum {
92 92
93extern xfs_param_t xfs_params; 93extern xfs_param_t xfs_params;
94 94
95struct xfs_globals {
96 int log_recovery_delay; /* log recovery delay (secs) */
97};
98extern struct xfs_globals xfs_globals;
99
95#ifdef CONFIG_SYSCTL 100#ifdef CONFIG_SYSCTL
96extern int xfs_sysctl_register(void); 101extern int xfs_sysctl_register(void);
97extern void xfs_sysctl_unregister(void); 102extern void xfs_sysctl_unregister(void);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 9835139ce1ec..aa03670851d8 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -51,6 +51,80 @@ struct kobj_type xfs_mp_ktype = {
51 .release = xfs_sysfs_release, 51 .release = xfs_sysfs_release,
52}; 52};
53 53
54#ifdef DEBUG
55/* debug */
56
57STATIC ssize_t
58log_recovery_delay_store(
59 const char *buf,
60 size_t count,
61 void *data)
62{
63 int ret;
64 int val;
65
66 ret = kstrtoint(buf, 0, &val);
67 if (ret)
68 return ret;
69
70 if (val < 0 || val > 60)
71 return -EINVAL;
72
73 xfs_globals.log_recovery_delay = val;
74
75 return count;
76}
77
78STATIC ssize_t
79log_recovery_delay_show(
80 char *buf,
81 void *data)
82{
83 return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
84}
85XFS_SYSFS_ATTR_RW(log_recovery_delay);
86
87static struct attribute *xfs_dbg_attrs[] = {
88 ATTR_LIST(log_recovery_delay),
89 NULL,
90};
91
92STATIC ssize_t
93xfs_dbg_show(
94 struct kobject *kobject,
95 struct attribute *attr,
96 char *buf)
97{
98 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
99
100 return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0;
101}
102
103STATIC ssize_t
104xfs_dbg_store(
105 struct kobject *kobject,
106 struct attribute *attr,
107 const char *buf,
108 size_t count)
109{
110 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
111
112 return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0;
113}
114
115static struct sysfs_ops xfs_dbg_ops = {
116 .show = xfs_dbg_show,
117 .store = xfs_dbg_store,
118};
119
120struct kobj_type xfs_dbg_ktype = {
121 .release = xfs_sysfs_release,
122 .sysfs_ops = &xfs_dbg_ops,
123 .default_attrs = xfs_dbg_attrs,
124};
125
126#endif /* DEBUG */
127
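Assuming xfs_kset is registered under /sys/fs (the registration site is not shown in this hunk), the knob above would be driven from userspace on a DEBUG build with something like echo 10 > /sys/fs/xfs/debug/log_recovery_delay before mounting; the store handler rejects anything outside the 0-60 second range with -EINVAL.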
54/* xlog */ 128/* xlog */
55 129
56STATIC ssize_t 130STATIC ssize_t
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 54a2091183c0..240eee35f342 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -20,6 +20,7 @@
20#define __XFS_SYSFS_H__ 20#define __XFS_SYSFS_H__
21 21
22extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ 22extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
23extern struct kobj_type xfs_dbg_ktype; /* debug */
23extern struct kobj_type xfs_log_ktype; /* xlog */ 24extern struct kobj_type xfs_log_ktype; /* xlog */
24 25
25static inline struct xfs_kobj * 26static inline struct xfs_kobj *
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 152f82782630..51372e34d988 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free);
349DEFINE_BUF_EVENT(xfs_buf_hold); 349DEFINE_BUF_EVENT(xfs_buf_hold);
350DEFINE_BUF_EVENT(xfs_buf_rele); 350DEFINE_BUF_EVENT(xfs_buf_rele);
351DEFINE_BUF_EVENT(xfs_buf_iodone); 351DEFINE_BUF_EVENT(xfs_buf_iodone);
352DEFINE_BUF_EVENT(xfs_buf_iorequest); 352DEFINE_BUF_EVENT(xfs_buf_submit);
353DEFINE_BUF_EVENT(xfs_buf_submit_wait);
353DEFINE_BUF_EVENT(xfs_buf_bawrite); 354DEFINE_BUF_EVENT(xfs_buf_bawrite);
354DEFINE_BUF_EVENT(xfs_buf_lock); 355DEFINE_BUF_EVENT(xfs_buf_lock);
355DEFINE_BUF_EVENT(xfs_buf_lock_done); 356DEFINE_BUF_EVENT(xfs_buf_lock_done);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 96c898e7ac9a..e2b2216b1635 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -318,20 +318,10 @@ xfs_trans_read_buf_map(
318 XFS_BUF_READ(bp); 318 XFS_BUF_READ(bp);
319 bp->b_ops = ops; 319 bp->b_ops = ops;
320 320
321 /* 321 error = xfs_buf_submit_wait(bp);
322 * XXX(hch): clean up the error handling here to be less
323 * of a mess..
324 */
325 if (XFS_FORCED_SHUTDOWN(mp)) {
326 trace_xfs_bdstrat_shut(bp, _RET_IP_);
327 xfs_bioerror_relse(bp);
328 } else {
329 xfs_buf_iorequest(bp);
330 }
331
332 error = xfs_buf_iowait(bp);
333 if (error) { 322 if (error) {
334 xfs_buf_ioerror_alert(bp, __func__); 323 if (!XFS_FORCED_SHUTDOWN(mp))
324 xfs_buf_ioerror_alert(bp, __func__);
335 xfs_buf_relse(bp); 325 xfs_buf_relse(bp);
336 /* 326 /*
337 * We can gracefully recover from most read 327 * We can gracefully recover from most read
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 50c3f5614288..cdb4d86520e1 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -70,7 +70,7 @@ xfs_trans_ichgtime(
70 int flags) 70 int flags)
71{ 71{
72 struct inode *inode = VFS_I(ip); 72 struct inode *inode = VFS_I(ip);
73 timespec_t tv; 73 struct timespec tv;
74 74
75 ASSERT(tp); 75 ASSERT(tp);
76 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 76 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));