aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-18 13:17:37 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-18 13:17:37 -0400
commit962bf3eadfb62d1d15df59e43499ef82036ea878 (patch)
tree5bdd035489f537925e7329948de10d7a3281ba69
parent7d77879bfd5ab0bcd9eb33180224b27fda61a7cd (diff)
parent330033d697ed8d296fa52b5303db9d802ad901cc (diff)
Merge tag 'xfs-for-linus-3.15-rc2' of git://oss.sgi.com/xfs/xfs
Pull xfs bug fixes from Dave Chinner: "The fixes are for data corruption issues, memory corruption and regressions for changes merged in -rc1. Data corruption fixes: - fix a bunch of delayed allocation state mismatches - fix collapse/zero range bugs - fix a direct IO block mapping bug @ EOF Other fixes: - fix a use after free on metadata IO error - fix a use after free on IO error during unmount - fix an incorrect error sign on direct IO write errors - add missing O_TMPFILE inode security context initialisation" * tag 'xfs-for-linus-3.15-rc2' of git://oss.sgi.com/xfs/xfs: xfs: fix tmpfile/selinux deadlock and initialize security xfs: fix buffer use after free on IO error xfs: wrong error sign conversion during failed DIO writes xfs: unmount does not wait for shutdown during unmount xfs: collapse range is delalloc challenged xfs: don't map ranges that span EOF for direct IO xfs: zeroing space needs to punch delalloc blocks xfs: xfs_vm_write_end truncates too much on failure xfs: write failure beyond EOF truncates too much data xfs: kill buffers over failed write ranges properly
-rw-r--r--fs/xfs/xfs_aops.c51
-rw-r--r--fs/xfs/xfs_bmap.c17
-rw-r--r--fs/xfs/xfs_bmap_util.c13
-rw-r--r--fs/xfs/xfs_buf.c16
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_inode.c5
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_iops.c20
-rw-r--r--fs/xfs/xfs_log.c53
-rw-r--r--fs/xfs/xfs_trace.h1
10 files changed, 147 insertions, 33 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 75df77d09f75..0479c32c5eb1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1344,6 +1344,14 @@ __xfs_get_blocks(
1344 /* 1344 /*
1345 * If this is O_DIRECT or the mpage code calling tell them how large 1345 * If this is O_DIRECT or the mpage code calling tell them how large
1346 * the mapping is, so that we can avoid repeated get_blocks calls. 1346 * the mapping is, so that we can avoid repeated get_blocks calls.
1347 *
1348 * If the mapping spans EOF, then we have to break the mapping up as the
1349 * mapping for blocks beyond EOF must be marked new so that sub block
1350 * regions can be correctly zeroed. We can't do this for mappings within
1351 * EOF unless the mapping was just allocated or is unwritten, otherwise
1352 * the callers would overwrite existing data with zeros. Hence we have
1353 * to split the mapping into a range up to and including EOF, and a
1354 * second mapping for beyond EOF.
1347 */ 1355 */
1348 if (direct || size > (1 << inode->i_blkbits)) { 1356 if (direct || size > (1 << inode->i_blkbits)) {
1349 xfs_off_t mapping_size; 1357 xfs_off_t mapping_size;
@@ -1354,6 +1362,12 @@ __xfs_get_blocks(
1354 ASSERT(mapping_size > 0); 1362 ASSERT(mapping_size > 0);
1355 if (mapping_size > size) 1363 if (mapping_size > size)
1356 mapping_size = size; 1364 mapping_size = size;
1365 if (offset < i_size_read(inode) &&
1366 offset + mapping_size >= i_size_read(inode)) {
1367 /* limit mapping to block that spans EOF */
1368 mapping_size = roundup_64(i_size_read(inode) - offset,
1369 1 << inode->i_blkbits);
1370 }
1357 if (mapping_size > LONG_MAX) 1371 if (mapping_size > LONG_MAX)
1358 mapping_size = LONG_MAX; 1372 mapping_size = LONG_MAX;
1359 1373
@@ -1566,6 +1580,16 @@ xfs_vm_write_failed(
1566 1580
1567 xfs_vm_kill_delalloc_range(inode, block_offset, 1581 xfs_vm_kill_delalloc_range(inode, block_offset,
1568 block_offset + bh->b_size); 1582 block_offset + bh->b_size);
1583
1584 /*
1585 * This buffer does not contain data anymore. make sure anyone
1586 * who finds it knows that for certain.
1587 */
1588 clear_buffer_delay(bh);
1589 clear_buffer_uptodate(bh);
1590 clear_buffer_mapped(bh);
1591 clear_buffer_new(bh);
1592 clear_buffer_dirty(bh);
1569 } 1593 }
1570 1594
1571} 1595}
@@ -1599,12 +1623,21 @@ xfs_vm_write_begin(
1599 status = __block_write_begin(page, pos, len, xfs_get_blocks); 1623 status = __block_write_begin(page, pos, len, xfs_get_blocks);
1600 if (unlikely(status)) { 1624 if (unlikely(status)) {
1601 struct inode *inode = mapping->host; 1625 struct inode *inode = mapping->host;
1626 size_t isize = i_size_read(inode);
1602 1627
1603 xfs_vm_write_failed(inode, page, pos, len); 1628 xfs_vm_write_failed(inode, page, pos, len);
1604 unlock_page(page); 1629 unlock_page(page);
1605 1630
1606 if (pos + len > i_size_read(inode)) 1631 /*
1607 truncate_pagecache(inode, i_size_read(inode)); 1632 * If the write is beyond EOF, we only want to kill blocks
1633 * allocated in this write, not blocks that were previously
1634 * written successfully.
1635 */
1636 if (pos + len > isize) {
1637 ssize_t start = max_t(ssize_t, pos, isize);
1638
1639 truncate_pagecache_range(inode, start, pos + len);
1640 }
1608 1641
1609 page_cache_release(page); 1642 page_cache_release(page);
1610 page = NULL; 1643 page = NULL;
@@ -1615,9 +1648,12 @@ xfs_vm_write_begin(
1615} 1648}
1616 1649
1617/* 1650/*
1618 * On failure, we only need to kill delalloc blocks beyond EOF because they 1651 * On failure, we only need to kill delalloc blocks beyond EOF in the range of
1619 * will never be written. For blocks within EOF, generic_write_end() zeros them 1652 * this specific write because they will never be written. Previous writes
1620 * so they are safe to leave alone and be written with all the other valid data. 1653 * beyond EOF where block allocation succeeded do not need to be trashed, so
1654 * only new blocks from this write should be trashed. For blocks within
1655 * EOF, generic_write_end() zeros them so they are safe to leave alone and be
1656 * written with all the other valid data.
1621 */ 1657 */
1622STATIC int 1658STATIC int
1623xfs_vm_write_end( 1659xfs_vm_write_end(
@@ -1640,8 +1676,11 @@ xfs_vm_write_end(
1640 loff_t to = pos + len; 1676 loff_t to = pos + len;
1641 1677
1642 if (to > isize) { 1678 if (to > isize) {
1643 truncate_pagecache(inode, isize); 1679 /* only kill blocks in this write beyond EOF */
1680 if (pos > isize)
1681 isize = pos;
1644 xfs_vm_kill_delalloc_range(inode, isize, to); 1682 xfs_vm_kill_delalloc_range(inode, isize, to);
1683 truncate_pagecache_range(inode, isize, to);
1645 } 1684 }
1646 } 1685 }
1647 return ret; 1686 return ret;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5b6092ef51ef..f0efc7e970ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5413,6 +5413,7 @@ xfs_bmap_shift_extents(
5413 int whichfork = XFS_DATA_FORK; 5413 int whichfork = XFS_DATA_FORK;
5414 int logflags; 5414 int logflags;
5415 xfs_filblks_t blockcount = 0; 5415 xfs_filblks_t blockcount = 0;
5416 int total_extents;
5416 5417
5417 if (unlikely(XFS_TEST_ERROR( 5418 if (unlikely(XFS_TEST_ERROR(
5418 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5419 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5429,7 +5430,6 @@ xfs_bmap_shift_extents(
5429 ASSERT(current_ext != NULL); 5430 ASSERT(current_ext != NULL);
5430 5431
5431 ifp = XFS_IFORK_PTR(ip, whichfork); 5432 ifp = XFS_IFORK_PTR(ip, whichfork);
5432
5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5434 /* Read in all the extents */ 5434 /* Read in all the extents */
5435 error = xfs_iread_extents(tp, ip, whichfork); 5435 error = xfs_iread_extents(tp, ip, whichfork);
@@ -5456,7 +5456,6 @@ xfs_bmap_shift_extents(
5456 5456
5457 /* We are going to change core inode */ 5457 /* We are going to change core inode */
5458 logflags = XFS_ILOG_CORE; 5458 logflags = XFS_ILOG_CORE;
5459
5460 if (ifp->if_flags & XFS_IFBROOT) { 5459 if (ifp->if_flags & XFS_IFBROOT) {
5461 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5460 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5462 cur->bc_private.b.firstblock = *firstblock; 5461 cur->bc_private.b.firstblock = *firstblock;
@@ -5467,8 +5466,14 @@ xfs_bmap_shift_extents(
5467 logflags |= XFS_ILOG_DEXT; 5466 logflags |= XFS_ILOG_DEXT;
5468 } 5467 }
5469 5468
5470 while (nexts++ < num_exts && 5469 /*
5471 *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) { 5470 * There may be delalloc extents in the data fork before the range we
5471 * are collapsing out, so we cannot
5472 * use the count of real extents here. Instead we have to calculate it
5473 * from the incore fork.
5474 */
5475 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5476 while (nexts++ < num_exts && *current_ext < total_extents) {
5472 5477
5473 gotp = xfs_iext_get_ext(ifp, *current_ext); 5478 gotp = xfs_iext_get_ext(ifp, *current_ext);
5474 xfs_bmbt_get_all(gotp, &got); 5479 xfs_bmbt_get_all(gotp, &got);
@@ -5556,10 +5561,11 @@ xfs_bmap_shift_extents(
5556 } 5561 }
5557 5562
5558 (*current_ext)++; 5563 (*current_ext)++;
5564 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5559 } 5565 }
5560 5566
5561 /* Check if we are done */ 5567 /* Check if we are done */
5562 if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork)) 5568 if (*current_ext == total_extents)
5563 *done = 1; 5569 *done = 1;
5564 5570
5565del_cursor: 5571del_cursor:
@@ -5568,6 +5574,5 @@ del_cursor:
5568 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5574 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5569 5575
5570 xfs_trans_log_inode(tp, ip, logflags); 5576 xfs_trans_log_inode(tp, ip, logflags);
5571
5572 return error; 5577 return error;
5573} 5578}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 01f6a646caa1..296160b8e78c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1418,6 +1418,8 @@ xfs_zero_file_space(
1418 xfs_off_t end_boundary; 1418 xfs_off_t end_boundary;
1419 int error; 1419 int error;
1420 1420
1421 trace_xfs_zero_file_space(ip);
1422
1421 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1423 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
1422 1424
1423 /* 1425 /*
@@ -1432,9 +1434,18 @@ xfs_zero_file_space(
1432 ASSERT(end_boundary <= offset + len); 1434 ASSERT(end_boundary <= offset + len);
1433 1435
1434 if (start_boundary < end_boundary - 1) { 1436 if (start_boundary < end_boundary - 1) {
1435 /* punch out the page cache over the conversion range */ 1437 /*
1438 * punch out delayed allocation blocks and the page cache over
1439 * the conversion range
1440 */
1441 xfs_ilock(ip, XFS_ILOCK_EXCL);
1442 error = xfs_bmap_punch_delalloc_range(ip,
1443 XFS_B_TO_FSBT(mp, start_boundary),
1444 XFS_B_TO_FSB(mp, end_boundary - start_boundary));
1445 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1436 truncate_pagecache_range(VFS_I(ip), start_boundary, 1446 truncate_pagecache_range(VFS_I(ip), start_boundary,
1437 end_boundary - 1); 1447 end_boundary - 1);
1448
1438 /* convert the blocks */ 1449 /* convert the blocks */
1439 error = xfs_alloc_file_space(ip, start_boundary, 1450 error = xfs_alloc_file_space(ip, start_boundary,
1440 end_boundary - start_boundary - 1, 1451 end_boundary - start_boundary - 1,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 107f2fdfe41f..cb10a0aaab3a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1372,21 +1372,29 @@ xfs_buf_iorequest(
1372 xfs_buf_wait_unpin(bp); 1372 xfs_buf_wait_unpin(bp);
1373 xfs_buf_hold(bp); 1373 xfs_buf_hold(bp);
1374 1374
1375 /* Set the count to 1 initially, this will stop an I/O 1375 /*
1376 * Set the count to 1 initially, this will stop an I/O
1376 * completion callout which happens before we have started 1377 * completion callout which happens before we have started
1377 * all the I/O from calling xfs_buf_ioend too early. 1378 * all the I/O from calling xfs_buf_ioend too early.
1378 */ 1379 */
1379 atomic_set(&bp->b_io_remaining, 1); 1380 atomic_set(&bp->b_io_remaining, 1);
1380 _xfs_buf_ioapply(bp); 1381 _xfs_buf_ioapply(bp);
1381 _xfs_buf_ioend(bp, 1); 1382 /*
1383 * If _xfs_buf_ioapply failed, we'll get back here with
1384 * only the reference we took above. _xfs_buf_ioend will
1385 * drop it to zero, so we'd better not queue it for later,
1386 * or we'll free it before it's done.
1387 */
1388 _xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
1382 1389
1383 xfs_buf_rele(bp); 1390 xfs_buf_rele(bp);
1384} 1391}
1385 1392
1386/* 1393/*
1387 * Waits for I/O to complete on the buffer supplied. It returns immediately if 1394 * Waits for I/O to complete on the buffer supplied. It returns immediately if
1388 * no I/O is pending or there is already a pending error on the buffer. It 1395 * no I/O is pending or there is already a pending error on the buffer, in which
1389 * returns the I/O error code, if any, or 0 if there was no error. 1396 * case nothing will ever complete. It returns the I/O error code, if any, or
1397 * 0 if there was no error.
1390 */ 1398 */
1391int 1399int
1392xfs_buf_iowait( 1400xfs_buf_iowait(
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 79e96ce98733..82afdcb33183 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -679,7 +679,7 @@ xfs_file_dio_aio_write(
679 goto out; 679 goto out;
680 680
681 if (mapping->nrpages) { 681 if (mapping->nrpages) {
682 ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 682 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
683 pos, -1); 683 pos, -1);
684 if (ret) 684 if (ret)
685 goto out; 685 goto out;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5e7a38fa6ee6..768087bedbac 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1334,7 +1334,8 @@ int
1334xfs_create_tmpfile( 1334xfs_create_tmpfile(
1335 struct xfs_inode *dp, 1335 struct xfs_inode *dp,
1336 struct dentry *dentry, 1336 struct dentry *dentry,
1337 umode_t mode) 1337 umode_t mode,
1338 struct xfs_inode **ipp)
1338{ 1339{
1339 struct xfs_mount *mp = dp->i_mount; 1340 struct xfs_mount *mp = dp->i_mount;
1340 struct xfs_inode *ip = NULL; 1341 struct xfs_inode *ip = NULL;
@@ -1402,7 +1403,6 @@ xfs_create_tmpfile(
1402 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1403 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1403 1404
1404 ip->i_d.di_nlink--; 1405 ip->i_d.di_nlink--;
1405 d_tmpfile(dentry, VFS_I(ip));
1406 error = xfs_iunlink(tp, ip); 1406 error = xfs_iunlink(tp, ip);
1407 if (error) 1407 if (error)
1408 goto out_trans_abort; 1408 goto out_trans_abort;
@@ -1415,6 +1415,7 @@ xfs_create_tmpfile(
1415 xfs_qm_dqrele(gdqp); 1415 xfs_qm_dqrele(gdqp);
1416 xfs_qm_dqrele(pdqp); 1416 xfs_qm_dqrele(pdqp);
1417 1417
1418 *ipp = ip;
1418 return 0; 1419 return 0;
1419 1420
1420 out_trans_abort: 1421 out_trans_abort:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 396cc1fafd0d..f2fcde52b66d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -334,7 +334,7 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
334int xfs_create(struct xfs_inode *dp, struct xfs_name *name, 334int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); 335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, 336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
337 umode_t mode); 337 umode_t mode, struct xfs_inode **ipp);
338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
339 struct xfs_inode *ip); 339 struct xfs_inode *ip);
340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 89b07e43ca28..ef1ca010f417 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1053,11 +1053,25 @@ xfs_vn_tmpfile(
1053 struct dentry *dentry, 1053 struct dentry *dentry,
1054 umode_t mode) 1054 umode_t mode)
1055{ 1055{
1056 int error; 1056 int error;
1057 struct xfs_inode *ip;
1058 struct inode *inode;
1057 1059
1058 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode); 1060 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
1061 if (unlikely(error))
1062 return -error;
1059 1063
1060 return -error; 1064 inode = VFS_I(ip);
1065
1066 error = xfs_init_security(inode, dir, &dentry->d_name);
1067 if (unlikely(error)) {
1068 iput(inode);
1069 return -error;
1070 }
1071
1072 d_tmpfile(dentry, inode);
1073
1074 return 0;
1061} 1075}
1062 1076
1063static const struct inode_operations xfs_inode_operations = { 1077static const struct inode_operations xfs_inode_operations = {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8497a00e399d..08624dc67317 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1181,11 +1181,14 @@ xlog_iodone(xfs_buf_t *bp)
1181 /* log I/O is always issued ASYNC */ 1181 /* log I/O is always issued ASYNC */
1182 ASSERT(XFS_BUF_ISASYNC(bp)); 1182 ASSERT(XFS_BUF_ISASYNC(bp));
1183 xlog_state_done_syncing(iclog, aborted); 1183 xlog_state_done_syncing(iclog, aborted);
1184
1184 /* 1185 /*
1185 * do not reference the buffer (bp) here as we could race 1186 * drop the buffer lock now that we are done. Nothing references
1186 * with it being freed after writing the unmount record to the 1187 * the buffer after this, so an unmount waiting on this lock can now
1187 * log. 1188 * tear it down safely. As such, it is unsafe to reference the buffer
1189 * (bp) after the unlock as we could race with it being freed.
1188 */ 1190 */
1191 xfs_buf_unlock(bp);
1189} 1192}
1190 1193
1191/* 1194/*
@@ -1368,8 +1371,16 @@ xlog_alloc_log(
1368 bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); 1371 bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
1369 if (!bp) 1372 if (!bp)
1370 goto out_free_log; 1373 goto out_free_log;
1371 bp->b_iodone = xlog_iodone; 1374
1375 /*
1376 * The iclogbuf buffer locks are held over IO but we are not going to do
1377 * IO yet. Hence unlock the buffer so that the log IO path can grab it
1378 * when appropriately.
1379 */
1372 ASSERT(xfs_buf_islocked(bp)); 1380 ASSERT(xfs_buf_islocked(bp));
1381 xfs_buf_unlock(bp);
1382
1383 bp->b_iodone = xlog_iodone;
1373 log->l_xbuf = bp; 1384 log->l_xbuf = bp;
1374 1385
1375 spin_lock_init(&log->l_icloglock); 1386 spin_lock_init(&log->l_icloglock);
@@ -1398,6 +1409,9 @@ xlog_alloc_log(
1398 if (!bp) 1409 if (!bp)
1399 goto out_free_iclog; 1410 goto out_free_iclog;
1400 1411
1412 ASSERT(xfs_buf_islocked(bp));
1413 xfs_buf_unlock(bp);
1414
1401 bp->b_iodone = xlog_iodone; 1415 bp->b_iodone = xlog_iodone;
1402 iclog->ic_bp = bp; 1416 iclog->ic_bp = bp;
1403 iclog->ic_data = bp->b_addr; 1417 iclog->ic_data = bp->b_addr;
@@ -1422,7 +1436,6 @@ xlog_alloc_log(
1422 iclog->ic_callback_tail = &(iclog->ic_callback); 1436 iclog->ic_callback_tail = &(iclog->ic_callback);
1423 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1437 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1424 1438
1425 ASSERT(xfs_buf_islocked(iclog->ic_bp));
1426 init_waitqueue_head(&iclog->ic_force_wait); 1439 init_waitqueue_head(&iclog->ic_force_wait);
1427 init_waitqueue_head(&iclog->ic_write_wait); 1440 init_waitqueue_head(&iclog->ic_write_wait);
1428 1441
@@ -1631,6 +1644,12 @@ xlog_cksum(
1631 * we transition the iclogs to IOERROR state *after* flushing all existing 1644 * we transition the iclogs to IOERROR state *after* flushing all existing
1632 * iclogs to disk. This is because we don't want anymore new transactions to be 1645 * iclogs to disk. This is because we don't want anymore new transactions to be
1633 * started or completed afterwards. 1646 * started or completed afterwards.
1647 *
1648 * We lock the iclogbufs here so that we can serialise against IO completion
1649 * during unmount. We might be processing a shutdown triggered during unmount,
1650 * and that can occur asynchronously to the unmount thread, and hence we need to
1651 * ensure that completes before tearing down the iclogbufs. Hence we need to
1652 * hold the buffer lock across the log IO to acheive that.
1634 */ 1653 */
1635STATIC int 1654STATIC int
1636xlog_bdstrat( 1655xlog_bdstrat(
@@ -1638,6 +1657,7 @@ xlog_bdstrat(
1638{ 1657{
1639 struct xlog_in_core *iclog = bp->b_fspriv; 1658 struct xlog_in_core *iclog = bp->b_fspriv;
1640 1659
1660 xfs_buf_lock(bp);
1641 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1661 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1642 xfs_buf_ioerror(bp, EIO); 1662 xfs_buf_ioerror(bp, EIO);
1643 xfs_buf_stale(bp); 1663 xfs_buf_stale(bp);
@@ -1645,7 +1665,8 @@ xlog_bdstrat(
1645 /* 1665 /*
1646 * It would seem logical to return EIO here, but we rely on 1666 * It would seem logical to return EIO here, but we rely on
1647 * the log state machine to propagate I/O errors instead of 1667 * the log state machine to propagate I/O errors instead of
1648 * doing it here. 1668 * doing it here. Similarly, IO completion will unlock the
1669 * buffer, so we don't do it here.
1649 */ 1670 */
1650 return 0; 1671 return 0;
1651 } 1672 }
@@ -1847,14 +1868,28 @@ xlog_dealloc_log(
1847 xlog_cil_destroy(log); 1868 xlog_cil_destroy(log);
1848 1869
1849 /* 1870 /*
1850 * always need to ensure that the extra buffer does not point to memory 1871 * Cycle all the iclogbuf locks to make sure all log IO completion
1851 * owned by another log buffer before we free it. 1872 * is done before we tear down these buffers.
1852 */ 1873 */
1874 iclog = log->l_iclog;
1875 for (i = 0; i < log->l_iclog_bufs; i++) {
1876 xfs_buf_lock(iclog->ic_bp);
1877 xfs_buf_unlock(iclog->ic_bp);
1878 iclog = iclog->ic_next;
1879 }
1880
1881 /*
1882 * Always need to ensure that the extra buffer does not point to memory
1883 * owned by another log buffer before we free it. Also, cycle the lock
1884 * first to ensure we've completed IO on it.
1885 */
1886 xfs_buf_lock(log->l_xbuf);
1887 xfs_buf_unlock(log->l_xbuf);
1853 xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); 1888 xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
1854 xfs_buf_free(log->l_xbuf); 1889 xfs_buf_free(log->l_xbuf);
1855 1890
1856 iclog = log->l_iclog; 1891 iclog = log->l_iclog;
1857 for (i=0; i<log->l_iclog_bufs; i++) { 1892 for (i = 0; i < log->l_iclog_bufs; i++) {
1858 xfs_buf_free(iclog->ic_bp); 1893 xfs_buf_free(iclog->ic_bp);
1859 next_iclog = iclog->ic_next; 1894 next_iclog = iclog->ic_next;
1860 kmem_free(iclog); 1895 kmem_free(iclog);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a4ae41c179a8..65d8c793a25c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
603DEFINE_INODE_EVENT(xfs_inactive_symlink); 603DEFINE_INODE_EVENT(xfs_inactive_symlink);
604DEFINE_INODE_EVENT(xfs_alloc_file_space); 604DEFINE_INODE_EVENT(xfs_alloc_file_space);
605DEFINE_INODE_EVENT(xfs_free_file_space); 605DEFINE_INODE_EVENT(xfs_free_file_space);
606DEFINE_INODE_EVENT(xfs_zero_file_space);
606DEFINE_INODE_EVENT(xfs_collapse_file_space); 607DEFINE_INODE_EVENT(xfs_collapse_file_space);
607DEFINE_INODE_EVENT(xfs_readdir); 608DEFINE_INODE_EVENT(xfs_readdir);
608#ifdef CONFIG_XFS_POSIX_ACL 609#ifdef CONFIG_XFS_POSIX_ACL