diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-18 13:17:37 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-18 13:17:37 -0400 |
commit | 962bf3eadfb62d1d15df59e43499ef82036ea878 (patch) | |
tree | 5bdd035489f537925e7329948de10d7a3281ba69 | |
parent | 7d77879bfd5ab0bcd9eb33180224b27fda61a7cd (diff) | |
parent | 330033d697ed8d296fa52b5303db9d802ad901cc (diff) |
Merge tag 'xfs-for-linus-3.15-rc2' of git://oss.sgi.com/xfs/xfs
Pull xfs bug fixes from Dave Chinner:
"The fixes are for data corruption issues, memory corruption and
regressions for changes merged in -rc1.
Data corruption fixes:
- fix a bunch of delayed allocation state mismatches
- fix collapse/zero range bugs
- fix a direct IO block mapping bug @ EOF
Other fixes:
- fix a use after free on metadata IO error
- fix a use after free on IO error during unmount
- fix an incorrect error sign on direct IO write errors
- add missing O_TMPFILE inode security context initialisation"
* tag 'xfs-for-linus-3.15-rc2' of git://oss.sgi.com/xfs/xfs:
xfs: fix tmpfile/selinux deadlock and initialize security
xfs: fix buffer use after free on IO error
xfs: wrong error sign conversion during failed DIO writes
xfs: unmount does not wait for shutdown during unmount
xfs: collapse range is delalloc challenged
xfs: don't map ranges that span EOF for direct IO
xfs: zeroing space needs to punch delalloc blocks
xfs: xfs_vm_write_end truncates too much on failure
xfs: write failure beyond EOF truncates too much data
xfs: kill buffers over failed write ranges properly
-rw-r--r-- | fs/xfs/xfs_aops.c | 51 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap.c | 17 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 16 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 20 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 53 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 1 |
10 files changed, 147 insertions, 33 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 75df77d09f75..0479c32c5eb1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1344,6 +1344,14 @@ __xfs_get_blocks( | |||
1344 | /* | 1344 | /* |
1345 | * If this is O_DIRECT or the mpage code calling tell them how large | 1345 | * If this is O_DIRECT or the mpage code calling tell them how large |
1346 | * the mapping is, so that we can avoid repeated get_blocks calls. | 1346 | * the mapping is, so that we can avoid repeated get_blocks calls. |
1347 | * | ||
1348 | * If the mapping spans EOF, then we have to break the mapping up as the | ||
1349 | * mapping for blocks beyond EOF must be marked new so that sub block | ||
1350 | * regions can be correctly zeroed. We can't do this for mappings within | ||
1351 | * EOF unless the mapping was just allocated or is unwritten, otherwise | ||
1352 | * the callers would overwrite existing data with zeros. Hence we have | ||
1353 | * to split the mapping into a range up to and including EOF, and a | ||
1354 | * second mapping for beyond EOF. | ||
1347 | */ | 1355 | */ |
1348 | if (direct || size > (1 << inode->i_blkbits)) { | 1356 | if (direct || size > (1 << inode->i_blkbits)) { |
1349 | xfs_off_t mapping_size; | 1357 | xfs_off_t mapping_size; |
@@ -1354,6 +1362,12 @@ __xfs_get_blocks( | |||
1354 | ASSERT(mapping_size > 0); | 1362 | ASSERT(mapping_size > 0); |
1355 | if (mapping_size > size) | 1363 | if (mapping_size > size) |
1356 | mapping_size = size; | 1364 | mapping_size = size; |
1365 | if (offset < i_size_read(inode) && | ||
1366 | offset + mapping_size >= i_size_read(inode)) { | ||
1367 | /* limit mapping to block that spans EOF */ | ||
1368 | mapping_size = roundup_64(i_size_read(inode) - offset, | ||
1369 | 1 << inode->i_blkbits); | ||
1370 | } | ||
1357 | if (mapping_size > LONG_MAX) | 1371 | if (mapping_size > LONG_MAX) |
1358 | mapping_size = LONG_MAX; | 1372 | mapping_size = LONG_MAX; |
1359 | 1373 | ||
@@ -1566,6 +1580,16 @@ xfs_vm_write_failed( | |||
1566 | 1580 | ||
1567 | xfs_vm_kill_delalloc_range(inode, block_offset, | 1581 | xfs_vm_kill_delalloc_range(inode, block_offset, |
1568 | block_offset + bh->b_size); | 1582 | block_offset + bh->b_size); |
1583 | |||
1584 | /* | ||
1585 | * This buffer does not contain data anymore. Make sure anyone | ||
1586 | * who finds it knows that for certain. | ||
1587 | */ | ||
1588 | clear_buffer_delay(bh); | ||
1589 | clear_buffer_uptodate(bh); | ||
1590 | clear_buffer_mapped(bh); | ||
1591 | clear_buffer_new(bh); | ||
1592 | clear_buffer_dirty(bh); | ||
1569 | } | 1593 | } |
1570 | 1594 | ||
1571 | } | 1595 | } |
@@ -1599,12 +1623,21 @@ xfs_vm_write_begin( | |||
1599 | status = __block_write_begin(page, pos, len, xfs_get_blocks); | 1623 | status = __block_write_begin(page, pos, len, xfs_get_blocks); |
1600 | if (unlikely(status)) { | 1624 | if (unlikely(status)) { |
1601 | struct inode *inode = mapping->host; | 1625 | struct inode *inode = mapping->host; |
1626 | size_t isize = i_size_read(inode); | ||
1602 | 1627 | ||
1603 | xfs_vm_write_failed(inode, page, pos, len); | 1628 | xfs_vm_write_failed(inode, page, pos, len); |
1604 | unlock_page(page); | 1629 | unlock_page(page); |
1605 | 1630 | ||
1606 | if (pos + len > i_size_read(inode)) | 1631 | /* |
1607 | truncate_pagecache(inode, i_size_read(inode)); | 1632 | * If the write is beyond EOF, we only want to kill blocks |
1633 | * allocated in this write, not blocks that were previously | ||
1634 | * written successfully. | ||
1635 | */ | ||
1636 | if (pos + len > isize) { | ||
1637 | ssize_t start = max_t(ssize_t, pos, isize); | ||
1638 | |||
1639 | truncate_pagecache_range(inode, start, pos + len); | ||
1640 | } | ||
1608 | 1641 | ||
1609 | page_cache_release(page); | 1642 | page_cache_release(page); |
1610 | page = NULL; | 1643 | page = NULL; |
@@ -1615,9 +1648,12 @@ xfs_vm_write_begin( | |||
1615 | } | 1648 | } |
1616 | 1649 | ||
1617 | /* | 1650 | /* |
1618 | * On failure, we only need to kill delalloc blocks beyond EOF because they | 1651 | * On failure, we only need to kill delalloc blocks beyond EOF in the range of |
1619 | * will never be written. For blocks within EOF, generic_write_end() zeros them | 1652 | * this specific write because they will never be written. Previous writes |
1620 | * so they are safe to leave alone and be written with all the other valid data. | 1653 | * beyond EOF where block allocation succeeded do not need to be trashed, so |
1654 | * only new blocks from this write should be trashed. For blocks within | ||
1655 | * EOF, generic_write_end() zeros them so they are safe to leave alone and be | ||
1656 | * written with all the other valid data. | ||
1621 | */ | 1657 | */ |
1622 | STATIC int | 1658 | STATIC int |
1623 | xfs_vm_write_end( | 1659 | xfs_vm_write_end( |
@@ -1640,8 +1676,11 @@ xfs_vm_write_end( | |||
1640 | loff_t to = pos + len; | 1676 | loff_t to = pos + len; |
1641 | 1677 | ||
1642 | if (to > isize) { | 1678 | if (to > isize) { |
1643 | truncate_pagecache(inode, isize); | 1679 | /* only kill blocks in this write beyond EOF */ |
1680 | if (pos > isize) | ||
1681 | isize = pos; | ||
1644 | xfs_vm_kill_delalloc_range(inode, isize, to); | 1682 | xfs_vm_kill_delalloc_range(inode, isize, to); |
1683 | truncate_pagecache_range(inode, isize, to); | ||
1645 | } | 1684 | } |
1646 | } | 1685 | } |
1647 | return ret; | 1686 | return ret; |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 5b6092ef51ef..f0efc7e970ef 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5413,6 +5413,7 @@ xfs_bmap_shift_extents( | |||
5413 | int whichfork = XFS_DATA_FORK; | 5413 | int whichfork = XFS_DATA_FORK; |
5414 | int logflags; | 5414 | int logflags; |
5415 | xfs_filblks_t blockcount = 0; | 5415 | xfs_filblks_t blockcount = 0; |
5416 | int total_extents; | ||
5416 | 5417 | ||
5417 | if (unlikely(XFS_TEST_ERROR( | 5418 | if (unlikely(XFS_TEST_ERROR( |
5418 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | 5419 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && |
@@ -5429,7 +5430,6 @@ xfs_bmap_shift_extents( | |||
5429 | ASSERT(current_ext != NULL); | 5430 | ASSERT(current_ext != NULL); |
5430 | 5431 | ||
5431 | ifp = XFS_IFORK_PTR(ip, whichfork); | 5432 | ifp = XFS_IFORK_PTR(ip, whichfork); |
5432 | |||
5433 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | 5433 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { |
5434 | /* Read in all the extents */ | 5434 | /* Read in all the extents */ |
5435 | error = xfs_iread_extents(tp, ip, whichfork); | 5435 | error = xfs_iread_extents(tp, ip, whichfork); |
@@ -5456,7 +5456,6 @@ xfs_bmap_shift_extents( | |||
5456 | 5456 | ||
5457 | /* We are going to change core inode */ | 5457 | /* We are going to change core inode */ |
5458 | logflags = XFS_ILOG_CORE; | 5458 | logflags = XFS_ILOG_CORE; |
5459 | |||
5460 | if (ifp->if_flags & XFS_IFBROOT) { | 5459 | if (ifp->if_flags & XFS_IFBROOT) { |
5461 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | 5460 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); |
5462 | cur->bc_private.b.firstblock = *firstblock; | 5461 | cur->bc_private.b.firstblock = *firstblock; |
@@ -5467,8 +5466,14 @@ xfs_bmap_shift_extents( | |||
5467 | logflags |= XFS_ILOG_DEXT; | 5466 | logflags |= XFS_ILOG_DEXT; |
5468 | } | 5467 | } |
5469 | 5468 | ||
5470 | while (nexts++ < num_exts && | 5469 | /* |
5471 | *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) { | 5470 | * There may be delalloc extents in the data fork before the range we |
5471 | * are collapsing out, so we cannot | ||
5472 | * use the count of real extents here. Instead we have to calculate it | ||
5473 | * from the incore fork. | ||
5474 | */ | ||
5475 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
5476 | while (nexts++ < num_exts && *current_ext < total_extents) { | ||
5472 | 5477 | ||
5473 | gotp = xfs_iext_get_ext(ifp, *current_ext); | 5478 | gotp = xfs_iext_get_ext(ifp, *current_ext); |
5474 | xfs_bmbt_get_all(gotp, &got); | 5479 | xfs_bmbt_get_all(gotp, &got); |
@@ -5556,10 +5561,11 @@ xfs_bmap_shift_extents( | |||
5556 | } | 5561 | } |
5557 | 5562 | ||
5558 | (*current_ext)++; | 5563 | (*current_ext)++; |
5564 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
5559 | } | 5565 | } |
5560 | 5566 | ||
5561 | /* Check if we are done */ | 5567 | /* Check if we are done */ |
5562 | if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork)) | 5568 | if (*current_ext == total_extents) |
5563 | *done = 1; | 5569 | *done = 1; |
5564 | 5570 | ||
5565 | del_cursor: | 5571 | del_cursor: |
@@ -5568,6 +5574,5 @@ del_cursor: | |||
5568 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | 5574 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); |
5569 | 5575 | ||
5570 | xfs_trans_log_inode(tp, ip, logflags); | 5576 | xfs_trans_log_inode(tp, ip, logflags); |
5571 | |||
5572 | return error; | 5577 | return error; |
5573 | } | 5578 | } |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 01f6a646caa1..296160b8e78c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1418,6 +1418,8 @@ xfs_zero_file_space( | |||
1418 | xfs_off_t end_boundary; | 1418 | xfs_off_t end_boundary; |
1419 | int error; | 1419 | int error; |
1420 | 1420 | ||
1421 | trace_xfs_zero_file_space(ip); | ||
1422 | |||
1421 | granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); | 1423 | granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); |
1422 | 1424 | ||
1423 | /* | 1425 | /* |
@@ -1432,9 +1434,18 @@ xfs_zero_file_space( | |||
1432 | ASSERT(end_boundary <= offset + len); | 1434 | ASSERT(end_boundary <= offset + len); |
1433 | 1435 | ||
1434 | if (start_boundary < end_boundary - 1) { | 1436 | if (start_boundary < end_boundary - 1) { |
1435 | /* punch out the page cache over the conversion range */ | 1437 | /* |
1438 | * punch out delayed allocation blocks and the page cache over | ||
1439 | * the conversion range | ||
1440 | */ | ||
1441 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1442 | error = xfs_bmap_punch_delalloc_range(ip, | ||
1443 | XFS_B_TO_FSBT(mp, start_boundary), | ||
1444 | XFS_B_TO_FSB(mp, end_boundary - start_boundary)); | ||
1445 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1436 | truncate_pagecache_range(VFS_I(ip), start_boundary, | 1446 | truncate_pagecache_range(VFS_I(ip), start_boundary, |
1437 | end_boundary - 1); | 1447 | end_boundary - 1); |
1448 | |||
1438 | /* convert the blocks */ | 1449 | /* convert the blocks */ |
1439 | error = xfs_alloc_file_space(ip, start_boundary, | 1450 | error = xfs_alloc_file_space(ip, start_boundary, |
1440 | end_boundary - start_boundary - 1, | 1451 | end_boundary - start_boundary - 1, |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 107f2fdfe41f..cb10a0aaab3a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -1372,21 +1372,29 @@ xfs_buf_iorequest( | |||
1372 | xfs_buf_wait_unpin(bp); | 1372 | xfs_buf_wait_unpin(bp); |
1373 | xfs_buf_hold(bp); | 1373 | xfs_buf_hold(bp); |
1374 | 1374 | ||
1375 | /* Set the count to 1 initially, this will stop an I/O | 1375 | /* |
1376 | * Set the count to 1 initially, this will stop an I/O | ||
1376 | * completion callout which happens before we have started | 1377 | * completion callout which happens before we have started |
1377 | * all the I/O from calling xfs_buf_ioend too early. | 1378 | * all the I/O from calling xfs_buf_ioend too early. |
1378 | */ | 1379 | */ |
1379 | atomic_set(&bp->b_io_remaining, 1); | 1380 | atomic_set(&bp->b_io_remaining, 1); |
1380 | _xfs_buf_ioapply(bp); | 1381 | _xfs_buf_ioapply(bp); |
1381 | _xfs_buf_ioend(bp, 1); | 1382 | /* |
1383 | * If _xfs_buf_ioapply failed, we'll get back here with | ||
1384 | * only the reference we took above. _xfs_buf_ioend will | ||
1385 | * drop it to zero, so we'd better not queue it for later, | ||
1386 | * or we'll free it before it's done. | ||
1387 | */ | ||
1388 | _xfs_buf_ioend(bp, bp->b_error ? 0 : 1); | ||
1382 | 1389 | ||
1383 | xfs_buf_rele(bp); | 1390 | xfs_buf_rele(bp); |
1384 | } | 1391 | } |
1385 | 1392 | ||
1386 | /* | 1393 | /* |
1387 | * Waits for I/O to complete on the buffer supplied. It returns immediately if | 1394 | * Waits for I/O to complete on the buffer supplied. It returns immediately if |
1388 | * no I/O is pending or there is already a pending error on the buffer. It | 1395 | * no I/O is pending or there is already a pending error on the buffer, in which |
1389 | * returns the I/O error code, if any, or 0 if there was no error. | 1396 | * case nothing will ever complete. It returns the I/O error code, if any, or |
1397 | * 0 if there was no error. | ||
1390 | */ | 1398 | */ |
1391 | int | 1399 | int |
1392 | xfs_buf_iowait( | 1400 | xfs_buf_iowait( |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 79e96ce98733..82afdcb33183 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -679,7 +679,7 @@ xfs_file_dio_aio_write( | |||
679 | goto out; | 679 | goto out; |
680 | 680 | ||
681 | if (mapping->nrpages) { | 681 | if (mapping->nrpages) { |
682 | ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 682 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
683 | pos, -1); | 683 | pos, -1); |
684 | if (ret) | 684 | if (ret) |
685 | goto out; | 685 | goto out; |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5e7a38fa6ee6..768087bedbac 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1334,7 +1334,8 @@ int | |||
1334 | xfs_create_tmpfile( | 1334 | xfs_create_tmpfile( |
1335 | struct xfs_inode *dp, | 1335 | struct xfs_inode *dp, |
1336 | struct dentry *dentry, | 1336 | struct dentry *dentry, |
1337 | umode_t mode) | 1337 | umode_t mode, |
1338 | struct xfs_inode **ipp) | ||
1338 | { | 1339 | { |
1339 | struct xfs_mount *mp = dp->i_mount; | 1340 | struct xfs_mount *mp = dp->i_mount; |
1340 | struct xfs_inode *ip = NULL; | 1341 | struct xfs_inode *ip = NULL; |
@@ -1402,7 +1403,6 @@ xfs_create_tmpfile( | |||
1402 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); | 1403 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); |
1403 | 1404 | ||
1404 | ip->i_d.di_nlink--; | 1405 | ip->i_d.di_nlink--; |
1405 | d_tmpfile(dentry, VFS_I(ip)); | ||
1406 | error = xfs_iunlink(tp, ip); | 1406 | error = xfs_iunlink(tp, ip); |
1407 | if (error) | 1407 | if (error) |
1408 | goto out_trans_abort; | 1408 | goto out_trans_abort; |
@@ -1415,6 +1415,7 @@ xfs_create_tmpfile( | |||
1415 | xfs_qm_dqrele(gdqp); | 1415 | xfs_qm_dqrele(gdqp); |
1416 | xfs_qm_dqrele(pdqp); | 1416 | xfs_qm_dqrele(pdqp); |
1417 | 1417 | ||
1418 | *ipp = ip; | ||
1418 | return 0; | 1419 | return 0; |
1419 | 1420 | ||
1420 | out_trans_abort: | 1421 | out_trans_abort: |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 396cc1fafd0d..f2fcde52b66d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -334,7 +334,7 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, | |||
334 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, | 334 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, |
335 | umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); | 335 | umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); |
336 | int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, | 336 | int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, |
337 | umode_t mode); | 337 | umode_t mode, struct xfs_inode **ipp); |
338 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | 338 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, |
339 | struct xfs_inode *ip); | 339 | struct xfs_inode *ip); |
340 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 340 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 89b07e43ca28..ef1ca010f417 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -1053,11 +1053,25 @@ xfs_vn_tmpfile( | |||
1053 | struct dentry *dentry, | 1053 | struct dentry *dentry, |
1054 | umode_t mode) | 1054 | umode_t mode) |
1055 | { | 1055 | { |
1056 | int error; | 1056 | int error; |
1057 | struct xfs_inode *ip; | ||
1058 | struct inode *inode; | ||
1057 | 1059 | ||
1058 | error = xfs_create_tmpfile(XFS_I(dir), dentry, mode); | 1060 | error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); |
1061 | if (unlikely(error)) | ||
1062 | return -error; | ||
1059 | 1063 | ||
1060 | return -error; | 1064 | inode = VFS_I(ip); |
1065 | |||
1066 | error = xfs_init_security(inode, dir, &dentry->d_name); | ||
1067 | if (unlikely(error)) { | ||
1068 | iput(inode); | ||
1069 | return -error; | ||
1070 | } | ||
1071 | |||
1072 | d_tmpfile(dentry, inode); | ||
1073 | |||
1074 | return 0; | ||
1061 | } | 1075 | } |
1062 | 1076 | ||
1063 | static const struct inode_operations xfs_inode_operations = { | 1077 | static const struct inode_operations xfs_inode_operations = { |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 8497a00e399d..08624dc67317 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1181,11 +1181,14 @@ xlog_iodone(xfs_buf_t *bp) | |||
1181 | /* log I/O is always issued ASYNC */ | 1181 | /* log I/O is always issued ASYNC */ |
1182 | ASSERT(XFS_BUF_ISASYNC(bp)); | 1182 | ASSERT(XFS_BUF_ISASYNC(bp)); |
1183 | xlog_state_done_syncing(iclog, aborted); | 1183 | xlog_state_done_syncing(iclog, aborted); |
1184 | |||
1184 | /* | 1185 | /* |
1185 | * do not reference the buffer (bp) here as we could race | 1186 | * drop the buffer lock now that we are done. Nothing references |
1186 | * with it being freed after writing the unmount record to the | 1187 | * the buffer after this, so an unmount waiting on this lock can now |
1187 | * log. | 1188 | * tear it down safely. As such, it is unsafe to reference the buffer |
1189 | * (bp) after the unlock as we could race with it being freed. | ||
1188 | */ | 1190 | */ |
1191 | xfs_buf_unlock(bp); | ||
1189 | } | 1192 | } |
1190 | 1193 | ||
1191 | /* | 1194 | /* |
@@ -1368,8 +1371,16 @@ xlog_alloc_log( | |||
1368 | bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); | 1371 | bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); |
1369 | if (!bp) | 1372 | if (!bp) |
1370 | goto out_free_log; | 1373 | goto out_free_log; |
1371 | bp->b_iodone = xlog_iodone; | 1374 | |
1375 | /* | ||
1376 | * The iclogbuf buffer locks are held over IO but we are not going to do | ||
1377 | * IO yet. Hence unlock the buffer so that the log IO path can grab it | ||
1378 | * when appropriate. | ||
1379 | */ | ||
1372 | ASSERT(xfs_buf_islocked(bp)); | 1380 | ASSERT(xfs_buf_islocked(bp)); |
1381 | xfs_buf_unlock(bp); | ||
1382 | |||
1383 | bp->b_iodone = xlog_iodone; | ||
1373 | log->l_xbuf = bp; | 1384 | log->l_xbuf = bp; |
1374 | 1385 | ||
1375 | spin_lock_init(&log->l_icloglock); | 1386 | spin_lock_init(&log->l_icloglock); |
@@ -1398,6 +1409,9 @@ xlog_alloc_log( | |||
1398 | if (!bp) | 1409 | if (!bp) |
1399 | goto out_free_iclog; | 1410 | goto out_free_iclog; |
1400 | 1411 | ||
1412 | ASSERT(xfs_buf_islocked(bp)); | ||
1413 | xfs_buf_unlock(bp); | ||
1414 | |||
1401 | bp->b_iodone = xlog_iodone; | 1415 | bp->b_iodone = xlog_iodone; |
1402 | iclog->ic_bp = bp; | 1416 | iclog->ic_bp = bp; |
1403 | iclog->ic_data = bp->b_addr; | 1417 | iclog->ic_data = bp->b_addr; |
@@ -1422,7 +1436,6 @@ xlog_alloc_log( | |||
1422 | iclog->ic_callback_tail = &(iclog->ic_callback); | 1436 | iclog->ic_callback_tail = &(iclog->ic_callback); |
1423 | iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; | 1437 | iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; |
1424 | 1438 | ||
1425 | ASSERT(xfs_buf_islocked(iclog->ic_bp)); | ||
1426 | init_waitqueue_head(&iclog->ic_force_wait); | 1439 | init_waitqueue_head(&iclog->ic_force_wait); |
1427 | init_waitqueue_head(&iclog->ic_write_wait); | 1440 | init_waitqueue_head(&iclog->ic_write_wait); |
1428 | 1441 | ||
@@ -1631,6 +1644,12 @@ xlog_cksum( | |||
1631 | * we transition the iclogs to IOERROR state *after* flushing all existing | 1644 | * we transition the iclogs to IOERROR state *after* flushing all existing |
1632 | * iclogs to disk. This is because we don't want any more new transactions to be | 1645 | * iclogs to disk. This is because we don't want any more new transactions to be |
1633 | * started or completed afterwards. | 1646 | * started or completed afterwards. |
1647 | * | ||
1648 | * We lock the iclogbufs here so that we can serialise against IO completion | ||
1649 | * during unmount. We might be processing a shutdown triggered during unmount, | ||
1650 | * and that can occur asynchronously to the unmount thread, and hence we need to | ||
1651 | * ensure that completes before tearing down the iclogbufs. Hence we need to | ||
1652 | * hold the buffer lock across the log IO to achieve that. | ||
1634 | */ | 1653 | */ |
1635 | STATIC int | 1654 | STATIC int |
1636 | xlog_bdstrat( | 1655 | xlog_bdstrat( |
@@ -1638,6 +1657,7 @@ xlog_bdstrat( | |||
1638 | { | 1657 | { |
1639 | struct xlog_in_core *iclog = bp->b_fspriv; | 1658 | struct xlog_in_core *iclog = bp->b_fspriv; |
1640 | 1659 | ||
1660 | xfs_buf_lock(bp); | ||
1641 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 1661 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
1642 | xfs_buf_ioerror(bp, EIO); | 1662 | xfs_buf_ioerror(bp, EIO); |
1643 | xfs_buf_stale(bp); | 1663 | xfs_buf_stale(bp); |
@@ -1645,7 +1665,8 @@ xlog_bdstrat( | |||
1645 | /* | 1665 | /* |
1646 | * It would seem logical to return EIO here, but we rely on | 1666 | * It would seem logical to return EIO here, but we rely on |
1647 | * the log state machine to propagate I/O errors instead of | 1667 | * the log state machine to propagate I/O errors instead of |
1648 | * doing it here. | 1668 | * doing it here. Similarly, IO completion will unlock the |
1669 | * buffer, so we don't do it here. | ||
1649 | */ | 1670 | */ |
1650 | return 0; | 1671 | return 0; |
1651 | } | 1672 | } |
@@ -1847,14 +1868,28 @@ xlog_dealloc_log( | |||
1847 | xlog_cil_destroy(log); | 1868 | xlog_cil_destroy(log); |
1848 | 1869 | ||
1849 | /* | 1870 | /* |
1850 | * always need to ensure that the extra buffer does not point to memory | 1871 | * Cycle all the iclogbuf locks to make sure all log IO completion |
1851 | * owned by another log buffer before we free it. | 1872 | * is done before we tear down these buffers. |
1852 | */ | 1873 | */ |
1874 | iclog = log->l_iclog; | ||
1875 | for (i = 0; i < log->l_iclog_bufs; i++) { | ||
1876 | xfs_buf_lock(iclog->ic_bp); | ||
1877 | xfs_buf_unlock(iclog->ic_bp); | ||
1878 | iclog = iclog->ic_next; | ||
1879 | } | ||
1880 | |||
1881 | /* | ||
1882 | * Always need to ensure that the extra buffer does not point to memory | ||
1883 | * owned by another log buffer before we free it. Also, cycle the lock | ||
1884 | * first to ensure we've completed IO on it. | ||
1885 | */ | ||
1886 | xfs_buf_lock(log->l_xbuf); | ||
1887 | xfs_buf_unlock(log->l_xbuf); | ||
1853 | xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); | 1888 | xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); |
1854 | xfs_buf_free(log->l_xbuf); | 1889 | xfs_buf_free(log->l_xbuf); |
1855 | 1890 | ||
1856 | iclog = log->l_iclog; | 1891 | iclog = log->l_iclog; |
1857 | for (i=0; i<log->l_iclog_bufs; i++) { | 1892 | for (i = 0; i < log->l_iclog_bufs; i++) { |
1858 | xfs_buf_free(iclog->ic_bp); | 1893 | xfs_buf_free(iclog->ic_bp); |
1859 | next_iclog = iclog->ic_next; | 1894 | next_iclog = iclog->ic_next; |
1860 | kmem_free(iclog); | 1895 | kmem_free(iclog); |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a4ae41c179a8..65d8c793a25c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink); | |||
603 | DEFINE_INODE_EVENT(xfs_inactive_symlink); | 603 | DEFINE_INODE_EVENT(xfs_inactive_symlink); |
604 | DEFINE_INODE_EVENT(xfs_alloc_file_space); | 604 | DEFINE_INODE_EVENT(xfs_alloc_file_space); |
605 | DEFINE_INODE_EVENT(xfs_free_file_space); | 605 | DEFINE_INODE_EVENT(xfs_free_file_space); |
606 | DEFINE_INODE_EVENT(xfs_zero_file_space); | ||
606 | DEFINE_INODE_EVENT(xfs_collapse_file_space); | 607 | DEFINE_INODE_EVENT(xfs_collapse_file_space); |
607 | DEFINE_INODE_EVENT(xfs_readdir); | 608 | DEFINE_INODE_EVENT(xfs_readdir); |
608 | #ifdef CONFIG_XFS_POSIX_ACL | 609 | #ifdef CONFIG_XFS_POSIX_ACL |