aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c101
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c37
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c1
-rw-r--r--fs/xfs/xfs_bmap.c85
-rw-r--r--fs/xfs/xfs_bmap.h5
-rw-r--r--fs/xfs/xfs_dfrag.c13
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_error.h5
-rw-r--r--fs/xfs/xfs_filestream.c8
-rw-r--r--fs/xfs/xfs_inode_item.c31
-rw-r--r--fs/xfs/xfs_mount.c1
-rw-r--r--fs/xfs/xfs_quota.h20
-rw-r--r--fs/xfs/xfs_rename.c1
16 files changed, 222 insertions, 97 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c9af48fffcd7..691f61223ed6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -934,7 +934,6 @@ xfs_aops_discard_page(
934 struct xfs_inode *ip = XFS_I(inode); 934 struct xfs_inode *ip = XFS_I(inode);
935 struct buffer_head *bh, *head; 935 struct buffer_head *bh, *head;
936 loff_t offset = page_offset(page); 936 loff_t offset = page_offset(page);
937 ssize_t len = 1 << inode->i_blkbits;
938 937
939 if (!xfs_is_delayed_page(page, IO_DELAY)) 938 if (!xfs_is_delayed_page(page, IO_DELAY))
940 goto out_invalidate; 939 goto out_invalidate;
@@ -949,58 +948,14 @@ xfs_aops_discard_page(
949 xfs_ilock(ip, XFS_ILOCK_EXCL); 948 xfs_ilock(ip, XFS_ILOCK_EXCL);
950 bh = head = page_buffers(page); 949 bh = head = page_buffers(page);
951 do { 950 do {
952 int done;
953 xfs_fileoff_t offset_fsb;
954 xfs_bmbt_irec_t imap;
955 int nimaps = 1;
956 int error; 951 int error;
957 xfs_fsblock_t firstblock; 952 xfs_fileoff_t start_fsb;
958 xfs_bmap_free_t flist;
959 953
960 if (!buffer_delay(bh)) 954 if (!buffer_delay(bh))
961 goto next_buffer; 955 goto next_buffer;
962 956
963 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 957 start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
964 958 error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
965 /*
966 * Map the range first and check that it is a delalloc extent
967 * before trying to unmap the range. Otherwise we will be
968 * trying to remove a real extent (which requires a
969 * transaction) or a hole, which is probably a bad idea...
970 */
971 error = xfs_bmapi(NULL, ip, offset_fsb, 1,
972 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
973 &nimaps, NULL);
974
975 if (error) {
976 /* something screwed, just bail */
977 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
978 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
979 "page discard failed delalloc mapping lookup.");
980 }
981 break;
982 }
983 if (!nimaps) {
984 /* nothing there */
985 goto next_buffer;
986 }
987 if (imap.br_startblock != DELAYSTARTBLOCK) {
988 /* been converted, ignore */
989 goto next_buffer;
990 }
991 WARN_ON(imap.br_blockcount == 0);
992
993 /*
994 * Note: while we initialise the firstblock/flist pair, they
995 * should never be used because blocks should never be
996 * allocated or freed for a delalloc extent and hence we need
997 * don't cancel or finish them after the xfs_bunmapi() call.
998 */
999 xfs_bmap_init(&flist, &firstblock);
1000 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
1001 &flist, &done);
1002
1003 ASSERT(!flist.xbf_count && !flist.xbf_first);
1004 if (error) { 959 if (error) {
1005 /* something screwed, just bail */ 960 /* something screwed, just bail */
1006 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 961 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +965,7 @@ xfs_aops_discard_page(
1010 break; 965 break;
1011 } 966 }
1012next_buffer: 967next_buffer:
1013 offset += len; 968 offset += 1 << inode->i_blkbits;
1014 969
1015 } while ((bh = bh->b_this_page) != head); 970 } while ((bh = bh->b_this_page) != head);
1016 971
@@ -1111,11 +1066,12 @@ xfs_vm_writepage(
1111 uptodate = 0; 1066 uptodate = 0;
1112 1067
1113 /* 1068 /*
1114 * A hole may still be marked uptodate because discard_buffer 1069 * set_page_dirty dirties all buffers in a page, independent
1115 * leaves the flag set. 1070 * of their state. The dirty state however is entirely
1071 * meaningless for holes (!mapped && uptodate), so skip
1072 * buffers covering holes here.
1116 */ 1073 */
1117 if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 1074 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
1118 ASSERT(!buffer_dirty(bh));
1119 imap_valid = 0; 1075 imap_valid = 0;
1120 continue; 1076 continue;
1121 } 1077 }
@@ -1504,11 +1460,42 @@ xfs_vm_write_failed(
1504 struct inode *inode = mapping->host; 1460 struct inode *inode = mapping->host;
1505 1461
1506 if (to > inode->i_size) { 1462 if (to > inode->i_size) {
1507 struct iattr ia = { 1463 /*
1508 .ia_valid = ATTR_SIZE | ATTR_FORCE, 1464 * punch out the delalloc blocks we have already allocated. We
1509 .ia_size = inode->i_size, 1465 * don't call xfs_setattr() to do this as we may be in the
1510 }; 1466 * middle of a multi-iovec write and so the vfs inode->i_size
1511 xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); 1467 * will not match the xfs ip->i_size and so it will zero too
1468 * much. Hence we jus truncate the page cache to zero what is
1469 * necessary and punch the delalloc blocks directly.
1470 */
1471 struct xfs_inode *ip = XFS_I(inode);
1472 xfs_fileoff_t start_fsb;
1473 xfs_fileoff_t end_fsb;
1474 int error;
1475
1476 truncate_pagecache(inode, to, inode->i_size);
1477
1478 /*
1479 * Check if there are any blocks that are outside of i_size
1480 * that need to be trimmed back.
1481 */
1482 start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
1483 end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
1484 if (end_fsb <= start_fsb)
1485 return;
1486
1487 xfs_ilock(ip, XFS_ILOCK_EXCL);
1488 error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
1489 end_fsb - start_fsb);
1490 if (error) {
1491 /* something screwed, just bail */
1492 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1493 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
1494 "xfs_vm_write_failed: unable to clean up ino %lld",
1495 ip->i_ino);
1496 }
1497 }
1498 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1512 } 1499 }
1513} 1500}
1514 1501
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 63fd2c07cb57..4c5deb6e9e31 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -488,29 +488,16 @@ found:
488 spin_unlock(&pag->pag_buf_lock); 488 spin_unlock(&pag->pag_buf_lock);
489 xfs_perag_put(pag); 489 xfs_perag_put(pag);
490 490
491 /* Attempt to get the semaphore without sleeping, 491 if (xfs_buf_cond_lock(bp)) {
492 * if this does not work then we need to drop the 492 /* failed, so wait for the lock if requested. */
493 * spinlock and do a hard attempt on the semaphore.
494 */
495 if (down_trylock(&bp->b_sema)) {
496 if (!(flags & XBF_TRYLOCK)) { 493 if (!(flags & XBF_TRYLOCK)) {
497 /* wait for buffer ownership */
498 xfs_buf_lock(bp); 494 xfs_buf_lock(bp);
499 XFS_STATS_INC(xb_get_locked_waited); 495 XFS_STATS_INC(xb_get_locked_waited);
500 } else { 496 } else {
501 /* We asked for a trylock and failed, no need
502 * to look at file offset and length here, we
503 * know that this buffer at least overlaps our
504 * buffer and is locked, therefore our buffer
505 * either does not exist, or is this buffer.
506 */
507 xfs_buf_rele(bp); 497 xfs_buf_rele(bp);
508 XFS_STATS_INC(xb_busy_locked); 498 XFS_STATS_INC(xb_busy_locked);
509 return NULL; 499 return NULL;
510 } 500 }
511 } else {
512 /* trylock worked */
513 XB_SET_OWNER(bp);
514 } 501 }
515 502
516 if (bp->b_flags & XBF_STALE) { 503 if (bp->b_flags & XBF_STALE) {
@@ -876,10 +863,18 @@ xfs_buf_rele(
876 */ 863 */
877 864
878/* 865/*
879 * Locks a buffer object, if it is not already locked. 866 * Locks a buffer object, if it is not already locked. Note that this in
880 * Note that this in no way locks the underlying pages, so it is only 867 * no way locks the underlying pages, so it is only useful for
881 * useful for synchronizing concurrent use of buffer objects, not for 868 * synchronizing concurrent use of buffer objects, not for synchronizing
882 * synchronizing independent access to the underlying pages. 869 * independent access to the underlying pages.
870 *
871 * If we come across a stale, pinned, locked buffer, we know that we are
872 * being asked to lock a buffer that has been reallocated. Because it is
873 * pinned, we know that the log has not been pushed to disk and hence it
874 * will still be locked. Rather than continuing to have trylock attempts
875 * fail until someone else pushes the log, push it ourselves before
876 * returning. This means that the xfsaild will not get stuck trying
877 * to push on stale inode buffers.
883 */ 878 */
884int 879int
885xfs_buf_cond_lock( 880xfs_buf_cond_lock(
@@ -890,6 +885,8 @@ xfs_buf_cond_lock(
890 locked = down_trylock(&bp->b_sema) == 0; 885 locked = down_trylock(&bp->b_sema) == 0;
891 if (locked) 886 if (locked)
892 XB_SET_OWNER(bp); 887 XB_SET_OWNER(bp);
888 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
889 xfs_log_force(bp->b_target->bt_mount, 0);
893 890
894 trace_xfs_buf_cond_lock(bp, _RET_IP_); 891 trace_xfs_buf_cond_lock(bp, _RET_IP_);
895 return locked ? 0 : -EBUSY; 892 return locked ? 0 : -EBUSY;
@@ -1781,7 +1778,6 @@ xfs_buf_delwri_split(
1781 INIT_LIST_HEAD(list); 1778 INIT_LIST_HEAD(list);
1782 spin_lock(dwlk); 1779 spin_lock(dwlk);
1783 list_for_each_entry_safe(bp, n, dwq, b_list) { 1780 list_for_each_entry_safe(bp, n, dwq, b_list) {
1784 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1785 ASSERT(bp->b_flags & XBF_DELWRI); 1781 ASSERT(bp->b_flags & XBF_DELWRI);
1786 1782
1787 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { 1783 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1795,6 +1791,7 @@ xfs_buf_delwri_split(
1795 _XBF_RUN_QUEUES); 1791 _XBF_RUN_QUEUES);
1796 bp->b_flags |= XBF_WRITE; 1792 bp->b_flags |= XBF_WRITE;
1797 list_move_tail(&bp->b_list, list); 1793 list_move_tail(&bp->b_list, list);
1794 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1798 } else 1795 } else
1799 skipped++; 1796 skipped++;
1800 } 1797 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6d38e..ad442d9e392e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
416 if (IS_ERR(dentry)) 416 if (IS_ERR(dentry))
417 return PTR_ERR(dentry); 417 return PTR_ERR(dentry);
418 418
419 kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); 419 kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
420 if (!kbuf) 420 if (!kbuf)
421 goto out_dput; 421 goto out_dput;
422 422
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 96107efc0c61..94d5fd6a2973 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -762,7 +762,8 @@ xfs_setup_inode(
762 inode->i_state = I_NEW; 762 inode->i_state = I_NEW;
763 763
764 inode_sb_list_add(inode); 764 inode_sb_list_add(inode);
765 insert_inode_hash(inode); 765 /* make the inode look hashed for the writeback code */
766 hlist_add_fake(&inode->i_hash);
766 767
767 inode->i_mode = ip->i_d.di_mode; 768 inode->i_mode = ip->i_d.di_mode;
768 inode->i_nlink = ip->i_d.di_nlink; 769 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 7465a7ffc4fd..c115dd5e95a8 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
353 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
355 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 mp->m_flags |= XFS_MOUNT_DELAYLOG;
356 cmn_err(CE_WARN,
357 "Enabling EXPERIMENTAL delayed logging feature "
358 "- use at your own risk.\n");
359 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
360 mp->m_flags &= ~XFS_MOUNT_DELAYLOG; 357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
361 } else if (!strcmp(this_char, "ihashsize")) { 358 } else if (!strcmp(this_char, "ihashsize")) {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d33254981d..afb0d7cfad1c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -853,6 +853,7 @@ restart:
853 if (trylock) { 853 if (trylock) {
854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { 854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
855 skipped++; 855 skipped++;
856 xfs_perag_put(pag);
856 continue; 857 continue;
857 } 858 }
858 first_index = pag->pag_ici_reclaim_cursor; 859 first_index = pag->pag_ici_reclaim_cursor;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8abd12e32e13..4111cd3966c7 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
5471 if (error) 5471 if (error)
5472 goto out_unlock_iolock; 5472 goto out_unlock_iolock;
5473 } 5473 }
5474 5474 /*
5475 ASSERT(ip->i_delayed_blks == 0); 5475 * even after flushing the inode, there can still be delalloc
5476 * blocks on the inode beyond EOF due to speculative
5477 * preallocation. These are not removed until the release
5478 * function is called or the inode is inactivated. Hence we
5479 * cannot assert here that ip->i_delayed_blks == 0.
5480 */
5476 } 5481 }
5477 5482
5478 lock = xfs_ilock_map_shared(ip); 5483 lock = xfs_ilock_map_shared(ip);
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves(
6070 *count += xfs_bmbt_disk_get_blockcount(frp); 6075 *count += xfs_bmbt_disk_get_blockcount(frp);
6071 } 6076 }
6072} 6077}
6078
6079/*
6080 * dead simple method of punching delalyed allocation blocks from a range in
6081 * the inode. Walks a block at a time so will be slow, but is only executed in
6082 * rare error cases so the overhead is not critical. This will alays punch out
6083 * both the start and end blocks, even if the ranges only partially overlap
6084 * them, so it is up to the caller to ensure that partial blocks are not
6085 * passed in.
6086 */
6087int
6088xfs_bmap_punch_delalloc_range(
6089 struct xfs_inode *ip,
6090 xfs_fileoff_t start_fsb,
6091 xfs_fileoff_t length)
6092{
6093 xfs_fileoff_t remaining = length;
6094 int error = 0;
6095
6096 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
6097
6098 do {
6099 int done;
6100 xfs_bmbt_irec_t imap;
6101 int nimaps = 1;
6102 xfs_fsblock_t firstblock;
6103 xfs_bmap_free_t flist;
6104
6105 /*
6106 * Map the range first and check that it is a delalloc extent
6107 * before trying to unmap the range. Otherwise we will be
6108 * trying to remove a real extent (which requires a
6109 * transaction) or a hole, which is probably a bad idea...
6110 */
6111 error = xfs_bmapi(NULL, ip, start_fsb, 1,
6112 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
6113 &nimaps, NULL);
6114
6115 if (error) {
6116 /* something screwed, just bail */
6117 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
6118 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
6119 "Failed delalloc mapping lookup ino %lld fsb %lld.",
6120 ip->i_ino, start_fsb);
6121 }
6122 break;
6123 }
6124 if (!nimaps) {
6125 /* nothing there */
6126 goto next_block;
6127 }
6128 if (imap.br_startblock != DELAYSTARTBLOCK) {
6129 /* been converted, ignore */
6130 goto next_block;
6131 }
6132 WARN_ON(imap.br_blockcount == 0);
6133
6134 /*
6135 * Note: while we initialise the firstblock/flist pair, they
6136 * should never be used because blocks should never be
6137 * allocated or freed for a delalloc extent and hence we need
6138 * don't cancel or finish them after the xfs_bunmapi() call.
6139 */
6140 xfs_bmap_init(&flist, &firstblock);
6141 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
6142 &flist, &done);
6143 if (error)
6144 break;
6145
6146 ASSERT(!flist.xbf_count && !flist.xbf_first);
6147next_block:
6148 start_fsb++;
6149 remaining--;
6150 } while(remaining > 0);
6151
6152 return error;
6153}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 71ec9b6ecdfc..3651191daea1 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
394 int whichfork, 394 int whichfork,
395 int *count); 395 int *count);
396 396
397int
398xfs_bmap_punch_delalloc_range(
399 struct xfs_inode *ip,
400 xfs_fileoff_t start_fsb,
401 xfs_fileoff_t length);
397#endif /* __KERNEL__ */ 402#endif /* __KERNEL__ */
398 403
399#endif /* __XFS_BMAP_H__ */ 404#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c60a22..e60490bc00a6 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
377 ip->i_d.di_format = tip->i_d.di_format; 377 ip->i_d.di_format = tip->i_d.di_format;
378 tip->i_d.di_format = tmp; 378 tip->i_d.di_format = tmp;
379 379
380 /*
381 * The extents in the source inode could still contain speculative
382 * preallocation beyond EOF (e.g. the file is open but not modified
383 * while defrag is in progress). In that case, we need to copy over the
384 * number of delalloc blocks the data fork in the source inode is
385 * tracking beyond EOF so that when the fork is truncated away when the
386 * temporary inode is unlinked we don't underrun the i_delayed_blks
387 * counter on that inode.
388 */
389 ASSERT(tip->i_delayed_blks == 0);
390 tip->i_delayed_blks = ip->i_delayed_blks;
391 ip->i_delayed_blks = 0;
392
380 ilf_fields = XFS_ILOG_CORE; 393 ilf_fields = XFS_ILOG_CORE;
381 394
382 switch(ip->i_d.di_format) { 395 switch(ip->i_d.di_format) {
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed9990267661..c78cc6a3d87c 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,6 +58,7 @@ xfs_error_trap(int e)
58int xfs_etest[XFS_NUM_INJECT_ERROR]; 58int xfs_etest[XFS_NUM_INJECT_ERROR];
59int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; 59int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
60char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; 60char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
61int xfs_error_test_active;
61 62
62int 63int
63xfs_error_test(int error_tag, int *fsidp, char *expression, 64xfs_error_test(int error_tag, int *fsidp, char *expression,
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
108 len = strlen(mp->m_fsname); 109 len = strlen(mp->m_fsname);
109 xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); 110 xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
110 strcpy(xfs_etest_fsname[i], mp->m_fsname); 111 strcpy(xfs_etest_fsname[i], mp->m_fsname);
112 xfs_error_test_active++;
111 return 0; 113 return 0;
112 } 114 }
113 } 115 }
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
137 xfs_etest_fsid[i] = 0LL; 139 xfs_etest_fsid[i] = 0LL;
138 kmem_free(xfs_etest_fsname[i]); 140 kmem_free(xfs_etest_fsname[i]);
139 xfs_etest_fsname[i] = NULL; 141 xfs_etest_fsname[i] = NULL;
142 xfs_error_test_active--;
140 } 143 }
141 } 144 }
142 145
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c2c1a072bb82..f338847f80b8 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level,
127#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 127#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
128 128
129#ifdef DEBUG 129#ifdef DEBUG
130extern int xfs_error_test_active;
130extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); 131extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
131 132
132#define XFS_NUM_INJECT_ERROR 10 133#define XFS_NUM_INJECT_ERROR 10
133#define XFS_TEST_ERROR(expr, mp, tag, rf) \ 134#define XFS_TEST_ERROR(expr, mp, tag, rf) \
134 ((expr) || \ 135 ((expr) || (xfs_error_test_active && \
135 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ 136 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
136 (rf))) 137 (rf))))
137 138
138extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); 139extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
139extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); 140extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dce5699..9124425b7f2f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
744 * If the file's parent directory is known, take its iolock in exclusive 744 * If the file's parent directory is known, take its iolock in exclusive
745 * mode to prevent two sibling files from racing each other to migrate 745 * mode to prevent two sibling files from racing each other to migrate
746 * themselves and their parent to different AGs. 746 * themselves and their parent to different AGs.
747 *
748 * Note that we lock the parent directory iolock inside the child
749 * iolock here. That's fine as we never hold both parent and child
750 * iolock in any other place. This is different from the ilock,
751 * which requires locking of the child after the parent for namespace
752 * operations.
747 */ 753 */
748 if (pip) 754 if (pip)
749 xfs_ilock(pip, XFS_IOLOCK_EXCL); 755 xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
750 756
751 /* 757 /*
752 * A new AG needs to be found for the file. If the file's parent 758 * A new AG needs to be found for the file. If the file's parent
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c7ac020705df..7c8d30c453c3 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -657,18 +657,37 @@ xfs_inode_item_unlock(
657} 657}
658 658
659/* 659/*
660 * This is called to find out where the oldest active copy of the 660 * This is called to find out where the oldest active copy of the inode log
661 * inode log item in the on disk log resides now that the last log 661 * item in the on disk log resides now that the last log write of it completed
662 * write of it completed at the given lsn. Since we always re-log 662 * at the given lsn. Since we always re-log all dirty data in an inode, the
663 * all dirty data in an inode, the latest copy in the on disk log 663 * latest copy in the on disk log is the only one that matters. Therefore,
664 * is the only one that matters. Therefore, simply return the 664 * simply return the given lsn.
665 * given lsn. 665 *
666 * If the inode has been marked stale because the cluster is being freed, we
667 * don't want to (re-)insert this inode into the AIL. There is a race condition
668 * where the cluster buffer may be unpinned before the inode is inserted into
669 * the AIL during transaction committed processing. If the buffer is unpinned
670 * before the inode item has been committed and inserted, then it is possible
671 * for the buffer to be written and IO completions before the inode is inserted
672 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
673 * AIL which will never get removed. It will, however, get reclaimed which
674 * triggers an assert in xfs_inode_free() complaining about freein an inode
675 * still in the AIL.
676 *
677 * To avoid this, return a lower LSN than the one passed in so that the
678 * transaction committed code will not move the inode forward in the AIL but
679 * will still unpin it properly.
666 */ 680 */
667STATIC xfs_lsn_t 681STATIC xfs_lsn_t
668xfs_inode_item_committed( 682xfs_inode_item_committed(
669 struct xfs_log_item *lip, 683 struct xfs_log_item *lip,
670 xfs_lsn_t lsn) 684 xfs_lsn_t lsn)
671{ 685{
686 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
687 struct xfs_inode *ip = iip->ili_inode;
688
689 if (xfs_iflags_test(ip, XFS_ISTALE))
690 return lsn - 1;
672 return lsn; 691 return lsn;
673} 692}
674 693
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab5a399..19e9dfa1c254 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
275 pag = radix_tree_delete(&mp->m_perag_tree, agno); 275 pag = radix_tree_delete(&mp->m_perag_tree, agno);
276 spin_unlock(&mp->m_perag_lock); 276 spin_unlock(&mp->m_perag_lock);
277 ASSERT(pag); 277 ASSERT(pag);
278 ASSERT(atomic_read(&pag->pag_ref) == 0);
278 call_rcu(&pag->rcu_head, __xfs_free_perag); 279 call_rcu(&pag->rcu_head, __xfs_free_perag);
279 } 280 }
280} 281}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b113bd6..9bb6eda4cd21 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
346#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) 346#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
347#define xfs_trans_apply_dquot_deltas(tp) 347#define xfs_trans_apply_dquot_deltas(tp)
348#define xfs_trans_unreserve_and_mod_dquots(tp) 348#define xfs_trans_unreserve_and_mod_dquots(tp)
349#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) 349static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
350#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) 350 struct xfs_inode *ip, long nblks, long ninos, uint flags)
351{
352 return 0;
353}
354static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
355 struct xfs_mount *mp, struct xfs_dquot *udqp,
356 struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
357{
358 return 0;
359}
351#define xfs_qm_vop_create_dqattach(tp, ip, u, g) 360#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
352#define xfs_qm_vop_rename_dqattach(it) (0) 361#define xfs_qm_vop_rename_dqattach(it) (0)
353#define xfs_qm_vop_chown(tp, ip, old, new) (NULL) 362#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
357#define xfs_qm_dqdetach(ip) 366#define xfs_qm_dqdetach(ip)
358#define xfs_qm_dqrele(d) 367#define xfs_qm_dqrele(d)
359#define xfs_qm_statvfs(ip, s) 368#define xfs_qm_statvfs(ip, s)
360#define xfs_qm_sync(mp, fl) (0) 369static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
370{
371 return 0;
372}
361#define xfs_qm_newmount(mp, a, b) (0) 373#define xfs_qm_newmount(mp, a, b) (0)
362#define xfs_qm_mount_quotas(mp) 374#define xfs_qm_mount_quotas(mp)
363#define xfs_qm_unmount(mp) 375#define xfs_qm_unmount(mp)
364#define xfs_qm_unmount_quotas(mp) (0) 376#define xfs_qm_unmount_quotas(mp)
365#endif /* CONFIG_XFS_QUOTA */ 377#endif /* CONFIG_XFS_QUOTA */
366 378
367#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ 379#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d2af0a8381a6..77a59891734e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -297,6 +297,7 @@ xfs_rename(
297 * it and some incremental backup programs won't work without it. 297 * it and some incremental backup programs won't work without it.
298 */ 298 */
299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
300 xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
300 301
301 /* 302 /*
302 * Adjust the link count on src_dp. This is necessary when 303 * Adjust the link count on src_dp. This is necessary when