aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_ag.h36
-rw-r--r--fs/xfs/xfs_alloc_btree.c1
-rw-r--r--fs/xfs/xfs_aops.c51
-rw-r--r--fs/xfs/xfs_attr.c24
-rw-r--r--fs/xfs/xfs_attr_leaf.c21
-rw-r--r--fs/xfs/xfs_attr_list.c1
-rw-r--r--fs/xfs/xfs_attr_remote.c11
-rw-r--r--fs/xfs/xfs_bmap.c225
-rw-r--r--fs/xfs/xfs_bmap.h4
-rw-r--r--fs/xfs/xfs_bmap_btree.c9
-rw-r--r--fs/xfs/xfs_bmap_btree.h2
-rw-r--r--fs/xfs/xfs_bmap_util.c13
-rw-r--r--fs/xfs/xfs_btree.c44
-rw-r--r--fs/xfs/xfs_btree.h5
-rw-r--r--fs/xfs/xfs_buf.c33
-rw-r--r--fs/xfs/xfs_buf.h4
-rw-r--r--fs/xfs/xfs_buf_item.c3
-rw-r--r--fs/xfs/xfs_da_btree.c8
-rw-r--r--fs/xfs/xfs_da_btree.h8
-rw-r--r--fs/xfs/xfs_da_format.h10
-rw-r--r--fs/xfs/xfs_dir2.c26
-rw-r--r--fs/xfs/xfs_dir2.h4
-rw-r--r--fs/xfs/xfs_dir2_block.c9
-rw-r--r--fs/xfs/xfs_dir2_data.c3
-rw-r--r--fs/xfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_priv.h4
-rw-r--r--fs/xfs/xfs_dir2_readdir.c17
-rw-r--r--fs/xfs/xfs_dir2_sf.c21
-rw-r--r--fs/xfs/xfs_dquot.c53
-rw-r--r--fs/xfs/xfs_dquot.h2
-rw-r--r--fs/xfs/xfs_dquot_buf.c5
-rw-r--r--fs/xfs/xfs_export.c2
-rw-r--r--fs/xfs/xfs_file.c20
-rw-r--r--fs/xfs/xfs_filestream.c684
-rw-r--r--fs/xfs/xfs_filestream.h34
-rw-r--r--fs/xfs/xfs_format.h14
-rw-r--r--fs/xfs/xfs_fs.h1
-rw-r--r--fs/xfs/xfs_fsops.c36
-rw-r--r--fs/xfs/xfs_ialloc.c695
-rw-r--r--fs/xfs/xfs_ialloc_btree.c69
-rw-r--r--fs/xfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/xfs_icache.c12
-rw-r--r--fs/xfs/xfs_icache.h6
-rw-r--r--fs/xfs/xfs_inode.c72
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_inode_fork.c3
-rw-r--r--fs/xfs/xfs_inode_fork.h3
-rw-r--r--fs/xfs/xfs_ioctl.c5
-rw-r--r--fs/xfs/xfs_ioctl32.c5
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_iops.c73
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_log.c72
-rw-r--r--fs/xfs/xfs_log_cil.c50
-rw-r--r--fs/xfs/xfs_log_recover.c11
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_mru_cache.c151
-rw-r--r--fs/xfs/xfs_mru_cache.h31
-rw-r--r--fs/xfs/xfs_qm.c243
-rw-r--r--fs/xfs/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/xfs_quota_defs.h2
-rw-r--r--fs/xfs/xfs_quotaops.c29
-rw-r--r--fs/xfs/xfs_sb.c4
-rw-r--r--fs/xfs/xfs_sb.h10
-rw-r--r--fs/xfs/xfs_shared.h2
-rw-r--r--fs/xfs/xfs_stats.c1
-rw-r--r--fs/xfs/xfs_stats.h18
-rw-r--r--fs/xfs/xfs_super.c26
-rw-r--r--fs/xfs/xfs_symlink.c2
-rw-r--r--fs/xfs/xfs_symlink_remote.c1
-rw-r--r--fs/xfs/xfs_trace.c1
-rw-r--r--fs/xfs/xfs_trace.h59
-rw-r--r--fs/xfs/xfs_trans.c2
-rw-r--r--fs/xfs/xfs_trans_ail.c5
-rw-r--r--fs/xfs/xfs_trans_priv.h3
-rw-r--r--fs/xfs/xfs_trans_resv.c53
-rw-r--r--fs/xfs/xfs_trans_space.h7
-rw-r--r--fs/xfs/xfs_types.h2
79 files changed, 1738 insertions, 1467 deletions
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 0fdd4109c624..6e247a99f5db 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -160,30 +160,38 @@ typedef struct xfs_agi {
160 * still being referenced. 160 * still being referenced.
161 */ 161 */
162 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; 162 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
163 163 /*
164 * This marks the end of logging region 1 and start of logging region 2.
165 */
164 uuid_t agi_uuid; /* uuid of filesystem */ 166 uuid_t agi_uuid; /* uuid of filesystem */
165 __be32 agi_crc; /* crc of agi sector */ 167 __be32 agi_crc; /* crc of agi sector */
166 __be32 agi_pad32; 168 __be32 agi_pad32;
167 __be64 agi_lsn; /* last write sequence */ 169 __be64 agi_lsn; /* last write sequence */
168 170
171 __be32 agi_free_root; /* root of the free inode btree */
172 __be32 agi_free_level;/* levels in free inode btree */
173
169 /* structure must be padded to 64 bit alignment */ 174 /* structure must be padded to 64 bit alignment */
170} xfs_agi_t; 175} xfs_agi_t;
171 176
172#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) 177#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
173 178
174#define XFS_AGI_MAGICNUM 0x00000001 179#define XFS_AGI_MAGICNUM (1 << 0)
175#define XFS_AGI_VERSIONNUM 0x00000002 180#define XFS_AGI_VERSIONNUM (1 << 1)
176#define XFS_AGI_SEQNO 0x00000004 181#define XFS_AGI_SEQNO (1 << 2)
177#define XFS_AGI_LENGTH 0x00000008 182#define XFS_AGI_LENGTH (1 << 3)
178#define XFS_AGI_COUNT 0x00000010 183#define XFS_AGI_COUNT (1 << 4)
179#define XFS_AGI_ROOT 0x00000020 184#define XFS_AGI_ROOT (1 << 5)
180#define XFS_AGI_LEVEL 0x00000040 185#define XFS_AGI_LEVEL (1 << 6)
181#define XFS_AGI_FREECOUNT 0x00000080 186#define XFS_AGI_FREECOUNT (1 << 7)
182#define XFS_AGI_NEWINO 0x00000100 187#define XFS_AGI_NEWINO (1 << 8)
183#define XFS_AGI_DIRINO 0x00000200 188#define XFS_AGI_DIRINO (1 << 9)
184#define XFS_AGI_UNLINKED 0x00000400 189#define XFS_AGI_UNLINKED (1 << 10)
185#define XFS_AGI_NUM_BITS 11 190#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */
186#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) 191#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
192#define XFS_AGI_FREE_ROOT (1 << 11)
193#define XFS_AGI_FREE_LEVEL (1 << 12)
194#define XFS_AGI_NUM_BITS_R2 13
187 195
188/* disk block (xfs_daddr_t) in the AG */ 196/* disk block (xfs_daddr_t) in the AG */
189#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) 197#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index cc1eadcbb049..8358f1ded94d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -70,7 +70,6 @@ xfs_allocbt_alloc_block(
70 struct xfs_btree_cur *cur, 70 struct xfs_btree_cur *cur,
71 union xfs_btree_ptr *start, 71 union xfs_btree_ptr *start,
72 union xfs_btree_ptr *new, 72 union xfs_btree_ptr *new,
73 int length,
74 int *stat) 73 int *stat)
75{ 74{
76 int error; 75 int error;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 75df77d09f75..0479c32c5eb1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1344,6 +1344,14 @@ __xfs_get_blocks(
1344 /* 1344 /*
1345 * If this is O_DIRECT or the mpage code calling tell them how large 1345 * If this is O_DIRECT or the mpage code calling tell them how large
1346 * the mapping is, so that we can avoid repeated get_blocks calls. 1346 * the mapping is, so that we can avoid repeated get_blocks calls.
1347 *
1348 * If the mapping spans EOF, then we have to break the mapping up as the
1349 * mapping for blocks beyond EOF must be marked new so that sub block
1350 * regions can be correctly zeroed. We can't do this for mappings within
1351 * EOF unless the mapping was just allocated or is unwritten, otherwise
1352 * the callers would overwrite existing data with zeros. Hence we have
1353 * to split the mapping into a range up to and including EOF, and a
1354 * second mapping for beyond EOF.
1347 */ 1355 */
1348 if (direct || size > (1 << inode->i_blkbits)) { 1356 if (direct || size > (1 << inode->i_blkbits)) {
1349 xfs_off_t mapping_size; 1357 xfs_off_t mapping_size;
@@ -1354,6 +1362,12 @@ __xfs_get_blocks(
1354 ASSERT(mapping_size > 0); 1362 ASSERT(mapping_size > 0);
1355 if (mapping_size > size) 1363 if (mapping_size > size)
1356 mapping_size = size; 1364 mapping_size = size;
1365 if (offset < i_size_read(inode) &&
1366 offset + mapping_size >= i_size_read(inode)) {
1367 /* limit mapping to block that spans EOF */
1368 mapping_size = roundup_64(i_size_read(inode) - offset,
1369 1 << inode->i_blkbits);
1370 }
1357 if (mapping_size > LONG_MAX) 1371 if (mapping_size > LONG_MAX)
1358 mapping_size = LONG_MAX; 1372 mapping_size = LONG_MAX;
1359 1373
@@ -1566,6 +1580,16 @@ xfs_vm_write_failed(
1566 1580
1567 xfs_vm_kill_delalloc_range(inode, block_offset, 1581 xfs_vm_kill_delalloc_range(inode, block_offset,
1568 block_offset + bh->b_size); 1582 block_offset + bh->b_size);
1583
1584 /*
1585 * This buffer does not contain data anymore. make sure anyone
1586 * who finds it knows that for certain.
1587 */
1588 clear_buffer_delay(bh);
1589 clear_buffer_uptodate(bh);
1590 clear_buffer_mapped(bh);
1591 clear_buffer_new(bh);
1592 clear_buffer_dirty(bh);
1569 } 1593 }
1570 1594
1571} 1595}
@@ -1599,12 +1623,21 @@ xfs_vm_write_begin(
1599 status = __block_write_begin(page, pos, len, xfs_get_blocks); 1623 status = __block_write_begin(page, pos, len, xfs_get_blocks);
1600 if (unlikely(status)) { 1624 if (unlikely(status)) {
1601 struct inode *inode = mapping->host; 1625 struct inode *inode = mapping->host;
1626 size_t isize = i_size_read(inode);
1602 1627
1603 xfs_vm_write_failed(inode, page, pos, len); 1628 xfs_vm_write_failed(inode, page, pos, len);
1604 unlock_page(page); 1629 unlock_page(page);
1605 1630
1606 if (pos + len > i_size_read(inode)) 1631 /*
1607 truncate_pagecache(inode, i_size_read(inode)); 1632 * If the write is beyond EOF, we only want to kill blocks
1633 * allocated in this write, not blocks that were previously
1634 * written successfully.
1635 */
1636 if (pos + len > isize) {
1637 ssize_t start = max_t(ssize_t, pos, isize);
1638
1639 truncate_pagecache_range(inode, start, pos + len);
1640 }
1608 1641
1609 page_cache_release(page); 1642 page_cache_release(page);
1610 page = NULL; 1643 page = NULL;
@@ -1615,9 +1648,12 @@ xfs_vm_write_begin(
1615} 1648}
1616 1649
1617/* 1650/*
1618 * On failure, we only need to kill delalloc blocks beyond EOF because they 1651 * On failure, we only need to kill delalloc blocks beyond EOF in the range of
1619 * will never be written. For blocks within EOF, generic_write_end() zeros them 1652 * this specific write because they will never be written. Previous writes
1620 * so they are safe to leave alone and be written with all the other valid data. 1653 * beyond EOF where block allocation succeeded do not need to be trashed, so
1654 * only new blocks from this write should be trashed. For blocks within
1655 * EOF, generic_write_end() zeros them so they are safe to leave alone and be
1656 * written with all the other valid data.
1621 */ 1657 */
1622STATIC int 1658STATIC int
1623xfs_vm_write_end( 1659xfs_vm_write_end(
@@ -1640,8 +1676,11 @@ xfs_vm_write_end(
1640 loff_t to = pos + len; 1676 loff_t to = pos + len;
1641 1677
1642 if (to > isize) { 1678 if (to > isize) {
1643 truncate_pagecache(inode, isize); 1679 /* only kill blocks in this write beyond EOF */
1680 if (pos > isize)
1681 isize = pos;
1644 xfs_vm_kill_delalloc_range(inode, isize, to); 1682 xfs_vm_kill_delalloc_range(inode, isize, to);
1683 truncate_pagecache_range(inode, isize, to);
1645 } 1684 }
1646 } 1685 }
1647 return ret; 1686 return ret;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 86f482e5798f..1fc1f06277da 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -187,7 +187,7 @@ xfs_attr_calc_size(
187 * Out of line attribute, cannot double split, but 187 * Out of line attribute, cannot double split, but
188 * make room for the attribute value itself. 188 * make room for the attribute value itself.
189 */ 189 */
190 uint dblocks = XFS_B_TO_FSB(mp, args->valuelen); 190 uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
191 nblks += dblocks; 191 nblks += dblocks;
192 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); 192 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
193 } 193 }
@@ -604,11 +604,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
604 604
605 trace_xfs_attr_leaf_replace(args); 605 trace_xfs_attr_leaf_replace(args);
606 606
607 /* save the attribute state for later removal*/
607 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ 608 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
608 args->blkno2 = args->blkno; /* set 2nd entry info*/ 609 args->blkno2 = args->blkno; /* set 2nd entry info*/
609 args->index2 = args->index; 610 args->index2 = args->index;
610 args->rmtblkno2 = args->rmtblkno; 611 args->rmtblkno2 = args->rmtblkno;
611 args->rmtblkcnt2 = args->rmtblkcnt; 612 args->rmtblkcnt2 = args->rmtblkcnt;
613 args->rmtvaluelen2 = args->rmtvaluelen;
614
615 /*
616 * clear the remote attr state now that it is saved so that the
617 * values reflect the state of the attribute we are about to
618 * add, not the attribute we just found and will remove later.
619 */
620 args->rmtblkno = 0;
621 args->rmtblkcnt = 0;
622 args->rmtvaluelen = 0;
612 } 623 }
613 624
614 /* 625 /*
@@ -700,6 +711,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
700 args->blkno = args->blkno2; 711 args->blkno = args->blkno2;
701 args->rmtblkno = args->rmtblkno2; 712 args->rmtblkno = args->rmtblkno2;
702 args->rmtblkcnt = args->rmtblkcnt2; 713 args->rmtblkcnt = args->rmtblkcnt2;
714 args->rmtvaluelen = args->rmtvaluelen2;
703 if (args->rmtblkno) { 715 if (args->rmtblkno) {
704 error = xfs_attr_rmtval_remove(args); 716 error = xfs_attr_rmtval_remove(args);
705 if (error) 717 if (error)
@@ -905,13 +917,22 @@ restart:
905 917
906 trace_xfs_attr_node_replace(args); 918 trace_xfs_attr_node_replace(args);
907 919
920 /* save the attribute state for later removal*/
908 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ 921 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
909 args->blkno2 = args->blkno; /* set 2nd entry info*/ 922 args->blkno2 = args->blkno; /* set 2nd entry info*/
910 args->index2 = args->index; 923 args->index2 = args->index;
911 args->rmtblkno2 = args->rmtblkno; 924 args->rmtblkno2 = args->rmtblkno;
912 args->rmtblkcnt2 = args->rmtblkcnt; 925 args->rmtblkcnt2 = args->rmtblkcnt;
926 args->rmtvaluelen2 = args->rmtvaluelen;
927
928 /*
929 * clear the remote attr state now that it is saved so that the
930 * values reflect the state of the attribute we are about to
931 * add, not the attribute we just found and will remove later.
932 */
913 args->rmtblkno = 0; 933 args->rmtblkno = 0;
914 args->rmtblkcnt = 0; 934 args->rmtblkcnt = 0;
935 args->rmtvaluelen = 0;
915 } 936 }
916 937
917 retval = xfs_attr3_leaf_add(blk->bp, state->args); 938 retval = xfs_attr3_leaf_add(blk->bp, state->args);
@@ -1039,6 +1060,7 @@ restart:
1039 args->blkno = args->blkno2; 1060 args->blkno = args->blkno2;
1040 args->rmtblkno = args->rmtblkno2; 1061 args->rmtblkno = args->rmtblkno2;
1041 args->rmtblkcnt = args->rmtblkcnt2; 1062 args->rmtblkcnt = args->rmtblkcnt2;
1063 args->rmtvaluelen = args->rmtvaluelen2;
1042 if (args->rmtblkno) { 1064 if (args->rmtblkno) {
1043 error = xfs_attr_rmtval_remove(args); 1065 error = xfs_attr_rmtval_remove(args);
1044 if (error) 1066 if (error)
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index fe9587fab17a..511c283459b1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1229,6 +1229,7 @@ xfs_attr3_leaf_add_work(
1229 name_rmt->valueblk = 0; 1229 name_rmt->valueblk = 0;
1230 args->rmtblkno = 1; 1230 args->rmtblkno = 1;
1231 args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); 1231 args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
1232 args->rmtvaluelen = args->valuelen;
1232 } 1233 }
1233 xfs_trans_log_buf(args->trans, bp, 1234 xfs_trans_log_buf(args->trans, bp,
1234 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), 1235 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -2167,11 +2168,11 @@ xfs_attr3_leaf_lookup_int(
2167 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2168 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2168 continue; 2169 continue;
2169 args->index = probe; 2170 args->index = probe;
2170 args->valuelen = be32_to_cpu(name_rmt->valuelen); 2171 args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
2171 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2172 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2172 args->rmtblkcnt = xfs_attr3_rmt_blocks( 2173 args->rmtblkcnt = xfs_attr3_rmt_blocks(
2173 args->dp->i_mount, 2174 args->dp->i_mount,
2174 args->valuelen); 2175 args->rmtvaluelen);
2175 return XFS_ERROR(EEXIST); 2176 return XFS_ERROR(EEXIST);
2176 } 2177 }
2177 } 2178 }
@@ -2220,19 +2221,19 @@ xfs_attr3_leaf_getvalue(
2220 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); 2221 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2221 ASSERT(name_rmt->namelen == args->namelen); 2222 ASSERT(name_rmt->namelen == args->namelen);
2222 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); 2223 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
2223 valuelen = be32_to_cpu(name_rmt->valuelen); 2224 args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
2224 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2225 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2225 args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, 2226 args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
2226 valuelen); 2227 args->rmtvaluelen);
2227 if (args->flags & ATTR_KERNOVAL) { 2228 if (args->flags & ATTR_KERNOVAL) {
2228 args->valuelen = valuelen; 2229 args->valuelen = args->rmtvaluelen;
2229 return 0; 2230 return 0;
2230 } 2231 }
2231 if (args->valuelen < valuelen) { 2232 if (args->valuelen < args->rmtvaluelen) {
2232 args->valuelen = valuelen; 2233 args->valuelen = args->rmtvaluelen;
2233 return XFS_ERROR(ERANGE); 2234 return XFS_ERROR(ERANGE);
2234 } 2235 }
2235 args->valuelen = valuelen; 2236 args->valuelen = args->rmtvaluelen;
2236 } 2237 }
2237 return 0; 2238 return 0;
2238} 2239}
@@ -2519,7 +2520,7 @@ xfs_attr3_leaf_clearflag(
2519 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); 2520 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
2520 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); 2521 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2521 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2522 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2522 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2523 name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
2523 xfs_trans_log_buf(args->trans, bp, 2524 xfs_trans_log_buf(args->trans, bp,
2524 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); 2525 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
2525 } 2526 }
@@ -2677,7 +2678,7 @@ xfs_attr3_leaf_flipflags(
2677 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); 2678 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
2678 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); 2679 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
2679 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2680 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2680 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2681 name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
2681 xfs_trans_log_buf(args->trans, bp1, 2682 xfs_trans_log_buf(args->trans, bp1,
2682 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); 2683 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
2683 } 2684 }
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 01db96f60cf0..833fe5d98d80 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -447,6 +447,7 @@ xfs_attr3_leaf_list_int(
447 args.dp = context->dp; 447 args.dp = context->dp;
448 args.whichfork = XFS_ATTR_FORK; 448 args.whichfork = XFS_ATTR_FORK;
449 args.valuelen = valuelen; 449 args.valuelen = valuelen;
450 args.rmtvaluelen = valuelen;
450 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); 451 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
451 args.rmtblkno = be32_to_cpu(name_rmt->valueblk); 452 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
452 args.rmtblkcnt = xfs_attr3_rmt_blocks( 453 args.rmtblkcnt = xfs_attr3_rmt_blocks(
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 6e37823e2932..0f0679a134e2 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -68,7 +68,6 @@ xfs_attr3_rmt_blocks(
68 */ 68 */
69static bool 69static bool
70xfs_attr3_rmt_hdr_ok( 70xfs_attr3_rmt_hdr_ok(
71 struct xfs_mount *mp,
72 void *ptr, 71 void *ptr,
73 xfs_ino_t ino, 72 xfs_ino_t ino,
74 uint32_t offset, 73 uint32_t offset,
@@ -251,7 +250,7 @@ xfs_attr_rmtval_copyout(
251 byte_cnt = min(*valuelen, byte_cnt); 250 byte_cnt = min(*valuelen, byte_cnt);
252 251
253 if (xfs_sb_version_hascrc(&mp->m_sb)) { 252 if (xfs_sb_version_hascrc(&mp->m_sb)) {
254 if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset, 253 if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
255 byte_cnt, bno)) { 254 byte_cnt, bno)) {
256 xfs_alert(mp, 255 xfs_alert(mp,
257"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 256"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
@@ -337,7 +336,7 @@ xfs_attr_rmtval_get(
337 struct xfs_buf *bp; 336 struct xfs_buf *bp;
338 xfs_dablk_t lblkno = args->rmtblkno; 337 xfs_dablk_t lblkno = args->rmtblkno;
339 __uint8_t *dst = args->value; 338 __uint8_t *dst = args->value;
340 int valuelen = args->valuelen; 339 int valuelen;
341 int nmap; 340 int nmap;
342 int error; 341 int error;
343 int blkcnt = args->rmtblkcnt; 342 int blkcnt = args->rmtblkcnt;
@@ -347,7 +346,9 @@ xfs_attr_rmtval_get(
347 trace_xfs_attr_rmtval_get(args); 346 trace_xfs_attr_rmtval_get(args);
348 347
349 ASSERT(!(args->flags & ATTR_KERNOVAL)); 348 ASSERT(!(args->flags & ATTR_KERNOVAL));
349 ASSERT(args->rmtvaluelen == args->valuelen);
350 350
351 valuelen = args->rmtvaluelen;
351 while (valuelen > 0) { 352 while (valuelen > 0) {
352 nmap = ATTR_RMTVALUE_MAPSIZE; 353 nmap = ATTR_RMTVALUE_MAPSIZE;
353 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 354 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
@@ -415,7 +416,7 @@ xfs_attr_rmtval_set(
415 * attributes have headers, we can't just do a straight byte to FSB 416 * attributes have headers, we can't just do a straight byte to FSB
416 * conversion and have to take the header space into account. 417 * conversion and have to take the header space into account.
417 */ 418 */
418 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); 419 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
419 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 420 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
420 XFS_ATTR_FORK); 421 XFS_ATTR_FORK);
421 if (error) 422 if (error)
@@ -480,7 +481,7 @@ xfs_attr_rmtval_set(
480 */ 481 */
481 lblkno = args->rmtblkno; 482 lblkno = args->rmtblkno;
482 blkcnt = args->rmtblkcnt; 483 blkcnt = args->rmtblkcnt;
483 valuelen = args->valuelen; 484 valuelen = args->rmtvaluelen;
484 while (valuelen > 0) { 485 while (valuelen > 0) {
485 struct xfs_buf *bp; 486 struct xfs_buf *bp;
486 xfs_daddr_t dblkno; 487 xfs_daddr_t dblkno;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5b6092ef51ef..1ff0da6e2bf9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -94,7 +94,7 @@ xfs_bmap_compute_maxlevels(
94 maxleafents = MAXAEXTNUM; 94 maxleafents = MAXAEXTNUM;
95 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); 95 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
96 } 96 }
97 maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); 97 maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
98 minleafrecs = mp->m_bmap_dmnr[0]; 98 minleafrecs = mp->m_bmap_dmnr[0];
99 minnoderecs = mp->m_bmap_dmnr[1]; 99 minnoderecs = mp->m_bmap_dmnr[1];
100 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 100 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -233,7 +233,6 @@ xfs_default_attroffset(
233 */ 233 */
234STATIC void 234STATIC void
235xfs_bmap_forkoff_reset( 235xfs_bmap_forkoff_reset(
236 xfs_mount_t *mp,
237 xfs_inode_t *ip, 236 xfs_inode_t *ip,
238 int whichfork) 237 int whichfork)
239{ 238{
@@ -905,7 +904,7 @@ xfs_bmap_local_to_extents_empty(
905 ASSERT(ifp->if_bytes == 0); 904 ASSERT(ifp->if_bytes == 0);
906 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); 905 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
907 906
908 xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); 907 xfs_bmap_forkoff_reset(ip, whichfork);
909 ifp->if_flags &= ~XFS_IFINLINE; 908 ifp->if_flags &= ~XFS_IFINLINE;
910 ifp->if_flags |= XFS_IFEXTENTS; 909 ifp->if_flags |= XFS_IFEXTENTS;
911 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); 910 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
@@ -1675,7 +1674,6 @@ xfs_bmap_isaeof(
1675 */ 1674 */
1676int 1675int
1677xfs_bmap_last_offset( 1676xfs_bmap_last_offset(
1678 struct xfs_trans *tp,
1679 struct xfs_inode *ip, 1677 struct xfs_inode *ip,
1680 xfs_fileoff_t *last_block, 1678 xfs_fileoff_t *last_block,
1681 int whichfork) 1679 int whichfork)
@@ -3517,6 +3515,67 @@ xfs_bmap_adjacent(
3517#undef ISVALID 3515#undef ISVALID
3518} 3516}
3519 3517
3518static int
3519xfs_bmap_longest_free_extent(
3520 struct xfs_trans *tp,
3521 xfs_agnumber_t ag,
3522 xfs_extlen_t *blen,
3523 int *notinit)
3524{
3525 struct xfs_mount *mp = tp->t_mountp;
3526 struct xfs_perag *pag;
3527 xfs_extlen_t longest;
3528 int error = 0;
3529
3530 pag = xfs_perag_get(mp, ag);
3531 if (!pag->pagf_init) {
3532 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3533 if (error)
3534 goto out;
3535
3536 if (!pag->pagf_init) {
3537 *notinit = 1;
3538 goto out;
3539 }
3540 }
3541
3542 longest = xfs_alloc_longest_free_extent(mp, pag);
3543 if (*blen < longest)
3544 *blen = longest;
3545
3546out:
3547 xfs_perag_put(pag);
3548 return error;
3549}
3550
3551static void
3552xfs_bmap_select_minlen(
3553 struct xfs_bmalloca *ap,
3554 struct xfs_alloc_arg *args,
3555 xfs_extlen_t *blen,
3556 int notinit)
3557{
3558 if (notinit || *blen < ap->minlen) {
3559 /*
3560 * Since we did a BUF_TRYLOCK above, it is possible that
3561 * there is space for this request.
3562 */
3563 args->minlen = ap->minlen;
3564 } else if (*blen < args->maxlen) {
3565 /*
3566 * If the best seen length is less than the request length,
3567 * use the best as the minimum.
3568 */
3569 args->minlen = *blen;
3570 } else {
3571 /*
3572 * Otherwise we've seen an extent as big as maxlen, use that
3573 * as the minimum.
3574 */
3575 args->minlen = args->maxlen;
3576 }
3577}
3578
3520STATIC int 3579STATIC int
3521xfs_bmap_btalloc_nullfb( 3580xfs_bmap_btalloc_nullfb(
3522 struct xfs_bmalloca *ap, 3581 struct xfs_bmalloca *ap,
@@ -3524,111 +3583,74 @@ xfs_bmap_btalloc_nullfb(
3524 xfs_extlen_t *blen) 3583 xfs_extlen_t *blen)
3525{ 3584{
3526 struct xfs_mount *mp = ap->ip->i_mount; 3585 struct xfs_mount *mp = ap->ip->i_mount;
3527 struct xfs_perag *pag;
3528 xfs_agnumber_t ag, startag; 3586 xfs_agnumber_t ag, startag;
3529 int notinit = 0; 3587 int notinit = 0;
3530 int error; 3588 int error;
3531 3589
3532 if (ap->userdata && xfs_inode_is_filestream(ap->ip)) 3590 args->type = XFS_ALLOCTYPE_START_BNO;
3533 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3534 else
3535 args->type = XFS_ALLOCTYPE_START_BNO;
3536 args->total = ap->total; 3591 args->total = ap->total;
3537 3592
3538 /*
3539 * Search for an allocation group with a single extent large enough
3540 * for the request. If one isn't found, then adjust the minimum
3541 * allocation size to the largest space found.
3542 */
3543 startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); 3593 startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3544 if (startag == NULLAGNUMBER) 3594 if (startag == NULLAGNUMBER)
3545 startag = ag = 0; 3595 startag = ag = 0;
3546 3596
3547 pag = xfs_perag_get(mp, ag);
3548 while (*blen < args->maxlen) { 3597 while (*blen < args->maxlen) {
3549 if (!pag->pagf_init) { 3598 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3550 error = xfs_alloc_pagf_init(mp, args->tp, ag, 3599 &notinit);
3551 XFS_ALLOC_FLAG_TRYLOCK); 3600 if (error)
3552 if (error) { 3601 return error;
3553 xfs_perag_put(pag);
3554 return error;
3555 }
3556 }
3557
3558 /*
3559 * See xfs_alloc_fix_freelist...
3560 */
3561 if (pag->pagf_init) {
3562 xfs_extlen_t longest;
3563 longest = xfs_alloc_longest_free_extent(mp, pag);
3564 if (*blen < longest)
3565 *blen = longest;
3566 } else
3567 notinit = 1;
3568
3569 if (xfs_inode_is_filestream(ap->ip)) {
3570 if (*blen >= args->maxlen)
3571 break;
3572
3573 if (ap->userdata) {
3574 /*
3575 * If startag is an invalid AG, we've
3576 * come here once before and
3577 * xfs_filestream_new_ag picked the
3578 * best currently available.
3579 *
3580 * Don't continue looping, since we
3581 * could loop forever.
3582 */
3583 if (startag == NULLAGNUMBER)
3584 break;
3585
3586 error = xfs_filestream_new_ag(ap, &ag);
3587 xfs_perag_put(pag);
3588 if (error)
3589 return error;
3590 3602
3591 /* loop again to set 'blen'*/
3592 startag = NULLAGNUMBER;
3593 pag = xfs_perag_get(mp, ag);
3594 continue;
3595 }
3596 }
3597 if (++ag == mp->m_sb.sb_agcount) 3603 if (++ag == mp->m_sb.sb_agcount)
3598 ag = 0; 3604 ag = 0;
3599 if (ag == startag) 3605 if (ag == startag)
3600 break; 3606 break;
3601 xfs_perag_put(pag);
3602 pag = xfs_perag_get(mp, ag);
3603 } 3607 }
3604 xfs_perag_put(pag);
3605 3608
3606 /* 3609 xfs_bmap_select_minlen(ap, args, blen, notinit);
3607 * Since the above loop did a BUF_TRYLOCK, it is 3610 return 0;
3608 * possible that there is space for this request. 3611}
3609 */ 3612
3610 if (notinit || *blen < ap->minlen) 3613STATIC int
3611 args->minlen = ap->minlen; 3614xfs_bmap_btalloc_filestreams(
3612 /* 3615 struct xfs_bmalloca *ap,
3613 * If the best seen length is less than the request 3616 struct xfs_alloc_arg *args,
3614 * length, use the best as the minimum. 3617 xfs_extlen_t *blen)
3615 */ 3618{
3616 else if (*blen < args->maxlen) 3619 struct xfs_mount *mp = ap->ip->i_mount;
3617 args->minlen = *blen; 3620 xfs_agnumber_t ag;
3618 /* 3621 int notinit = 0;
3619 * Otherwise we've seen an extent as big as maxlen, 3622 int error;
3620 * use that as the minimum. 3623
3621 */ 3624 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3622 else 3625 args->total = ap->total;
3623 args->minlen = args->maxlen; 3626
3627 ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3628 if (ag == NULLAGNUMBER)
3629 ag = 0;
3630
3631 error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3632 if (error)
3633 return error;
3634
3635 if (*blen < args->maxlen) {
3636 error = xfs_filestream_new_ag(ap, &ag);
3637 if (error)
3638 return error;
3639
3640 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3641 &notinit);
3642 if (error)
3643 return error;
3644
3645 }
3646
3647 xfs_bmap_select_minlen(ap, args, blen, notinit);
3624 3648
3625 /* 3649 /*
3626 * set the failure fallback case to look in the selected 3650 * Set the failure fallback case to look in the selected AG as stream
3627 * AG as the stream may have moved. 3651 * may have moved.
3628 */ 3652 */
3629 if (xfs_inode_is_filestream(ap->ip)) 3653 ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3630 ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3631
3632 return 0; 3654 return 0;
3633} 3655}
3634 3656
@@ -3708,7 +3730,15 @@ xfs_bmap_btalloc(
3708 args.firstblock = *ap->firstblock; 3730 args.firstblock = *ap->firstblock;
3709 blen = 0; 3731 blen = 0;
3710 if (nullfb) { 3732 if (nullfb) {
3711 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); 3733 /*
3734 * Search for an allocation group with a single extent large
3735 * enough for the request. If one isn't found, then adjust
3736 * the minimum allocation size to the largest space found.
3737 */
3738 if (ap->userdata && xfs_inode_is_filestream(ap->ip))
3739 error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3740 else
3741 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3712 if (error) 3742 if (error)
3713 return error; 3743 return error;
3714 } else if (ap->flist->xbf_low) { 3744 } else if (ap->flist->xbf_low) {
@@ -5413,6 +5443,7 @@ xfs_bmap_shift_extents(
5413 int whichfork = XFS_DATA_FORK; 5443 int whichfork = XFS_DATA_FORK;
5414 int logflags; 5444 int logflags;
5415 xfs_filblks_t blockcount = 0; 5445 xfs_filblks_t blockcount = 0;
5446 int total_extents;
5416 5447
5417 if (unlikely(XFS_TEST_ERROR( 5448 if (unlikely(XFS_TEST_ERROR(
5418 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5449 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5429,7 +5460,6 @@ xfs_bmap_shift_extents(
5429 ASSERT(current_ext != NULL); 5460 ASSERT(current_ext != NULL);
5430 5461
5431 ifp = XFS_IFORK_PTR(ip, whichfork); 5462 ifp = XFS_IFORK_PTR(ip, whichfork);
5432
5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5463 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5434 /* Read in all the extents */ 5464 /* Read in all the extents */
5435 error = xfs_iread_extents(tp, ip, whichfork); 5465 error = xfs_iread_extents(tp, ip, whichfork);
@@ -5456,7 +5486,6 @@ xfs_bmap_shift_extents(
5456 5486
5457 /* We are going to change core inode */ 5487 /* We are going to change core inode */
5458 logflags = XFS_ILOG_CORE; 5488 logflags = XFS_ILOG_CORE;
5459
5460 if (ifp->if_flags & XFS_IFBROOT) { 5489 if (ifp->if_flags & XFS_IFBROOT) {
5461 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5490 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5462 cur->bc_private.b.firstblock = *firstblock; 5491 cur->bc_private.b.firstblock = *firstblock;
@@ -5467,8 +5496,14 @@ xfs_bmap_shift_extents(
5467 logflags |= XFS_ILOG_DEXT; 5496 logflags |= XFS_ILOG_DEXT;
5468 } 5497 }
5469 5498
5470 while (nexts++ < num_exts && 5499 /*
5471 *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) { 5500 * There may be delalloc extents in the data fork before the range we
5501 * are collapsing out, so we cannot
5502 * use the count of real extents here. Instead we have to calculate it
5503 * from the incore fork.
5504 */
5505 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5506 while (nexts++ < num_exts && *current_ext < total_extents) {
5472 5507
5473 gotp = xfs_iext_get_ext(ifp, *current_ext); 5508 gotp = xfs_iext_get_ext(ifp, *current_ext);
5474 xfs_bmbt_get_all(gotp, &got); 5509 xfs_bmbt_get_all(gotp, &got);
@@ -5556,10 +5591,11 @@ xfs_bmap_shift_extents(
5556 } 5591 }
5557 5592
5558 (*current_ext)++; 5593 (*current_ext)++;
5594 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5559 } 5595 }
5560 5596
5561 /* Check if we are done */ 5597 /* Check if we are done */
5562 if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork)) 5598 if (*current_ext == total_extents)
5563 *done = 1; 5599 *done = 1;
5564 5600
5565del_cursor: 5601del_cursor:
@@ -5568,6 +5604,5 @@ del_cursor:
5568 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5604 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5569 5605
5570 xfs_trans_log_inode(tp, ip, logflags); 5606 xfs_trans_log_inode(tp, ip, logflags);
5571
5572 return error; 5607 return error;
5573} 5608}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index f84bd7af43be..38ba36e9b2f0 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -156,8 +156,8 @@ int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
156 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); 156 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
157int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, 157int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
158 xfs_fileoff_t *last_block, int whichfork); 158 xfs_fileoff_t *last_block, int whichfork);
159int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip, 159int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
160 xfs_fileoff_t *unused, int whichfork); 160 int whichfork);
161int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); 161int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
162int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, 162int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
163 int whichfork); 163 int whichfork);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 818d546664e7..948836c4fd90 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -84,7 +84,7 @@ xfs_bmdr_to_bmbt(
84 rblock->bb_level = dblock->bb_level; 84 rblock->bb_level = dblock->bb_level;
85 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 85 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
86 rblock->bb_numrecs = dblock->bb_numrecs; 86 rblock->bb_numrecs = dblock->bb_numrecs;
87 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); 87 dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
88 fkp = XFS_BMDR_KEY_ADDR(dblock, 1); 88 fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
89 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); 89 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
90 fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); 90 fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
@@ -443,7 +443,7 @@ xfs_bmbt_to_bmdr(
443 ASSERT(rblock->bb_level != 0); 443 ASSERT(rblock->bb_level != 0);
444 dblock->bb_level = rblock->bb_level; 444 dblock->bb_level = rblock->bb_level;
445 dblock->bb_numrecs = rblock->bb_numrecs; 445 dblock->bb_numrecs = rblock->bb_numrecs;
446 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); 446 dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
447 fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); 447 fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
448 tkp = XFS_BMDR_KEY_ADDR(dblock, 1); 448 tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
449 fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); 449 fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
@@ -519,7 +519,6 @@ xfs_bmbt_alloc_block(
519 struct xfs_btree_cur *cur, 519 struct xfs_btree_cur *cur,
520 union xfs_btree_ptr *start, 520 union xfs_btree_ptr *start,
521 union xfs_btree_ptr *new, 521 union xfs_btree_ptr *new,
522 int length,
523 int *stat) 522 int *stat)
524{ 523{
525 xfs_alloc_arg_t args; /* block allocation args */ 524 xfs_alloc_arg_t args; /* block allocation args */
@@ -672,8 +671,7 @@ xfs_bmbt_get_dmaxrecs(
672{ 671{
673 if (level != cur->bc_nlevels - 1) 672 if (level != cur->bc_nlevels - 1)
674 return cur->bc_mp->m_bmap_dmxr[level != 0]; 673 return cur->bc_mp->m_bmap_dmxr[level != 0];
675 return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize, 674 return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
676 level == 0);
677} 675}
678 676
679STATIC void 677STATIC void
@@ -914,7 +912,6 @@ xfs_bmbt_maxrecs(
914 */ 912 */
915int 913int
916xfs_bmdr_maxrecs( 914xfs_bmdr_maxrecs(
917 struct xfs_mount *mp,
918 int blocklen, 915 int blocklen,
919 int leaf) 916 int leaf)
920{ 917{
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 6e42e1e50b89..819a8a4dee95 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -130,7 +130,7 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
130 xfs_bmdr_block_t *, int); 130 xfs_bmdr_block_t *, int);
131 131
132extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); 132extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
133extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); 133extern int xfs_bmdr_maxrecs(int blocklen, int leaf);
134extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); 134extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
135 135
136extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, 136extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 01f6a646caa1..296160b8e78c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1418,6 +1418,8 @@ xfs_zero_file_space(
1418 xfs_off_t end_boundary; 1418 xfs_off_t end_boundary;
1419 int error; 1419 int error;
1420 1420
1421 trace_xfs_zero_file_space(ip);
1422
1421 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1423 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
1422 1424
1423 /* 1425 /*
@@ -1432,9 +1434,18 @@ xfs_zero_file_space(
1432 ASSERT(end_boundary <= offset + len); 1434 ASSERT(end_boundary <= offset + len);
1433 1435
1434 if (start_boundary < end_boundary - 1) { 1436 if (start_boundary < end_boundary - 1) {
1435 /* punch out the page cache over the conversion range */ 1437 /*
1438 * punch out delayed allocation blocks and the page cache over
1439 * the conversion range
1440 */
1441 xfs_ilock(ip, XFS_ILOCK_EXCL);
1442 error = xfs_bmap_punch_delalloc_range(ip,
1443 XFS_B_TO_FSBT(mp, start_boundary),
1444 XFS_B_TO_FSB(mp, end_boundary - start_boundary));
1445 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1436 truncate_pagecache_range(VFS_I(ip), start_boundary, 1446 truncate_pagecache_range(VFS_I(ip), start_boundary,
1437 end_boundary - 1); 1447 end_boundary - 1);
1448
1438 /* convert the blocks */ 1449 /* convert the blocks */
1439 error = xfs_alloc_file_space(ip, start_boundary, 1450 error = xfs_alloc_file_space(ip, start_boundary,
1440 end_boundary - start_boundary - 1, 1451 end_boundary - start_boundary - 1,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e80d59fdf89a..182bac2bb276 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone;
43 * Btree magic numbers. 43 * Btree magic numbers.
44 */ 44 */
45static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 45static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
46 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, 46 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
47 XFS_FIBT_MAGIC },
47 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, 48 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
48 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } 49 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
49}; 50};
50#define xfs_btree_magic(cur) \ 51#define xfs_btree_magic(cur) \
51 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] 52 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
@@ -1115,6 +1116,7 @@ xfs_btree_set_refs(
1115 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1116 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1116 break; 1117 break;
1117 case XFS_BTNUM_INO: 1118 case XFS_BTNUM_INO:
1119 case XFS_BTNUM_FINO:
1118 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1120 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1119 break; 1121 break;
1120 case XFS_BTNUM_BMAP: 1122 case XFS_BTNUM_BMAP:
@@ -1159,7 +1161,6 @@ STATIC int
1159xfs_btree_read_buf_block( 1161xfs_btree_read_buf_block(
1160 struct xfs_btree_cur *cur, 1162 struct xfs_btree_cur *cur,
1161 union xfs_btree_ptr *ptr, 1163 union xfs_btree_ptr *ptr,
1162 int level,
1163 int flags, 1164 int flags,
1164 struct xfs_btree_block **block, 1165 struct xfs_btree_block **block,
1165 struct xfs_buf **bpp) 1166 struct xfs_buf **bpp)
@@ -1517,8 +1518,8 @@ xfs_btree_increment(
1517 union xfs_btree_ptr *ptrp; 1518 union xfs_btree_ptr *ptrp;
1518 1519
1519 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1520 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1520 error = xfs_btree_read_buf_block(cur, ptrp, --lev, 1521 --lev;
1521 0, &block, &bp); 1522 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1522 if (error) 1523 if (error)
1523 goto error0; 1524 goto error0;
1524 1525
@@ -1616,8 +1617,8 @@ xfs_btree_decrement(
1616 union xfs_btree_ptr *ptrp; 1617 union xfs_btree_ptr *ptrp;
1617 1618
1618 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1619 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1619 error = xfs_btree_read_buf_block(cur, ptrp, --lev, 1620 --lev;
1620 0, &block, &bp); 1621 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1621 if (error) 1622 if (error)
1622 goto error0; 1623 goto error0;
1623 xfs_btree_setbuf(cur, lev, bp); 1624 xfs_btree_setbuf(cur, lev, bp);
@@ -1667,7 +1668,7 @@ xfs_btree_lookup_get_block(
1667 return 0; 1668 return 0;
1668 } 1669 }
1669 1670
1670 error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); 1671 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1671 if (error) 1672 if (error)
1672 return error; 1673 return error;
1673 1674
@@ -2018,7 +2019,7 @@ xfs_btree_lshift(
2018 goto out0; 2019 goto out0;
2019 2020
2020 /* Set up the left neighbor as "left". */ 2021 /* Set up the left neighbor as "left". */
2021 error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); 2022 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2022 if (error) 2023 if (error)
2023 goto error0; 2024 goto error0;
2024 2025
@@ -2202,7 +2203,7 @@ xfs_btree_rshift(
2202 goto out0; 2203 goto out0;
2203 2204
2204 /* Set up the right neighbor as "right". */ 2205 /* Set up the right neighbor as "right". */
2205 error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); 2206 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2206 if (error) 2207 if (error)
2207 goto error0; 2208 goto error0;
2208 2209
@@ -2372,7 +2373,7 @@ xfs_btree_split(
2372 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2373 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2373 2374
2374 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2375 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2375 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); 2376 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
2376 if (error) 2377 if (error)
2377 goto error0; 2378 goto error0;
2378 if (*stat == 0) 2379 if (*stat == 0)
@@ -2470,7 +2471,7 @@ xfs_btree_split(
2470 * point back to right instead of to left. 2471 * point back to right instead of to left.
2471 */ 2472 */
2472 if (!xfs_btree_ptr_is_null(cur, &rrptr)) { 2473 if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2473 error = xfs_btree_read_buf_block(cur, &rrptr, level, 2474 error = xfs_btree_read_buf_block(cur, &rrptr,
2474 0, &rrblock, &rrbp); 2475 0, &rrblock, &rrbp);
2475 if (error) 2476 if (error)
2476 goto error0; 2477 goto error0;
@@ -2545,7 +2546,7 @@ xfs_btree_new_iroot(
2545 pp = xfs_btree_ptr_addr(cur, 1, block); 2546 pp = xfs_btree_ptr_addr(cur, 1, block);
2546 2547
2547 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2548 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2548 error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); 2549 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
2549 if (error) 2550 if (error)
2550 goto error0; 2551 goto error0;
2551 if (*stat == 0) { 2552 if (*stat == 0) {
@@ -2649,7 +2650,7 @@ xfs_btree_new_root(
2649 cur->bc_ops->init_ptr_from_cur(cur, &rptr); 2650 cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2650 2651
2651 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2652 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2652 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); 2653 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
2653 if (error) 2654 if (error)
2654 goto error0; 2655 goto error0;
2655 if (*stat == 0) 2656 if (*stat == 0)
@@ -2684,8 +2685,7 @@ xfs_btree_new_root(
2684 lbp = bp; 2685 lbp = bp;
2685 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2686 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2686 left = block; 2687 left = block;
2687 error = xfs_btree_read_buf_block(cur, &rptr, 2688 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2688 cur->bc_nlevels - 1, 0, &right, &rbp);
2689 if (error) 2689 if (error)
2690 goto error0; 2690 goto error0;
2691 bp = rbp; 2691 bp = rbp;
@@ -2696,8 +2696,7 @@ xfs_btree_new_root(
2696 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 2696 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2697 right = block; 2697 right = block;
2698 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2698 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2699 error = xfs_btree_read_buf_block(cur, &lptr, 2699 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2700 cur->bc_nlevels - 1, 0, &left, &lbp);
2701 if (error) 2700 if (error)
2702 goto error0; 2701 goto error0;
2703 bp = lbp; 2702 bp = lbp;
@@ -3649,8 +3648,7 @@ xfs_btree_delrec(
3649 rptr = cptr; 3648 rptr = cptr;
3650 right = block; 3649 right = block;
3651 rbp = bp; 3650 rbp = bp;
3652 error = xfs_btree_read_buf_block(cur, &lptr, level, 3651 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3653 0, &left, &lbp);
3654 if (error) 3652 if (error)
3655 goto error0; 3653 goto error0;
3656 3654
@@ -3667,8 +3665,7 @@ xfs_btree_delrec(
3667 lptr = cptr; 3665 lptr = cptr;
3668 left = block; 3666 left = block;
3669 lbp = bp; 3667 lbp = bp;
3670 error = xfs_btree_read_buf_block(cur, &rptr, level, 3668 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3671 0, &right, &rbp);
3672 if (error) 3669 if (error)
3673 goto error0; 3670 goto error0;
3674 3671
@@ -3740,8 +3737,7 @@ xfs_btree_delrec(
3740 /* If there is a right sibling, point it to the remaining block. */ 3737 /* If there is a right sibling, point it to the remaining block. */
3741 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 3738 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3742 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 3739 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3743 error = xfs_btree_read_buf_block(cur, &cptr, level, 3740 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
3744 0, &rrblock, &rrbp);
3745 if (error) 3741 if (error)
3746 goto error0; 3742 goto error0;
3747 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 3743 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 91e34f21bace..a04b69422f67 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -62,6 +62,7 @@ union xfs_btree_rec {
62#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) 62#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
63#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) 63#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
64#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 64#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
65#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
65 66
66/* 67/*
67 * For logging record fields. 68 * For logging record fields.
@@ -92,6 +93,7 @@ do { \
92 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ 93 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
93 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ 94 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
94 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ 95 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
96 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
95 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 97 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
96 } \ 98 } \
97} while (0) 99} while (0)
@@ -105,6 +107,7 @@ do { \
105 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ 107 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
106 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ 108 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
107 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ 109 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
110 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
108 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 111 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
109 } \ 112 } \
110} while (0) 113} while (0)
@@ -129,7 +132,7 @@ struct xfs_btree_ops {
129 int (*alloc_block)(struct xfs_btree_cur *cur, 132 int (*alloc_block)(struct xfs_btree_cur *cur,
130 union xfs_btree_ptr *start_bno, 133 union xfs_btree_ptr *start_bno,
131 union xfs_btree_ptr *new_bno, 134 union xfs_btree_ptr *new_bno,
132 int length, int *stat); 135 int *stat);
133 int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); 136 int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
134 137
135 /* update last record information */ 138 /* update last record information */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 107f2fdfe41f..7a34a1ae6552 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -216,8 +216,7 @@ _xfs_buf_alloc(
216STATIC int 216STATIC int
217_xfs_buf_get_pages( 217_xfs_buf_get_pages(
218 xfs_buf_t *bp, 218 xfs_buf_t *bp,
219 int page_count, 219 int page_count)
220 xfs_buf_flags_t flags)
221{ 220{
222 /* Make sure that we have a page list */ 221 /* Make sure that we have a page list */
223 if (bp->b_pages == NULL) { 222 if (bp->b_pages == NULL) {
@@ -330,7 +329,7 @@ use_alloc_page:
330 end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1) 329 end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
331 >> PAGE_SHIFT; 330 >> PAGE_SHIFT;
332 page_count = end - start; 331 page_count = end - start;
333 error = _xfs_buf_get_pages(bp, page_count, flags); 332 error = _xfs_buf_get_pages(bp, page_count);
334 if (unlikely(error)) 333 if (unlikely(error))
335 return error; 334 return error;
336 335
@@ -778,7 +777,7 @@ xfs_buf_associate_memory(
778 bp->b_pages = NULL; 777 bp->b_pages = NULL;
779 bp->b_addr = mem; 778 bp->b_addr = mem;
780 779
781 rval = _xfs_buf_get_pages(bp, page_count, 0); 780 rval = _xfs_buf_get_pages(bp, page_count);
782 if (rval) 781 if (rval)
783 return rval; 782 return rval;
784 783
@@ -811,7 +810,7 @@ xfs_buf_get_uncached(
811 goto fail; 810 goto fail;
812 811
813 page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; 812 page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
814 error = _xfs_buf_get_pages(bp, page_count, 0); 813 error = _xfs_buf_get_pages(bp, page_count);
815 if (error) 814 if (error)
816 goto fail_free_buf; 815 goto fail_free_buf;
817 816
@@ -1372,21 +1371,29 @@ xfs_buf_iorequest(
1372 xfs_buf_wait_unpin(bp); 1371 xfs_buf_wait_unpin(bp);
1373 xfs_buf_hold(bp); 1372 xfs_buf_hold(bp);
1374 1373
1375 /* Set the count to 1 initially, this will stop an I/O 1374 /*
1375 * Set the count to 1 initially, this will stop an I/O
1376 * completion callout which happens before we have started 1376 * completion callout which happens before we have started
1377 * all the I/O from calling xfs_buf_ioend too early. 1377 * all the I/O from calling xfs_buf_ioend too early.
1378 */ 1378 */
1379 atomic_set(&bp->b_io_remaining, 1); 1379 atomic_set(&bp->b_io_remaining, 1);
1380 _xfs_buf_ioapply(bp); 1380 _xfs_buf_ioapply(bp);
1381 _xfs_buf_ioend(bp, 1); 1381 /*
1382 * If _xfs_buf_ioapply failed, we'll get back here with
1383 * only the reference we took above. _xfs_buf_ioend will
1384 * drop it to zero, so we'd better not queue it for later,
1385 * or we'll free it before it's done.
1386 */
1387 _xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
1382 1388
1383 xfs_buf_rele(bp); 1389 xfs_buf_rele(bp);
1384} 1390}
1385 1391
1386/* 1392/*
1387 * Waits for I/O to complete on the buffer supplied. It returns immediately if 1393 * Waits for I/O to complete on the buffer supplied. It returns immediately if
1388 * no I/O is pending or there is already a pending error on the buffer. It 1394 * no I/O is pending or there is already a pending error on the buffer, in which
1389 * returns the I/O error code, if any, or 0 if there was no error. 1395 * case nothing will ever complete. It returns the I/O error code, if any, or
1396 * 0 if there was no error.
1390 */ 1397 */
1391int 1398int
1392xfs_buf_iowait( 1399xfs_buf_iowait(
@@ -1607,7 +1614,6 @@ xfs_free_buftarg(
1607int 1614int
1608xfs_setsize_buftarg( 1615xfs_setsize_buftarg(
1609 xfs_buftarg_t *btp, 1616 xfs_buftarg_t *btp,
1610 unsigned int blocksize,
1611 unsigned int sectorsize) 1617 unsigned int sectorsize)
1612{ 1618{
1613 /* Set up metadata sector size info */ 1619 /* Set up metadata sector size info */
@@ -1642,16 +1648,13 @@ xfs_setsize_buftarg_early(
1642 xfs_buftarg_t *btp, 1648 xfs_buftarg_t *btp,
1643 struct block_device *bdev) 1649 struct block_device *bdev)
1644{ 1650{
1645 return xfs_setsize_buftarg(btp, PAGE_SIZE, 1651 return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
1646 bdev_logical_block_size(bdev));
1647} 1652}
1648 1653
1649xfs_buftarg_t * 1654xfs_buftarg_t *
1650xfs_alloc_buftarg( 1655xfs_alloc_buftarg(
1651 struct xfs_mount *mp, 1656 struct xfs_mount *mp,
1652 struct block_device *bdev, 1657 struct block_device *bdev)
1653 int external,
1654 const char *fsname)
1655{ 1658{
1656 xfs_buftarg_t *btp; 1659 xfs_buftarg_t *btp;
1657 1660
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b8a3abf6cf47..0e47fd1fedba 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -387,10 +387,10 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
387 * Handling of buftargs. 387 * Handling of buftargs.
388 */ 388 */
389extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, 389extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
390 struct block_device *, int, const char *); 390 struct block_device *);
391extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); 391extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
392extern void xfs_wait_buftarg(xfs_buftarg_t *); 392extern void xfs_wait_buftarg(xfs_buftarg_t *);
393extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 393extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
394 394
395#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) 395#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
396#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) 396#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 8752821443be..64b17f5bed9a 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -812,7 +812,6 @@ xfs_buf_item_init(
812 */ 812 */
813static void 813static void
814xfs_buf_item_log_segment( 814xfs_buf_item_log_segment(
815 struct xfs_buf_log_item *bip,
816 uint first, 815 uint first,
817 uint last, 816 uint last,
818 uint *map) 817 uint *map)
@@ -920,7 +919,7 @@ xfs_buf_item_log(
920 if (end > last) 919 if (end > last)
921 end = last; 920 end = last;
922 921
923 xfs_buf_item_log_segment(bip, first, end, 922 xfs_buf_item_log_segment(first, end,
924 &bip->bli_formats[i].blf_data_map[0]); 923 &bip->bli_formats[i].blf_data_map[0]);
925 924
926 start += bp->b_maps[i].bm_len; 925 start += bp->b_maps[i].bm_len;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 6cc5f6785a77..9eec594cc25a 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2462,7 +2462,6 @@ xfs_buf_map_from_irec(
2462 */ 2462 */
2463static int 2463static int
2464xfs_dabuf_map( 2464xfs_dabuf_map(
2465 struct xfs_trans *trans,
2466 struct xfs_inode *dp, 2465 struct xfs_inode *dp,
2467 xfs_dablk_t bno, 2466 xfs_dablk_t bno,
2468 xfs_daddr_t mappedbno, 2467 xfs_daddr_t mappedbno,
@@ -2558,7 +2557,7 @@ xfs_da_get_buf(
2558 *bpp = NULL; 2557 *bpp = NULL;
2559 mapp = &map; 2558 mapp = &map;
2560 nmap = 1; 2559 nmap = 1;
2561 error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, 2560 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
2562 &mapp, &nmap); 2561 &mapp, &nmap);
2563 if (error) { 2562 if (error) {
2564 /* mapping a hole is not an error, but we don't continue */ 2563 /* mapping a hole is not an error, but we don't continue */
@@ -2606,7 +2605,7 @@ xfs_da_read_buf(
2606 *bpp = NULL; 2605 *bpp = NULL;
2607 mapp = &map; 2606 mapp = &map;
2608 nmap = 1; 2607 nmap = 1;
2609 error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, 2608 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
2610 &mapp, &nmap); 2609 &mapp, &nmap);
2611 if (error) { 2610 if (error) {
2612 /* mapping a hole is not an error, but we don't continue */ 2611 /* mapping a hole is not an error, but we don't continue */
@@ -2679,7 +2678,6 @@ out_free:
2679 */ 2678 */
2680xfs_daddr_t 2679xfs_daddr_t
2681xfs_da_reada_buf( 2680xfs_da_reada_buf(
2682 struct xfs_trans *trans,
2683 struct xfs_inode *dp, 2681 struct xfs_inode *dp,
2684 xfs_dablk_t bno, 2682 xfs_dablk_t bno,
2685 xfs_daddr_t mappedbno, 2683 xfs_daddr_t mappedbno,
@@ -2693,7 +2691,7 @@ xfs_da_reada_buf(
2693 2691
2694 mapp = &map; 2692 mapp = &map;
2695 nmap = 1; 2693 nmap = 1;
2696 error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, 2694 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
2697 &mapp, &nmap); 2695 &mapp, &nmap);
2698 if (error) { 2696 if (error) {
2699 /* mapping a hole is not an error, but we don't continue */ 2697 /* mapping a hole is not an error, but we don't continue */
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 6e95ea79f5d7..c824a0aa039f 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -60,10 +60,12 @@ typedef struct xfs_da_args {
60 int index; /* index of attr of interest in blk */ 60 int index; /* index of attr of interest in blk */
61 xfs_dablk_t rmtblkno; /* remote attr value starting blkno */ 61 xfs_dablk_t rmtblkno; /* remote attr value starting blkno */
62 int rmtblkcnt; /* remote attr value block count */ 62 int rmtblkcnt; /* remote attr value block count */
63 int rmtvaluelen; /* remote attr value length in bytes */
63 xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */ 64 xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */
64 int index2; /* index of 2nd attr in blk */ 65 int index2; /* index of 2nd attr in blk */
65 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ 66 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
66 int rmtblkcnt2; /* remote attr value block count */ 67 int rmtblkcnt2; /* remote attr value block count */
68 int rmtvaluelen2; /* remote attr value length in bytes */
67 int op_flags; /* operation flags */ 69 int op_flags; /* operation flags */
68 enum xfs_dacmp cmpresult; /* name compare result for lookups */ 70 enum xfs_dacmp cmpresult; /* name compare result for lookups */
69} xfs_da_args_t; 71} xfs_da_args_t;
@@ -183,9 +185,9 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
183 xfs_dablk_t bno, xfs_daddr_t mappedbno, 185 xfs_dablk_t bno, xfs_daddr_t mappedbno,
184 struct xfs_buf **bpp, int whichfork, 186 struct xfs_buf **bpp, int whichfork,
185 const struct xfs_buf_ops *ops); 187 const struct xfs_buf_ops *ops);
186xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, 188xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
187 xfs_dablk_t bno, xfs_daddr_t mapped_bno, 189 xfs_daddr_t mapped_bno, int whichfork,
188 int whichfork, const struct xfs_buf_ops *ops); 190 const struct xfs_buf_ops *ops);
189int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, 191int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
190 struct xfs_buf *dead_buf); 192 struct xfs_buf *dead_buf);
191 193
diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/xfs_da_format.h
index a19d3f8f639c..1432b576b4a7 100644
--- a/fs/xfs/xfs_da_format.h
+++ b/fs/xfs/xfs_da_format.h
@@ -541,7 +541,7 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
541 * Convert dataptr to byte in file space 541 * Convert dataptr to byte in file space
542 */ 542 */
543static inline xfs_dir2_off_t 543static inline xfs_dir2_off_t
544xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 544xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp)
545{ 545{
546 return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; 546 return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
547} 547}
@@ -550,7 +550,7 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
550 * Convert byte in file space to dataptr. It had better be aligned. 550 * Convert byte in file space to dataptr. It had better be aligned.
551 */ 551 */
552static inline xfs_dir2_dataptr_t 552static inline xfs_dir2_dataptr_t
553xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) 553xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by)
554{ 554{
555 return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); 555 return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
556} 556}
@@ -571,7 +571,7 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
571static inline xfs_dir2_db_t 571static inline xfs_dir2_db_t
572xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 572xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
573{ 573{
574 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); 574 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(dp));
575} 575}
576 576
577/* 577/*
@@ -590,7 +590,7 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
590static inline xfs_dir2_data_aoff_t 590static inline xfs_dir2_data_aoff_t
591xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) 591xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
592{ 592{
593 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); 593 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(dp));
594} 594}
595 595
596/* 596/*
@@ -629,7 +629,7 @@ static inline xfs_dir2_dataptr_t
629xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, 629xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
630 xfs_dir2_data_aoff_t o) 630 xfs_dir2_data_aoff_t o)
631{ 631{
632 return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); 632 return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(mp, db, o));
633} 633}
634 634
635/* 635/*
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index fda46253966a..e365c98c0f1e 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -244,7 +244,7 @@ xfs_dir_createname(
244 goto out_free; 244 goto out_free;
245 } 245 }
246 246
247 rval = xfs_dir2_isblock(tp, dp, &v); 247 rval = xfs_dir2_isblock(dp, &v);
248 if (rval) 248 if (rval)
249 goto out_free; 249 goto out_free;
250 if (v) { 250 if (v) {
@@ -252,7 +252,7 @@ xfs_dir_createname(
252 goto out_free; 252 goto out_free;
253 } 253 }
254 254
255 rval = xfs_dir2_isleaf(tp, dp, &v); 255 rval = xfs_dir2_isleaf(dp, &v);
256 if (rval) 256 if (rval)
257 goto out_free; 257 goto out_free;
258 if (v) 258 if (v)
@@ -336,7 +336,7 @@ xfs_dir_lookup(
336 goto out_check_rval; 336 goto out_check_rval;
337 } 337 }
338 338
339 rval = xfs_dir2_isblock(tp, dp, &v); 339 rval = xfs_dir2_isblock(dp, &v);
340 if (rval) 340 if (rval)
341 goto out_free; 341 goto out_free;
342 if (v) { 342 if (v) {
@@ -344,7 +344,7 @@ xfs_dir_lookup(
344 goto out_check_rval; 344 goto out_check_rval;
345 } 345 }
346 346
347 rval = xfs_dir2_isleaf(tp, dp, &v); 347 rval = xfs_dir2_isleaf(dp, &v);
348 if (rval) 348 if (rval)
349 goto out_free; 349 goto out_free;
350 if (v) 350 if (v)
@@ -408,7 +408,7 @@ xfs_dir_removename(
408 goto out_free; 408 goto out_free;
409 } 409 }
410 410
411 rval = xfs_dir2_isblock(tp, dp, &v); 411 rval = xfs_dir2_isblock(dp, &v);
412 if (rval) 412 if (rval)
413 goto out_free; 413 goto out_free;
414 if (v) { 414 if (v) {
@@ -416,7 +416,7 @@ xfs_dir_removename(
416 goto out_free; 416 goto out_free;
417 } 417 }
418 418
419 rval = xfs_dir2_isleaf(tp, dp, &v); 419 rval = xfs_dir2_isleaf(dp, &v);
420 if (rval) 420 if (rval)
421 goto out_free; 421 goto out_free;
422 if (v) 422 if (v)
@@ -472,7 +472,7 @@ xfs_dir_replace(
472 goto out_free; 472 goto out_free;
473 } 473 }
474 474
475 rval = xfs_dir2_isblock(tp, dp, &v); 475 rval = xfs_dir2_isblock(dp, &v);
476 if (rval) 476 if (rval)
477 goto out_free; 477 goto out_free;
478 if (v) { 478 if (v) {
@@ -480,7 +480,7 @@ xfs_dir_replace(
480 goto out_free; 480 goto out_free;
481 } 481 }
482 482
483 rval = xfs_dir2_isleaf(tp, dp, &v); 483 rval = xfs_dir2_isleaf(dp, &v);
484 if (rval) 484 if (rval)
485 goto out_free; 485 goto out_free;
486 if (v) 486 if (v)
@@ -531,7 +531,7 @@ xfs_dir_canenter(
531 goto out_free; 531 goto out_free;
532 } 532 }
533 533
534 rval = xfs_dir2_isblock(tp, dp, &v); 534 rval = xfs_dir2_isblock(dp, &v);
535 if (rval) 535 if (rval)
536 goto out_free; 536 goto out_free;
537 if (v) { 537 if (v) {
@@ -539,7 +539,7 @@ xfs_dir_canenter(
539 goto out_free; 539 goto out_free;
540 } 540 }
541 541
542 rval = xfs_dir2_isleaf(tp, dp, &v); 542 rval = xfs_dir2_isleaf(dp, &v);
543 if (rval) 543 if (rval)
544 goto out_free; 544 goto out_free;
545 if (v) 545 if (v)
@@ -607,7 +607,6 @@ xfs_dir2_grow_inode(
607 */ 607 */
608int 608int
609xfs_dir2_isblock( 609xfs_dir2_isblock(
610 xfs_trans_t *tp,
611 xfs_inode_t *dp, 610 xfs_inode_t *dp,
612 int *vp) /* out: 1 is block, 0 is not block */ 611 int *vp) /* out: 1 is block, 0 is not block */
613{ 612{
@@ -616,7 +615,7 @@ xfs_dir2_isblock(
616 int rval; 615 int rval;
617 616
618 mp = dp->i_mount; 617 mp = dp->i_mount;
619 if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) 618 if ((rval = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK)))
620 return rval; 619 return rval;
621 rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize; 620 rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;
622 ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize); 621 ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
@@ -629,7 +628,6 @@ xfs_dir2_isblock(
629 */ 628 */
630int 629int
631xfs_dir2_isleaf( 630xfs_dir2_isleaf(
632 xfs_trans_t *tp,
633 xfs_inode_t *dp, 631 xfs_inode_t *dp,
634 int *vp) /* out: 1 is leaf, 0 is not leaf */ 632 int *vp) /* out: 1 is leaf, 0 is not leaf */
635{ 633{
@@ -638,7 +636,7 @@ xfs_dir2_isleaf(
638 int rval; 636 int rval;
639 637
640 mp = dp->i_mount; 638 mp = dp->i_mount;
641 if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) 639 if ((rval = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK)))
642 return rval; 640 return rval;
643 *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog); 641 *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
644 return 0; 642 return 0;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index cec70e0781ab..64a6b19c2fd0 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -142,8 +142,8 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
142/* 142/*
143 * Interface routines used by userspace utilities 143 * Interface routines used by userspace utilities
144 */ 144 */
145extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); 145extern int xfs_dir2_isblock(struct xfs_inode *dp, int *r);
146extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); 146extern int xfs_dir2_isleaf(struct xfs_inode *dp, int *r);
147extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 147extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
148 struct xfs_buf *bp); 148 struct xfs_buf *bp);
149 149
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 4f6a38cb83a4..dd9d00515582 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -319,7 +319,6 @@ xfs_dir2_block_compact(
319 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), 319 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
320 (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), 320 (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
321 needlog, &needscan); 321 needlog, &needscan);
322 blp += be32_to_cpu(btp->stale) - 1;
323 btp->stale = cpu_to_be32(1); 322 btp->stale = cpu_to_be32(1);
324 /* 323 /*
325 * If we now need to rebuild the bestfree map, do so. 324 * If we now need to rebuild the bestfree map, do so.
@@ -537,7 +536,7 @@ xfs_dir2_block_addname(
537 * Fill in the leaf entry. 536 * Fill in the leaf entry.
538 */ 537 */
539 blp[mid].hashval = cpu_to_be32(args->hashval); 538 blp[mid].hashval = cpu_to_be32(args->hashval);
540 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 539 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
541 (char *)dep - (char *)hdr)); 540 (char *)dep - (char *)hdr));
542 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); 541 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
543 /* 542 /*
@@ -1170,7 +1169,7 @@ xfs_dir2_sf_to_block(
1170 *tagp = cpu_to_be16((char *)dep - (char *)hdr); 1169 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1171 xfs_dir2_data_log_entry(tp, dp, bp, dep); 1170 xfs_dir2_data_log_entry(tp, dp, bp, dep);
1172 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); 1171 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
1173 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1172 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
1174 (char *)dep - (char *)hdr)); 1173 (char *)dep - (char *)hdr));
1175 /* 1174 /*
1176 * Create entry for .. 1175 * Create entry for ..
@@ -1184,7 +1183,7 @@ xfs_dir2_sf_to_block(
1184 *tagp = cpu_to_be16((char *)dep - (char *)hdr); 1183 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1185 xfs_dir2_data_log_entry(tp, dp, bp, dep); 1184 xfs_dir2_data_log_entry(tp, dp, bp, dep);
1186 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1185 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1187 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1186 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
1188 (char *)dep - (char *)hdr)); 1187 (char *)dep - (char *)hdr));
1189 offset = dp->d_ops->data_first_offset; 1188 offset = dp->d_ops->data_first_offset;
1190 /* 1189 /*
@@ -1238,7 +1237,7 @@ xfs_dir2_sf_to_block(
1238 name.len = sfep->namelen; 1237 name.len = sfep->namelen;
1239 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> 1238 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
1240 hashname(&name)); 1239 hashname(&name));
1241 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1240 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
1242 (char *)dep - (char *)hdr)); 1241 (char *)dep - (char *)hdr));
1243 offset = (int)((char *)(tagp + 1) - (char *)hdr); 1242 offset = (int)((char *)(tagp + 1) - (char *)hdr);
1244 if (++i == sfp->count) 1243 if (++i == sfp->count)
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index afa4ad523f3f..bae8b5b8d1c2 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -329,12 +329,11 @@ xfs_dir3_data_read(
329 329
330int 330int
331xfs_dir3_data_readahead( 331xfs_dir3_data_readahead(
332 struct xfs_trans *tp,
333 struct xfs_inode *dp, 332 struct xfs_inode *dp,
334 xfs_dablk_t bno, 333 xfs_dablk_t bno,
335 xfs_daddr_t mapped_bno) 334 xfs_daddr_t mapped_bno)
336{ 335{
337 return xfs_da_reada_buf(tp, dp, bno, mapped_bno, 336 return xfs_da_reada_buf(dp, bno, mapped_bno,
338 XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); 337 XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
339} 338}
340 339
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index d36e97df1187..f571723e2378 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1708,7 +1708,7 @@ xfs_dir2_node_to_leaf(
1708 /* 1708 /*
1709 * Get the last offset in the file. 1709 * Get the last offset in the file.
1710 */ 1710 */
1711 if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) { 1711 if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
1712 return error; 1712 return error;
1713 } 1713 }
1714 fo -= mp->m_dirblkfsbs; 1714 fo -= mp->m_dirblkfsbs;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index cb434d732681..9cb91ee0914b 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1727,7 +1727,7 @@ xfs_dir2_node_addname_int(
1727 if (dbno == -1) { 1727 if (dbno == -1) {
1728 xfs_fileoff_t fo; /* freespace block number */ 1728 xfs_fileoff_t fo; /* freespace block number */
1729 1729
1730 if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) 1730 if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
1731 return error; 1731 return error;
1732 lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo); 1732 lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
1733 fbno = ifbno; 1733 fbno = ifbno;
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 8b9d2281f85b..2429960739e9 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -54,8 +54,8 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
54extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 54extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
55extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, 55extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
56 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); 56 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
57extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, 57extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
58 xfs_dablk_t bno, xfs_daddr_t mapped_bno); 58 xfs_daddr_t mapped_bno);
59 59
60extern struct xfs_dir2_data_free * 60extern struct xfs_dir2_data_free *
61xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, 61xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index aead369e1c30..bf7a5cee7adc 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -434,7 +434,7 @@ xfs_dir2_leaf_readbuf(
434 */ 434 */
435 if (i > mip->ra_current && 435 if (i > mip->ra_current &&
436 map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { 436 map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
437 xfs_dir3_data_readahead(NULL, dp, 437 xfs_dir3_data_readahead(dp,
438 map[mip->ra_index].br_startoff + mip->ra_offset, 438 map[mip->ra_index].br_startoff + mip->ra_offset,
439 XFS_FSB_TO_DADDR(mp, 439 XFS_FSB_TO_DADDR(mp,
440 map[mip->ra_index].br_startblock + 440 map[mip->ra_index].br_startblock +
@@ -447,7 +447,7 @@ xfs_dir2_leaf_readbuf(
447 * use our mapping, but this is a very rare case. 447 * use our mapping, but this is a very rare case.
448 */ 448 */
449 else if (i > mip->ra_current) { 449 else if (i > mip->ra_current) {
450 xfs_dir3_data_readahead(NULL, dp, 450 xfs_dir3_data_readahead(dp,
451 map[mip->ra_index].br_startoff + 451 map[mip->ra_index].br_startoff +
452 mip->ra_offset, -1); 452 mip->ra_offset, -1);
453 mip->ra_current = i; 453 mip->ra_current = i;
@@ -456,7 +456,7 @@ xfs_dir2_leaf_readbuf(
456 /* 456 /*
457 * Advance offset through the mapping table. 457 * Advance offset through the mapping table.
458 */ 458 */
459 for (j = 0; j < mp->m_dirblkfsbs; j++) { 459 for (j = 0; j < mp->m_dirblkfsbs; j += length ) {
460 /* 460 /*
461 * The rest of this extent but not more than a dir 461 * The rest of this extent but not more than a dir
462 * block. 462 * block.
@@ -464,7 +464,6 @@ xfs_dir2_leaf_readbuf(
464 length = min_t(int, mp->m_dirblkfsbs, 464 length = min_t(int, mp->m_dirblkfsbs,
465 map[mip->ra_index].br_blockcount - 465 map[mip->ra_index].br_blockcount -
466 mip->ra_offset); 466 mip->ra_offset);
467 j += length;
468 mip->ra_offset += length; 467 mip->ra_offset += length;
469 468
470 /* 469 /*
@@ -531,7 +530,7 @@ xfs_dir2_leaf_getdents(
531 * Inside the loop we keep the main offset value as a byte offset 530 * Inside the loop we keep the main offset value as a byte offset
532 * in the directory file. 531 * in the directory file.
533 */ 532 */
534 curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos); 533 curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
535 534
536 /* 535 /*
537 * Force this conversion through db so we truncate the offset 536 * Force this conversion through db so we truncate the offset
@@ -635,7 +634,7 @@ xfs_dir2_leaf_getdents(
635 length = dp->d_ops->data_entsize(dep->namelen); 634 length = dp->d_ops->data_entsize(dep->namelen);
636 filetype = dp->d_ops->data_get_ftype(dep); 635 filetype = dp->d_ops->data_get_ftype(dep);
637 636
638 ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; 637 ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
639 if (!dir_emit(ctx, (char *)dep->name, dep->namelen, 638 if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
640 be64_to_cpu(dep->inumber), 639 be64_to_cpu(dep->inumber),
641 xfs_dir3_get_dtype(mp, filetype))) 640 xfs_dir3_get_dtype(mp, filetype)))
@@ -653,10 +652,10 @@ xfs_dir2_leaf_getdents(
653 /* 652 /*
654 * All done. Set output offset value to current offset. 653 * All done. Set output offset value to current offset.
655 */ 654 */
656 if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) 655 if (curoff > xfs_dir2_dataptr_to_byte(XFS_DIR2_MAX_DATAPTR))
657 ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; 656 ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
658 else 657 else
659 ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; 658 ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
660 kmem_free(map_info); 659 kmem_free(map_info);
661 if (bp) 660 if (bp)
662 xfs_trans_brelse(NULL, bp); 661 xfs_trans_brelse(NULL, bp);
@@ -687,7 +686,7 @@ xfs_readdir(
687 lock_mode = xfs_ilock_data_map_shared(dp); 686 lock_mode = xfs_ilock_data_map_shared(dp);
688 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 687 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
689 rval = xfs_dir2_sf_getdents(dp, ctx); 688 rval = xfs_dir2_sf_getdents(dp, ctx);
690 else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) 689 else if ((rval = xfs_dir2_isblock(dp, &v)))
691 ; 690 ;
692 else if (v) 691 else if (v)
693 rval = xfs_dir2_block_getdents(dp, ctx); 692 rval = xfs_dir2_block_getdents(dp, ctx);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 3725fb1b902b..7aab8ec117ad 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -285,14 +285,12 @@ int /* error */
285xfs_dir2_sf_addname( 285xfs_dir2_sf_addname(
286 xfs_da_args_t *args) /* operation arguments */ 286 xfs_da_args_t *args) /* operation arguments */
287{ 287{
288 int add_entsize; /* size of the new entry */
289 xfs_inode_t *dp; /* incore directory inode */ 288 xfs_inode_t *dp; /* incore directory inode */
290 int error; /* error return value */ 289 int error; /* error return value */
291 int incr_isize; /* total change in size */ 290 int incr_isize; /* total change in size */
292 int new_isize; /* di_size after adding name */ 291 int new_isize; /* di_size after adding name */
293 int objchange; /* changing to 8-byte inodes */ 292 int objchange; /* changing to 8-byte inodes */
294 xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ 293 xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */
295 int old_isize; /* di_size before adding name */
296 int pick; /* which algorithm to use */ 294 int pick; /* which algorithm to use */
297 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ 295 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
298 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ 296 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
@@ -316,8 +314,7 @@ xfs_dir2_sf_addname(
316 /* 314 /*
317 * Compute entry (and change in) size. 315 * Compute entry (and change in) size.
318 */ 316 */
319 add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen); 317 incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
320 incr_isize = add_entsize;
321 objchange = 0; 318 objchange = 0;
322#if XFS_BIG_INUMS 319#if XFS_BIG_INUMS
323 /* 320 /*
@@ -325,11 +322,8 @@ xfs_dir2_sf_addname(
325 */ 322 */
326 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { 323 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
327 /* 324 /*
328 * Yes, adjust the entry size and the total size. 325 * Yes, adjust the inode size. old count + (parent + new)
329 */ 326 */
330 add_entsize +=
331 (uint)sizeof(xfs_dir2_ino8_t) -
332 (uint)sizeof(xfs_dir2_ino4_t);
333 incr_isize += 327 incr_isize +=
334 (sfp->count + 2) * 328 (sfp->count + 2) *
335 ((uint)sizeof(xfs_dir2_ino8_t) - 329 ((uint)sizeof(xfs_dir2_ino8_t) -
@@ -337,8 +331,7 @@ xfs_dir2_sf_addname(
337 objchange = 1; 331 objchange = 1;
338 } 332 }
339#endif 333#endif
340 old_isize = (int)dp->i_d.di_size; 334 new_isize = (int)dp->i_d.di_size + incr_isize;
341 new_isize = old_isize + incr_isize;
342 /* 335 /*
343 * Won't fit as shortform any more (due to size), 336 * Won't fit as shortform any more (due to size),
344 * or the pick routine says it won't (due to offset values). 337 * or the pick routine says it won't (due to offset values).
@@ -1110,9 +1103,9 @@ xfs_dir2_sf_toino4(
1110} 1103}
1111 1104
1112/* 1105/*
1113 * Convert from 4-byte inode numbers to 8-byte inode numbers. 1106 * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers.
1114 * The new 8-byte inode number is not there yet, we leave with the 1107 * The new entry w/ an 8-byte inode number is not there yet; we leave with
1115 * count 1 but no corresponding entry. 1108 * i8count set to 1, but no corresponding 8-byte entry.
1116 */ 1109 */
1117static void 1110static void
1118xfs_dir2_sf_toino8( 1111xfs_dir2_sf_toino8(
@@ -1145,7 +1138,7 @@ xfs_dir2_sf_toino8(
1145 ASSERT(oldsfp->i8count == 0); 1138 ASSERT(oldsfp->i8count == 0);
1146 memcpy(buf, oldsfp, oldsize); 1139 memcpy(buf, oldsfp, oldsize);
1147 /* 1140 /*
1148 * Compute the new inode size. 1141 * Compute the new inode size (nb: entry count + 1 for parent)
1149 */ 1142 */
1150 newsize = 1143 newsize =
1151 oldsize + 1144 oldsize +
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 868b19f096bf..5fec738f1f2e 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -832,47 +832,6 @@ restart:
832 return (0); 832 return (0);
833} 833}
834 834
835
836STATIC void
837xfs_qm_dqput_final(
838 struct xfs_dquot *dqp)
839{
840 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
841 struct xfs_dquot *gdqp;
842 struct xfs_dquot *pdqp;
843
844 trace_xfs_dqput_free(dqp);
845
846 if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
847 XFS_STATS_INC(xs_qm_dquot_unused);
848
849 /*
850 * If we just added a udquot to the freelist, then we want to release
851 * the gdquot/pdquot reference that it (probably) has. Otherwise it'll
852 * keep the gdquot/pdquot from getting reclaimed.
853 */
854 gdqp = dqp->q_gdquot;
855 if (gdqp) {
856 xfs_dqlock(gdqp);
857 dqp->q_gdquot = NULL;
858 }
859
860 pdqp = dqp->q_pdquot;
861 if (pdqp) {
862 xfs_dqlock(pdqp);
863 dqp->q_pdquot = NULL;
864 }
865 xfs_dqunlock(dqp);
866
867 /*
868 * If we had a group/project quota hint, release it now.
869 */
870 if (gdqp)
871 xfs_qm_dqput(gdqp);
872 if (pdqp)
873 xfs_qm_dqput(pdqp);
874}
875
876/* 835/*
877 * Release a reference to the dquot (decrement ref-count) and unlock it. 836 * Release a reference to the dquot (decrement ref-count) and unlock it.
878 * 837 *
@@ -888,10 +847,14 @@ xfs_qm_dqput(
888 847
889 trace_xfs_dqput(dqp); 848 trace_xfs_dqput(dqp);
890 849
891 if (--dqp->q_nrefs > 0) 850 if (--dqp->q_nrefs == 0) {
892 xfs_dqunlock(dqp); 851 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
893 else 852 trace_xfs_dqput_free(dqp);
894 xfs_qm_dqput_final(dqp); 853
854 if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
855 XFS_STATS_INC(xs_qm_dquot_unused);
856 }
857 xfs_dqunlock(dqp);
895} 858}
896 859
897/* 860/*
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index d22ed0053c32..68a68f704837 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -52,8 +52,6 @@ typedef struct xfs_dquot {
52 int q_bufoffset; /* off of dq in buffer (# dquots) */ 52 int q_bufoffset; /* off of dq in buffer (# dquots) */
53 xfs_fileoff_t q_fileoffset; /* offset in quotas file */ 53 xfs_fileoff_t q_fileoffset; /* offset in quotas file */
54 54
55 struct xfs_dquot*q_gdquot; /* group dquot, hint only */
56 struct xfs_dquot*q_pdquot; /* project dquot, hint only */
57 xfs_disk_dquot_t q_core; /* actual usage & quotas */ 55 xfs_disk_dquot_t q_core; /* actual usage & quotas */
58 xfs_dq_logitem_t q_logitem; /* dquot log item */ 56 xfs_dq_logitem_t q_logitem; /* dquot log item */
59 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ 57 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
index 610da8177737..c2ac0c611ad8 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/xfs_dquot_buf.c
@@ -35,7 +35,6 @@
35 35
36int 36int
37xfs_calc_dquots_per_chunk( 37xfs_calc_dquots_per_chunk(
38 struct xfs_mount *mp,
39 unsigned int nbblks) /* basic block units */ 38 unsigned int nbblks) /* basic block units */
40{ 39{
41 unsigned int ndquots; 40 unsigned int ndquots;
@@ -194,7 +193,7 @@ xfs_dquot_buf_verify_crc(
194 if (mp->m_quotainfo) 193 if (mp->m_quotainfo)
195 ndquots = mp->m_quotainfo->qi_dqperchunk; 194 ndquots = mp->m_quotainfo->qi_dqperchunk;
196 else 195 else
197 ndquots = xfs_calc_dquots_per_chunk(mp, 196 ndquots = xfs_calc_dquots_per_chunk(
198 XFS_BB_TO_FSB(mp, bp->b_length)); 197 XFS_BB_TO_FSB(mp, bp->b_length));
199 198
200 for (i = 0; i < ndquots; i++, d++) { 199 for (i = 0; i < ndquots; i++, d++) {
@@ -225,7 +224,7 @@ xfs_dquot_buf_verify(
225 if (mp->m_quotainfo) 224 if (mp->m_quotainfo)
226 ndquots = mp->m_quotainfo->qi_dqperchunk; 225 ndquots = mp->m_quotainfo->qi_dqperchunk;
227 else 226 else
228 ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length); 227 ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
229 228
230 /* 229 /*
231 * On the first read of the buffer, verify that each dquot is valid. 230 * On the first read of the buffer, verify that each dquot is valid.
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 1399e187d425..753e467aa1a5 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
237 237
238 if (!lsn) 238 if (!lsn)
239 return 0; 239 return 0;
240 return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); 240 return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
241} 241}
242 242
243const struct export_operations xfs_export_operations = { 243const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 79e96ce98733..1b8160dc04d1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -155,7 +155,7 @@ xfs_dir_fsync(
155 155
156 if (!lsn) 156 if (!lsn)
157 return 0; 157 return 0;
158 return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); 158 return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
159} 159}
160 160
161STATIC int 161STATIC int
@@ -295,7 +295,7 @@ xfs_file_aio_read(
295 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); 295 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
296 296
297 if (inode->i_mapping->nrpages) { 297 if (inode->i_mapping->nrpages) {
298 ret = -filemap_write_and_wait_range( 298 ret = filemap_write_and_wait_range(
299 VFS_I(ip)->i_mapping, 299 VFS_I(ip)->i_mapping,
300 pos, -1); 300 pos, -1);
301 if (ret) { 301 if (ret) {
@@ -679,7 +679,7 @@ xfs_file_dio_aio_write(
679 goto out; 679 goto out;
680 680
681 if (mapping->nrpages) { 681 if (mapping->nrpages) {
682 ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 682 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
683 pos, -1); 683 pos, -1);
684 if (ret) 684 if (ret)
685 goto out; 685 goto out;
@@ -837,11 +837,19 @@ xfs_file_fallocate(
837 unsigned blksize_mask = (1 << inode->i_blkbits) - 1; 837 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
838 838
839 if (offset & blksize_mask || len & blksize_mask) { 839 if (offset & blksize_mask || len & blksize_mask) {
840 error = -EINVAL; 840 error = EINVAL;
841 goto out_unlock;
842 }
843
844 /*
845 * There is no need to overlap collapse range with EOF,
846 * in which case it is effectively a truncate operation
847 */
848 if (offset + len >= i_size_read(inode)) {
849 error = EINVAL;
841 goto out_unlock; 850 goto out_unlock;
842 } 851 }
843 852
844 ASSERT(offset + len < i_size_read(inode));
845 new_size = i_size_read(inode) - len; 853 new_size = i_size_read(inode) - len;
846 854
847 error = xfs_collapse_file_space(ip, offset, len); 855 error = xfs_collapse_file_space(ip, offset, len);
@@ -936,7 +944,7 @@ xfs_dir_open(
936 */ 944 */
937 mode = xfs_ilock_data_map_shared(ip); 945 mode = xfs_ilock_data_map_shared(ip);
938 if (ip->i_d.di_nextents > 0) 946 if (ip->i_d.di_nextents > 0)
939 xfs_dir3_data_readahead(NULL, ip, 0, -1); 947 xfs_dir3_data_readahead(ip, 0, -1);
940 xfs_iunlock(ip, mode); 948 xfs_iunlock(ip, mode);
941 return 0; 949 return 0;
942} 950}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 12b6e7701985..8ec81bed7992 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2006-2007 Silicon Graphics, Inc. 2 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
3 * Copyright (c) 2014 Christoph Hellwig.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -32,100 +33,20 @@
32#include "xfs_filestream.h" 33#include "xfs_filestream.h"
33#include "xfs_trace.h" 34#include "xfs_trace.h"
34 35
35#ifdef XFS_FILESTREAMS_TRACE 36struct xfs_fstrm_item {
36 37 struct xfs_mru_cache_elem mru;
37ktrace_t *xfs_filestreams_trace_buf; 38 struct xfs_inode *ip;
38 39 xfs_agnumber_t ag; /* AG in use for this directory */
39STATIC void 40};
40xfs_filestreams_trace(
41 xfs_mount_t *mp, /* mount point */
42 int type, /* type of trace */
43 const char *func, /* source function */
44 int line, /* source line number */
45 __psunsigned_t arg0,
46 __psunsigned_t arg1,
47 __psunsigned_t arg2,
48 __psunsigned_t arg3,
49 __psunsigned_t arg4,
50 __psunsigned_t arg5)
51{
52 ktrace_enter(xfs_filestreams_trace_buf,
53 (void *)(__psint_t)(type | (line << 16)),
54 (void *)func,
55 (void *)(__psunsigned_t)current_pid(),
56 (void *)mp,
57 (void *)(__psunsigned_t)arg0,
58 (void *)(__psunsigned_t)arg1,
59 (void *)(__psunsigned_t)arg2,
60 (void *)(__psunsigned_t)arg3,
61 (void *)(__psunsigned_t)arg4,
62 (void *)(__psunsigned_t)arg5,
63 NULL, NULL, NULL, NULL, NULL, NULL);
64}
65
66#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0)
67#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0)
68#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0)
69#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0)
70#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0)
71#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0)
72#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
73 xfs_filestreams_trace(mp, t, __func__, __LINE__, \
74 (__psunsigned_t)a0, (__psunsigned_t)a1, \
75 (__psunsigned_t)a2, (__psunsigned_t)a3, \
76 (__psunsigned_t)a4, (__psunsigned_t)a5)
77
78#define TRACE_AG_SCAN(mp, ag, ag2) \
79 TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
80#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
81 TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
82#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
83 TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
84 cnt, free, scan, flag)
85#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
86 TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
87#define TRACE_FREE(mp, ip, pip, ag, cnt) \
88 TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
89#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
90 TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
91#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
92 TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
93#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
94 TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
95#define TRACE_ORPHAN(mp, ip, ag) \
96 TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
97
98
99#else
100#define TRACE_AG_SCAN(mp, ag, ag2)
101#define TRACE_AG_PICK1(mp, max_ag, maxfree)
102#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
103#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
104#define TRACE_FREE(mp, ip, pip, ag, cnt)
105#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
106#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
107#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
108#define TRACE_ORPHAN(mp, ip, ag)
109#endif
110
111static kmem_zone_t *item_zone;
112 41
113/* 42enum xfs_fstrm_alloc {
114 * Structure for associating a file or a directory with an allocation group. 43 XFS_PICK_USERDATA = 1,
115 * The parent directory pointer is only needed for files, but since there will 44 XFS_PICK_LOWSPACE = 2,
116 * generally be vastly more files than directories in the cache, using the same 45};
117 * data structure simplifies the code with very little memory overhead.
118 */
119typedef struct fstrm_item
120{
121 xfs_agnumber_t ag; /* AG currently in use for the file/directory. */
122 xfs_inode_t *ip; /* inode self-pointer. */
123 xfs_inode_t *pip; /* Parent directory inode pointer. */
124} fstrm_item_t;
125 46
126/* 47/*
127 * Allocation group filestream associations are tracked with per-ag atomic 48 * Allocation group filestream associations are tracked with per-ag atomic
128 * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a 49 * counters. These counters allow xfs_filestream_pick_ag() to tell whether a
129 * particular AG already has active filestreams associated with it. The mount 50 * particular AG already has active filestreams associated with it. The mount
130 * point's m_peraglock is used to protect these counters from per-ag array 51 * point's m_peraglock is used to protect these counters from per-ag array
131 * re-allocation during a growfs operation. When xfs_growfs_data_private() is 52 * re-allocation during a growfs operation. When xfs_growfs_data_private() is
@@ -160,7 +81,7 @@ typedef struct fstrm_item
160 * the cache that reference per-ag array elements that have since been 81 * the cache that reference per-ag array elements that have since been
161 * reallocated. 82 * reallocated.
162 */ 83 */
163static int 84int
164xfs_filestream_peek_ag( 85xfs_filestream_peek_ag(
165 xfs_mount_t *mp, 86 xfs_mount_t *mp,
166 xfs_agnumber_t agno) 87 xfs_agnumber_t agno)
@@ -200,23 +121,40 @@ xfs_filestream_put_ag(
200 xfs_perag_put(pag); 121 xfs_perag_put(pag);
201} 122}
202 123
124static void
125xfs_fstrm_free_func(
126 struct xfs_mru_cache_elem *mru)
127{
128 struct xfs_fstrm_item *item =
129 container_of(mru, struct xfs_fstrm_item, mru);
130
131 xfs_filestream_put_ag(item->ip->i_mount, item->ag);
132
133 trace_xfs_filestream_free(item->ip, item->ag);
134
135 kmem_free(item);
136}
137
203/* 138/*
204 * Scan the AGs starting at startag looking for an AG that isn't in use and has 139 * Scan the AGs starting at startag looking for an AG that isn't in use and has
205 * at least minlen blocks free. 140 * at least minlen blocks free.
206 */ 141 */
207static int 142static int
208_xfs_filestream_pick_ag( 143xfs_filestream_pick_ag(
209 xfs_mount_t *mp, 144 struct xfs_inode *ip,
210 xfs_agnumber_t startag, 145 xfs_agnumber_t startag,
211 xfs_agnumber_t *agp, 146 xfs_agnumber_t *agp,
212 int flags, 147 int flags,
213 xfs_extlen_t minlen) 148 xfs_extlen_t minlen)
214{ 149{
215 int streams, max_streams; 150 struct xfs_mount *mp = ip->i_mount;
216 int err, trylock, nscan; 151 struct xfs_fstrm_item *item;
217 xfs_extlen_t longest, free, minfree, maxfree = 0; 152 struct xfs_perag *pag;
218 xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 153 xfs_extlen_t longest, free = 0, minfree, maxfree = 0;
219 struct xfs_perag *pag; 154 xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
155 int err, trylock, nscan;
156
157 ASSERT(S_ISDIR(ip->i_d.di_mode));
220 158
221 /* 2% of an AG's blocks must be free for it to be chosen. */ 159 /* 2% of an AG's blocks must be free for it to be chosen. */
222 minfree = mp->m_sb.sb_agblocks / 50; 160 minfree = mp->m_sb.sb_agblocks / 50;
@@ -228,8 +166,9 @@ _xfs_filestream_pick_ag(
228 trylock = XFS_ALLOC_FLAG_TRYLOCK; 166 trylock = XFS_ALLOC_FLAG_TRYLOCK;
229 167
230 for (nscan = 0; 1; nscan++) { 168 for (nscan = 0; 1; nscan++) {
169 trace_xfs_filestream_scan(ip, ag);
170
231 pag = xfs_perag_get(mp, ag); 171 pag = xfs_perag_get(mp, ag);
232 TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
233 172
234 if (!pag->pagf_init) { 173 if (!pag->pagf_init) {
235 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); 174 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
@@ -246,7 +185,6 @@ _xfs_filestream_pick_ag(
246 /* Keep track of the AG with the most free blocks. */ 185 /* Keep track of the AG with the most free blocks. */
247 if (pag->pagf_freeblks > maxfree) { 186 if (pag->pagf_freeblks > maxfree) {
248 maxfree = pag->pagf_freeblks; 187 maxfree = pag->pagf_freeblks;
249 max_streams = atomic_read(&pag->pagf_fstrms);
250 max_ag = ag; 188 max_ag = ag;
251 } 189 }
252 190
@@ -269,7 +207,6 @@ _xfs_filestream_pick_ag(
269 207
270 /* Break out, retaining the reference on the AG. */ 208 /* Break out, retaining the reference on the AG. */
271 free = pag->pagf_freeblks; 209 free = pag->pagf_freeblks;
272 streams = atomic_read(&pag->pagf_fstrms);
273 xfs_perag_put(pag); 210 xfs_perag_put(pag);
274 *agp = ag; 211 *agp = ag;
275 break; 212 break;
@@ -305,317 +242,98 @@ next_ag:
305 */ 242 */
306 if (max_ag != NULLAGNUMBER) { 243 if (max_ag != NULLAGNUMBER) {
307 xfs_filestream_get_ag(mp, max_ag); 244 xfs_filestream_get_ag(mp, max_ag);
308 TRACE_AG_PICK1(mp, max_ag, maxfree);
309 streams = max_streams;
310 free = maxfree; 245 free = maxfree;
311 *agp = max_ag; 246 *agp = max_ag;
312 break; 247 break;
313 } 248 }
314 249
315 /* take AG 0 if none matched */ 250 /* take AG 0 if none matched */
316 TRACE_AG_PICK1(mp, max_ag, maxfree); 251 trace_xfs_filestream_pick(ip, *agp, free, nscan);
317 *agp = 0; 252 *agp = 0;
318 return 0; 253 return 0;
319 } 254 }
320 255
321 TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); 256 trace_xfs_filestream_pick(ip, *agp, free, nscan);
322
323 return 0;
324}
325 257
326/* 258 if (*agp == NULLAGNUMBER)
327 * Set the allocation group number for a file or a directory, updating inode
328 * references and per-AG references as appropriate.
329 */
330static int
331_xfs_filestream_update_ag(
332 xfs_inode_t *ip,
333 xfs_inode_t *pip,
334 xfs_agnumber_t ag)
335{
336 int err = 0;
337 xfs_mount_t *mp;
338 xfs_mru_cache_t *cache;
339 fstrm_item_t *item;
340 xfs_agnumber_t old_ag;
341 xfs_inode_t *old_pip;
342
343 /*
344 * Either ip is a regular file and pip is a directory, or ip is a
345 * directory and pip is NULL.
346 */
347 ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
348 S_ISDIR(pip->i_d.di_mode)) ||
349 (S_ISDIR(ip->i_d.di_mode) && !pip)));
350
351 mp = ip->i_mount;
352 cache = mp->m_filestream;
353
354 item = xfs_mru_cache_lookup(cache, ip->i_ino);
355 if (item) {
356 ASSERT(item->ip == ip);
357 old_ag = item->ag;
358 item->ag = ag;
359 old_pip = item->pip;
360 item->pip = pip;
361 xfs_mru_cache_done(cache);
362
363 /*
364 * If the AG has changed, drop the old ref and take a new one,
365 * effectively transferring the reference from old to new AG.
366 */
367 if (ag != old_ag) {
368 xfs_filestream_put_ag(mp, old_ag);
369 xfs_filestream_get_ag(mp, ag);
370 }
371
372 /*
373 * If ip is a file and its pip has changed, drop the old ref and
374 * take a new one.
375 */
376 if (pip && pip != old_pip) {
377 IRELE(old_pip);
378 IHOLD(pip);
379 }
380
381 TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
382 ag, xfs_filestream_peek_ag(mp, ag));
383 return 0; 259 return 0;
384 }
385 260
386 item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); 261 err = ENOMEM;
262 item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
387 if (!item) 263 if (!item)
388 return ENOMEM; 264 goto out_put_ag;
389 265
390 item->ag = ag; 266 item->ag = *agp;
391 item->ip = ip; 267 item->ip = ip;
392 item->pip = pip;
393 268
394 err = xfs_mru_cache_insert(cache, ip->i_ino, item); 269 err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
395 if (err) { 270 if (err) {
396 kmem_zone_free(item_zone, item); 271 if (err == EEXIST)
397 return err; 272 err = 0;
273 goto out_free_item;
398 } 274 }
399 275
400 /* Take a reference on the AG. */
401 xfs_filestream_get_ag(mp, ag);
402
403 /*
404 * Take a reference on the inode itself regardless of whether it's a
405 * regular file or a directory.
406 */
407 IHOLD(ip);
408
409 /*
410 * In the case of a regular file, take a reference on the parent inode
411 * as well to ensure it remains in-core.
412 */
413 if (pip)
414 IHOLD(pip);
415
416 TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
417 ag, xfs_filestream_peek_ag(mp, ag));
418
419 return 0; 276 return 0;
420}
421
422/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
423STATIC void
424xfs_fstrm_free_func(
425 unsigned long ino,
426 void *data)
427{
428 fstrm_item_t *item = (fstrm_item_t *)data;
429 xfs_inode_t *ip = item->ip;
430
431 ASSERT(ip->i_ino == ino);
432
433 xfs_iflags_clear(ip, XFS_IFILESTREAM);
434
435 /* Drop the reference taken on the AG when the item was added. */
436 xfs_filestream_put_ag(ip->i_mount, item->ag);
437
438 TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
439 xfs_filestream_peek_ag(ip->i_mount, item->ag));
440
441 /*
442 * _xfs_filestream_update_ag() always takes a reference on the inode
443 * itself, whether it's a file or a directory. Release it here.
444 * This can result in the inode being freed and so we must
445 * not hold any inode locks when freeing filesstreams objects
446 * otherwise we can deadlock here.
447 */
448 IRELE(ip);
449
450 /*
451 * In the case of a regular file, _xfs_filestream_update_ag() also
452 * takes a ref on the parent inode to keep it in-core. Release that
453 * too.
454 */
455 if (item->pip)
456 IRELE(item->pip);
457
458 /* Finally, free the memory allocated for the item. */
459 kmem_zone_free(item_zone, item);
460}
461
462/*
463 * xfs_filestream_init() is called at xfs initialisation time to set up the
464 * memory zone that will be used for filestream data structure allocation.
465 */
466int
467xfs_filestream_init(void)
468{
469 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
470 if (!item_zone)
471 return -ENOMEM;
472
473 return 0;
474}
475
476/*
477 * xfs_filestream_uninit() is called at xfs termination time to destroy the
478 * memory zone that was used for filestream data structure allocation.
479 */
480void
481xfs_filestream_uninit(void)
482{
483 kmem_zone_destroy(item_zone);
484}
485
486/*
487 * xfs_filestream_mount() is called when a file system is mounted with the
488 * filestream option. It is responsible for allocating the data structures
489 * needed to track the new file system's file streams.
490 */
491int
492xfs_filestream_mount(
493 xfs_mount_t *mp)
494{
495 int err;
496 unsigned int lifetime, grp_count;
497
498 /*
499 * The filestream timer tunable is currently fixed within the range of
500 * one second to four minutes, with five seconds being the default. The
501 * group count is somewhat arbitrary, but it'd be nice to adhere to the
502 * timer tunable to within about 10 percent. This requires at least 10
503 * groups.
504 */
505 lifetime = xfs_fstrm_centisecs * 10;
506 grp_count = 10;
507
508 err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
509 xfs_fstrm_free_func);
510 277
278out_free_item:
279 kmem_free(item);
280out_put_ag:
281 xfs_filestream_put_ag(mp, *agp);
511 return err; 282 return err;
512} 283}
513 284
514/* 285static struct xfs_inode *
515 * xfs_filestream_unmount() is called when a file system that was mounted with 286xfs_filestream_get_parent(
516 * the filestream option is unmounted. It drains the data structures created 287 struct xfs_inode *ip)
517 * to track the file system's file streams and frees all the memory that was
518 * allocated.
519 */
520void
521xfs_filestream_unmount(
522 xfs_mount_t *mp)
523{ 288{
524 xfs_mru_cache_destroy(mp->m_filestream); 289 struct inode *inode = VFS_I(ip), *dir = NULL;
525} 290 struct dentry *dentry, *parent;
526 291
527/* 292 dentry = d_find_alias(inode);
528 * Return the AG of the filestream the file or directory belongs to, or 293 if (!dentry)
529 * NULLAGNUMBER otherwise. 294 goto out;
530 */
531xfs_agnumber_t
532xfs_filestream_lookup_ag(
533 xfs_inode_t *ip)
534{
535 xfs_mru_cache_t *cache;
536 fstrm_item_t *item;
537 xfs_agnumber_t ag;
538 int ref;
539
540 if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
541 ASSERT(0);
542 return NULLAGNUMBER;
543 }
544 295
545 cache = ip->i_mount->m_filestream; 296 parent = dget_parent(dentry);
546 item = xfs_mru_cache_lookup(cache, ip->i_ino); 297 if (!parent)
547 if (!item) { 298 goto out_dput;
548 TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
549 return NULLAGNUMBER;
550 }
551 299
552 ASSERT(ip == item->ip); 300 dir = igrab(parent->d_inode);
553 ag = item->ag; 301 dput(parent);
554 ref = xfs_filestream_peek_ag(ip->i_mount, ag);
555 xfs_mru_cache_done(cache);
556 302
557 TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); 303out_dput:
558 return ag; 304 dput(dentry);
305out:
306 return dir ? XFS_I(dir) : NULL;
559} 307}
560 308
561/* 309/*
562 * xfs_filestream_associate() should only be called to associate a regular file 310 * Find the right allocation group for a file, either by finding an
563 * with its parent directory. Calling it with a child directory isn't 311 * existing file stream or creating a new one.
564 * appropriate because filestreams don't apply to entire directory hierarchies.
565 * Creating a file in a child directory of an existing filestream directory
566 * starts a new filestream with its own allocation group association.
567 * 312 *
568 * Returns < 0 on error, 0 if successful association occurred, > 0 if 313 * Returns NULLAGNUMBER in case of an error.
569 * we failed to get an association because of locking issues.
570 */ 314 */
571int 315xfs_agnumber_t
572xfs_filestream_associate( 316xfs_filestream_lookup_ag(
573 xfs_inode_t *pip, 317 struct xfs_inode *ip)
574 xfs_inode_t *ip)
575{ 318{
576 xfs_mount_t *mp; 319 struct xfs_mount *mp = ip->i_mount;
577 xfs_mru_cache_t *cache; 320 struct xfs_inode *pip = NULL;
578 fstrm_item_t *item; 321 xfs_agnumber_t startag, ag = NULLAGNUMBER;
579 xfs_agnumber_t ag, rotorstep, startag; 322 struct xfs_mru_cache_elem *mru;
580 int err = 0;
581 323
582 ASSERT(S_ISDIR(pip->i_d.di_mode));
583 ASSERT(S_ISREG(ip->i_d.di_mode)); 324 ASSERT(S_ISREG(ip->i_d.di_mode));
584 if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
585 return -EINVAL;
586 325
587 mp = pip->i_mount; 326 pip = xfs_filestream_get_parent(ip);
588 cache = mp->m_filestream; 327 if (!pip)
328 goto out;
589 329
590 /* 330 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
591 * We have a problem, Houston. 331 if (mru) {
592 * 332 ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
593 * Taking the iolock here violates inode locking order - we already 333 xfs_mru_cache_done(mp->m_filestream);
594 * hold the ilock. Hence if we block getting this lock we may never
595 * wake. Unfortunately, that means if we can't get the lock, we're
596 * screwed in terms of getting a stream association - we can't spin
597 * waiting for the lock because someone else is waiting on the lock we
598 * hold and we cannot drop that as we are in a transaction here.
599 *
600 * Lucky for us, this inversion is not a problem because it's a
601 * directory inode that we are trying to lock here.
602 *
603 * So, if we can't get the iolock without sleeping then just give up
604 */
605 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
606 return 1;
607
608 /* If the parent directory is already in the cache, use its AG. */
609 item = xfs_mru_cache_lookup(cache, pip->i_ino);
610 if (item) {
611 ASSERT(item->ip == pip);
612 ag = item->ag;
613 xfs_mru_cache_done(cache);
614
615 TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
616 err = _xfs_filestream_update_ag(ip, pip, ag);
617 334
618 goto exit; 335 trace_xfs_filestream_lookup(ip, ag);
336 goto out;
619 } 337 }
620 338
621 /* 339 /*
@@ -623,202 +341,94 @@ xfs_filestream_associate(
623 * use the directory inode's AG. 341 * use the directory inode's AG.
624 */ 342 */
625 if (mp->m_flags & XFS_MOUNT_32BITINODES) { 343 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
626 rotorstep = xfs_rotorstep; 344 xfs_agnumber_t rotorstep = xfs_rotorstep;
627 startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; 345 startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
628 mp->m_agfrotor = (mp->m_agfrotor + 1) % 346 mp->m_agfrotor = (mp->m_agfrotor + 1) %
629 (mp->m_sb.sb_agcount * rotorstep); 347 (mp->m_sb.sb_agcount * rotorstep);
630 } else 348 } else
631 startag = XFS_INO_TO_AGNO(mp, pip->i_ino); 349 startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
632 350
633 /* Pick a new AG for the parent inode starting at startag. */ 351 if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
634 err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); 352 ag = NULLAGNUMBER;
635 if (err || ag == NULLAGNUMBER) 353out:
636 goto exit_did_pick; 354 IRELE(pip);
637 355 return ag;
638 /* Associate the parent inode with the AG. */
639 err = _xfs_filestream_update_ag(pip, NULL, ag);
640 if (err)
641 goto exit_did_pick;
642
643 /* Associate the file inode with the AG. */
644 err = _xfs_filestream_update_ag(ip, pip, ag);
645 if (err)
646 goto exit_did_pick;
647
648 TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
649
650exit_did_pick:
651 /*
652 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
653 * reference it took on it, since the file and directory will have taken
654 * their own now if they were successfully cached.
655 */
656 if (ag != NULLAGNUMBER)
657 xfs_filestream_put_ag(mp, ag);
658
659exit:
660 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
661 return -err;
662} 356}
663 357
664/* 358/*
665 * Pick a new allocation group for the current file and its file stream. This 359 * Pick a new allocation group for the current file and its file stream.
666 * function is called by xfs_bmap_filestreams() with the mount point's per-ag 360 *
667 * lock held. 361 * This is called when the allocator can't find a suitable extent in the
362 * current AG, and we have to move the stream into a new AG with more space.
668 */ 363 */
669int 364int
670xfs_filestream_new_ag( 365xfs_filestream_new_ag(
671 struct xfs_bmalloca *ap, 366 struct xfs_bmalloca *ap,
672 xfs_agnumber_t *agp) 367 xfs_agnumber_t *agp)
673{ 368{
674 int flags, err; 369 struct xfs_inode *ip = ap->ip, *pip;
675 xfs_inode_t *ip, *pip = NULL; 370 struct xfs_mount *mp = ip->i_mount;
676 xfs_mount_t *mp; 371 xfs_extlen_t minlen = ap->length;
677 xfs_mru_cache_t *cache; 372 xfs_agnumber_t startag = 0;
678 xfs_extlen_t minlen; 373 int flags, err = 0;
679 fstrm_item_t *dir, *file; 374 struct xfs_mru_cache_elem *mru;
680 xfs_agnumber_t ag = NULLAGNUMBER;
681
682 ip = ap->ip;
683 mp = ip->i_mount;
684 cache = mp->m_filestream;
685 minlen = ap->length;
686 *agp = NULLAGNUMBER;
687 375
688 /* 376 *agp = NULLAGNUMBER;
689 * Look for the file in the cache, removing it if it's found. Doing
690 * this allows it to be held across the dir lookup that follows.
691 */
692 file = xfs_mru_cache_remove(cache, ip->i_ino);
693 if (file) {
694 ASSERT(ip == file->ip);
695
696 /* Save the file's parent inode and old AG number for later. */
697 pip = file->pip;
698 ag = file->ag;
699
700 /* Look for the file's directory in the cache. */
701 dir = xfs_mru_cache_lookup(cache, pip->i_ino);
702 if (dir) {
703 ASSERT(pip == dir->ip);
704
705 /*
706 * If the directory has already moved on to a new AG,
707 * use that AG as the new AG for the file. Don't
708 * forget to twiddle the AG refcounts to match the
709 * movement.
710 */
711 if (dir->ag != file->ag) {
712 xfs_filestream_put_ag(mp, file->ag);
713 xfs_filestream_get_ag(mp, dir->ag);
714 *agp = file->ag = dir->ag;
715 }
716
717 xfs_mru_cache_done(cache);
718 }
719 377
720 /* 378 pip = xfs_filestream_get_parent(ip);
721 * Put the file back in the cache. If this fails, the free 379 if (!pip)
722 * function needs to be called to tidy up in the same way as if 380 goto exit;
723 * the item had simply expired from the cache.
724 */
725 err = xfs_mru_cache_insert(cache, ip->i_ino, file);
726 if (err) {
727 xfs_fstrm_free_func(ip->i_ino, file);
728 return err;
729 }
730 381
731 /* 382 mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
732 * If the file's AG was moved to the directory's new AG, there's 383 if (mru) {
733 * nothing more to be done. 384 struct xfs_fstrm_item *item =
734 */ 385 container_of(mru, struct xfs_fstrm_item, mru);
735 if (*agp != NULLAGNUMBER) { 386 startag = (item->ag + 1) % mp->m_sb.sb_agcount;
736 TRACE_MOVEAG(mp, ip, pip,
737 ag, xfs_filestream_peek_ag(mp, ag),
738 *agp, xfs_filestream_peek_ag(mp, *agp));
739 return 0;
740 }
741 } 387 }
742 388
743 /*
744 * If the file's parent directory is known, take its iolock in exclusive
745 * mode to prevent two sibling files from racing each other to migrate
746 * themselves and their parent to different AGs.
747 *
748 * Note that we lock the parent directory iolock inside the child
749 * iolock here. That's fine as we never hold both parent and child
750 * iolock in any other place. This is different from the ilock,
751 * which requires locking of the child after the parent for namespace
752 * operations.
753 */
754 if (pip)
755 xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
756
757 /*
758 * A new AG needs to be found for the file. If the file's parent
759 * directory is also known, it will be moved to the new AG as well to
760 * ensure that files created inside it in future use the new AG.
761 */
762 ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
763 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | 389 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
764 (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); 390 (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
765 391
766 err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); 392 err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
767 if (err || *agp == NULLAGNUMBER)
768 goto exit;
769 393
770 /* 394 /*
771 * If the file wasn't found in the file cache, then its parent directory 395 * Only free the item here so we skip over the old AG earlier.
772 * inode isn't known. For this to have happened, the file must either
773 * be pre-existing, or it was created long enough ago that its cache
774 * entry has expired. This isn't the sort of usage that the filestreams
775 * allocator is trying to optimise, so there's no point trying to track
776 * its new AG somehow in the filestream data structures.
777 */ 396 */
778 if (!pip) { 397 if (mru)
779 TRACE_ORPHAN(mp, ip, *agp); 398 xfs_fstrm_free_func(mru);
780 goto exit;
781 }
782
783 /* Associate the parent inode with the AG. */
784 err = _xfs_filestream_update_ag(pip, NULL, *agp);
785 if (err)
786 goto exit;
787
788 /* Associate the file inode with the AG. */
789 err = _xfs_filestream_update_ag(ip, pip, *agp);
790 if (err)
791 goto exit;
792
793 TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
794 *agp, xfs_filestream_peek_ag(mp, *agp));
795 399
400 IRELE(pip);
796exit: 401exit:
797 /* 402 if (*agp == NULLAGNUMBER)
798 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
799 * reference it took on it, since the file and directory will have taken
800 * their own now if they were successfully cached.
801 */
802 if (*agp != NULLAGNUMBER)
803 xfs_filestream_put_ag(mp, *agp);
804 else
805 *agp = 0; 403 *agp = 0;
806
807 if (pip)
808 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
809
810 return err; 404 return err;
811} 405}
812 406
813/*
814 * Remove an association between an inode and a filestream object.
815 * Typically this is done on last close of an unlinked file.
816 */
817void 407void
818xfs_filestream_deassociate( 408xfs_filestream_deassociate(
819 xfs_inode_t *ip) 409 struct xfs_inode *ip)
820{ 410{
821 xfs_mru_cache_t *cache = ip->i_mount->m_filestream; 411 xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
412}
413
414int
415xfs_filestream_mount(
416 xfs_mount_t *mp)
417{
418 /*
419 * The filestream timer tunable is currently fixed within the range of
420 * one second to four minutes, with five seconds being the default. The
421 * group count is somewhat arbitrary, but it'd be nice to adhere to the
422 * timer tunable to within about 10 percent. This requires at least 10
423 * groups.
424 */
425 return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10,
426 10, xfs_fstrm_free_func);
427}
822 428
823 xfs_mru_cache_delete(cache, ip->i_ino); 429void
430xfs_filestream_unmount(
431 xfs_mount_t *mp)
432{
433 xfs_mru_cache_destroy(mp->m_filestream);
824} 434}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 6d61dbee8564..2ef43406e53b 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -20,50 +20,20 @@
20 20
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_inode; 22struct xfs_inode;
23struct xfs_perag;
24struct xfs_bmalloca; 23struct xfs_bmalloca;
25 24
26#ifdef XFS_FILESTREAMS_TRACE
27#define XFS_FSTRM_KTRACE_INFO 1
28#define XFS_FSTRM_KTRACE_AGSCAN 2
29#define XFS_FSTRM_KTRACE_AGPICK1 3
30#define XFS_FSTRM_KTRACE_AGPICK2 4
31#define XFS_FSTRM_KTRACE_UPDATE 5
32#define XFS_FSTRM_KTRACE_FREE 6
33#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7
34#define XFS_FSTRM_KTRACE_ASSOCIATE 8
35#define XFS_FSTRM_KTRACE_MOVEAG 9
36#define XFS_FSTRM_KTRACE_ORPHAN 10
37
38#define XFS_FSTRM_KTRACE_SIZE 16384
39extern ktrace_t *xfs_filestreams_trace_buf;
40
41#endif
42
43/* allocation selection flags */
44typedef enum xfs_fstrm_alloc {
45 XFS_PICK_USERDATA = 1,
46 XFS_PICK_LOWSPACE = 2,
47} xfs_fstrm_alloc_t;
48
49/* prototypes for filestream.c */
50int xfs_filestream_init(void);
51void xfs_filestream_uninit(void);
52int xfs_filestream_mount(struct xfs_mount *mp); 25int xfs_filestream_mount(struct xfs_mount *mp);
53void xfs_filestream_unmount(struct xfs_mount *mp); 26void xfs_filestream_unmount(struct xfs_mount *mp);
54xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
55int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
56void xfs_filestream_deassociate(struct xfs_inode *ip); 27void xfs_filestream_deassociate(struct xfs_inode *ip);
28xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
57int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); 29int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
30int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno);
58 31
59
60/* filestreams for the inode? */
61static inline int 32static inline int
62xfs_inode_is_filestream( 33xfs_inode_is_filestream(
63 struct xfs_inode *ip) 34 struct xfs_inode *ip)
64{ 35{
65 return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || 36 return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
66 xfs_iflags_test(ip, XFS_IFILESTREAM) ||
67 (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM); 37 (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
68} 38}
69 39
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index 9898f31d05d8..34d85aca3058 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t;
202 */ 202 */
203#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ 203#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
204#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ 204#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
205#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
206#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
205 207
206typedef __uint64_t xfs_inofree_t; 208typedef __uint64_t xfs_inofree_t;
207#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) 209#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
@@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t;
244 * block numbers in the AG. 246 * block numbers in the AG.
245 */ 247 */
246#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) 248#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
247#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 249#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
250
251/*
252 * The first data block of an AG depends on whether the filesystem was formatted
253 * with the finobt feature. If so, account for the finobt reserved root btree
254 * block.
255 */
256#define XFS_PREALLOC_BLOCKS(mp) \
257 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
258 XFS_FIBT_BLOCK(mp) + 1 : \
259 XFS_IBT_BLOCK(mp) + 1)
248 260
249 261
250 262
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c5fc116dfaa3..d34703dbcb42 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks {
238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ 238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ 239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
240#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ 240#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
241#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
241 242
242/* 243/*
243 * Minimum and maximum sizes need for growth checks. 244 * Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 02fb943cbf22..3445ead7c1fc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -104,7 +104,9 @@ xfs_fs_geometry(
104 (xfs_sb_version_hascrc(&mp->m_sb) ? 104 (xfs_sb_version_hascrc(&mp->m_sb) ?
105 XFS_FSOP_GEOM_FLAGS_V5SB : 0) | 105 XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
106 (xfs_sb_version_hasftype(&mp->m_sb) ? 106 (xfs_sb_version_hasftype(&mp->m_sb) ?
107 XFS_FSOP_GEOM_FLAGS_FTYPE : 0); 107 XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
108 (xfs_sb_version_hasfinobt(&mp->m_sb) ?
109 XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
108 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 110 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
109 mp->m_sb.sb_logsectsize : BBSIZE; 111 mp->m_sb.sb_logsectsize : BBSIZE;
110 geo->rtsectsize = mp->m_sb.sb_blocksize; 112 geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -316,6 +318,10 @@ xfs_growfs_data_private(
316 agi->agi_dirino = cpu_to_be32(NULLAGINO); 318 agi->agi_dirino = cpu_to_be32(NULLAGINO);
317 if (xfs_sb_version_hascrc(&mp->m_sb)) 319 if (xfs_sb_version_hascrc(&mp->m_sb))
318 uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); 320 uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
321 if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
322 agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
323 agi->agi_free_level = cpu_to_be32(1);
324 }
319 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) 325 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
320 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 326 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
321 327
@@ -407,6 +413,34 @@ xfs_growfs_data_private(
407 xfs_buf_relse(bp); 413 xfs_buf_relse(bp);
408 if (error) 414 if (error)
409 goto error0; 415 goto error0;
416
417 /*
418 * FINO btree root block
419 */
420 if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
421 bp = xfs_growfs_get_hdr_buf(mp,
422 XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
423 BTOBB(mp->m_sb.sb_blocksize), 0,
424 &xfs_inobt_buf_ops);
425 if (!bp) {
426 error = ENOMEM;
427 goto error0;
428 }
429
430 if (xfs_sb_version_hascrc(&mp->m_sb))
431 xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
432 0, 0, agno,
433 XFS_BTREE_CRC_BLOCKS);
434 else
435 xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
436 0, agno, 0);
437
438 error = xfs_bwrite(bp);
439 xfs_buf_relse(bp);
440 if (error)
441 goto error0;
442 }
443
410 } 444 }
411 xfs_trans_agblocks_delta(tp, nfree); 445 xfs_trans_agblocks_delta(tp, nfree);
412 /* 446 /*
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 8f711db61a0c..6ac0c2986c32 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -112,6 +112,66 @@ xfs_inobt_get_rec(
112} 112}
113 113
114/* 114/*
115 * Insert a single inobt record. Cursor must already point to desired location.
116 */
117STATIC int
118xfs_inobt_insert_rec(
119 struct xfs_btree_cur *cur,
120 __int32_t freecount,
121 xfs_inofree_t free,
122 int *stat)
123{
124 cur->bc_rec.i.ir_freecount = freecount;
125 cur->bc_rec.i.ir_free = free;
126 return xfs_btree_insert(cur, stat);
127}
128
129/*
130 * Insert records describing a newly allocated inode chunk into the inobt.
131 */
132STATIC int
133xfs_inobt_insert(
134 struct xfs_mount *mp,
135 struct xfs_trans *tp,
136 struct xfs_buf *agbp,
137 xfs_agino_t newino,
138 xfs_agino_t newlen,
139 xfs_btnum_t btnum)
140{
141 struct xfs_btree_cur *cur;
142 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
143 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
144 xfs_agino_t thisino;
145 int i;
146 int error;
147
148 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
149
150 for (thisino = newino;
151 thisino < newino + newlen;
152 thisino += XFS_INODES_PER_CHUNK) {
153 error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
154 if (error) {
155 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
156 return error;
157 }
158 ASSERT(i == 0);
159
160 error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
161 XFS_INOBT_ALL_FREE, &i);
162 if (error) {
163 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
164 return error;
165 }
166 ASSERT(i == 1);
167 }
168
169 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
170
171 return 0;
172}
173
174/*
115 * Verify that the number of free inodes in the AGI is correct. 175 * Verify that the number of free inodes in the AGI is correct.
116 */ 176 */
117#ifdef DEBUG 177#ifdef DEBUG
@@ -303,13 +363,10 @@ xfs_ialloc_ag_alloc(
303{ 363{
304 xfs_agi_t *agi; /* allocation group header */ 364 xfs_agi_t *agi; /* allocation group header */
305 xfs_alloc_arg_t args; /* allocation argument structure */ 365 xfs_alloc_arg_t args; /* allocation argument structure */
306 xfs_btree_cur_t *cur; /* inode btree cursor */
307 xfs_agnumber_t agno; 366 xfs_agnumber_t agno;
308 int error; 367 int error;
309 int i;
310 xfs_agino_t newino; /* new first inode's number */ 368 xfs_agino_t newino; /* new first inode's number */
311 xfs_agino_t newlen; /* new number of inodes */ 369 xfs_agino_t newlen; /* new number of inodes */
312 xfs_agino_t thisino; /* current inode number, for loop */
313 int isaligned = 0; /* inode allocation at stripe unit */ 370 int isaligned = 0; /* inode allocation at stripe unit */
314 /* boundary */ 371 /* boundary */
315 struct xfs_perag *pag; 372 struct xfs_perag *pag;
@@ -459,29 +516,19 @@ xfs_ialloc_ag_alloc(
459 agi->agi_newino = cpu_to_be32(newino); 516 agi->agi_newino = cpu_to_be32(newino);
460 517
461 /* 518 /*
462 * Insert records describing the new inode chunk into the btree. 519 * Insert records describing the new inode chunk into the btrees.
463 */ 520 */
464 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); 521 error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
465 for (thisino = newino; 522 XFS_BTNUM_INO);
466 thisino < newino + newlen; 523 if (error)
467 thisino += XFS_INODES_PER_CHUNK) { 524 return error;
468 cur->bc_rec.i.ir_startino = thisino; 525
469 cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; 526 if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
470 cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; 527 error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
471 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); 528 XFS_BTNUM_FINO);
472 if (error) { 529 if (error)
473 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
474 return error;
475 }
476 ASSERT(i == 0);
477 error = xfs_btree_insert(cur, &i);
478 if (error) {
479 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
480 return error; 530 return error;
481 }
482 ASSERT(i == 1);
483 } 531 }
484 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
485 /* 532 /*
486 * Log allocation group header fields 533 * Log allocation group header fields
487 */ 534 */
@@ -675,13 +722,10 @@ xfs_ialloc_get_rec(
675} 722}
676 723
677/* 724/*
678 * Allocate an inode. 725 * Allocate an inode using the inobt-only algorithm.
679 *
680 * The caller selected an AG for us, and made sure that free inodes are
681 * available.
682 */ 726 */
683STATIC int 727STATIC int
684xfs_dialloc_ag( 728xfs_dialloc_ag_inobt(
685 struct xfs_trans *tp, 729 struct xfs_trans *tp,
686 struct xfs_buf *agbp, 730 struct xfs_buf *agbp,
687 xfs_ino_t parent, 731 xfs_ino_t parent,
@@ -707,7 +751,7 @@ xfs_dialloc_ag(
707 ASSERT(pag->pagi_freecount > 0); 751 ASSERT(pag->pagi_freecount > 0);
708 752
709 restart_pagno: 753 restart_pagno:
710 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 754 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
711 /* 755 /*
712 * If pagino is 0 (this is the root inode allocation) use newino. 756 * If pagino is 0 (this is the root inode allocation) use newino.
713 * This must work because we've just allocated some. 757 * This must work because we've just allocated some.
@@ -940,6 +984,294 @@ error0:
940} 984}
941 985
942/* 986/*
987 * Use the free inode btree to allocate an inode based on distance from the
988 * parent. Note that the provided cursor may be deleted and replaced.
989 */
990STATIC int
991xfs_dialloc_ag_finobt_near(
992 xfs_agino_t pagino,
993 struct xfs_btree_cur **ocur,
994 struct xfs_inobt_rec_incore *rec)
995{
996 struct xfs_btree_cur *lcur = *ocur; /* left search cursor */
997 struct xfs_btree_cur *rcur; /* right search cursor */
998 struct xfs_inobt_rec_incore rrec;
999 int error;
1000 int i, j;
1001
1002 error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
1003 if (error)
1004 return error;
1005
1006 if (i == 1) {
1007 error = xfs_inobt_get_rec(lcur, rec, &i);
1008 if (error)
1009 return error;
1010 XFS_WANT_CORRUPTED_RETURN(i == 1);
1011
1012 /*
1013 * See if we've landed in the parent inode record. The finobt
1014 * only tracks chunks with at least one free inode, so record
1015 * existence is enough.
1016 */
1017 if (pagino >= rec->ir_startino &&
1018 pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
1019 return 0;
1020 }
1021
1022 error = xfs_btree_dup_cursor(lcur, &rcur);
1023 if (error)
1024 return error;
1025
1026 error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
1027 if (error)
1028 goto error_rcur;
1029 if (j == 1) {
1030 error = xfs_inobt_get_rec(rcur, &rrec, &j);
1031 if (error)
1032 goto error_rcur;
1033 XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
1034 }
1035
1036 XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
1037 if (i == 1 && j == 1) {
1038 /*
1039 * Both the left and right records are valid. Choose the closer
1040 * inode chunk to the target.
1041 */
1042 if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
1043 (rrec.ir_startino - pagino)) {
1044 *rec = rrec;
1045 xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
1046 *ocur = rcur;
1047 } else {
1048 xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
1049 }
1050 } else if (j == 1) {
1051 /* only the right record is valid */
1052 *rec = rrec;
1053 xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
1054 *ocur = rcur;
1055 } else if (i == 1) {
1056 /* only the left record is valid */
1057 xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
1058 }
1059
1060 return 0;
1061
1062error_rcur:
1063 xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
1064 return error;
1065}
1066
1067/*
1068 * Use the free inode btree to find a free inode based on a newino hint. If
1069 * the hint is NULL, find the first free inode in the AG.
1070 */
1071STATIC int
1072xfs_dialloc_ag_finobt_newino(
1073 struct xfs_agi *agi,
1074 struct xfs_btree_cur *cur,
1075 struct xfs_inobt_rec_incore *rec)
1076{
1077 int error;
1078 int i;
1079
1080 if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
1081 error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ,
1082 &i);
1083 if (error)
1084 return error;
1085 if (i == 1) {
1086 error = xfs_inobt_get_rec(cur, rec, &i);
1087 if (error)
1088 return error;
1089 XFS_WANT_CORRUPTED_RETURN(i == 1);
1090
1091 return 0;
1092 }
1093 }
1094
1095 /*
1096 * Find the first inode available in the AG.
1097 */
1098 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
1099 if (error)
1100 return error;
1101 XFS_WANT_CORRUPTED_RETURN(i == 1);
1102
1103 error = xfs_inobt_get_rec(cur, rec, &i);
1104 if (error)
1105 return error;
1106 XFS_WANT_CORRUPTED_RETURN(i == 1);
1107
1108 return 0;
1109}
1110
1111/*
1112 * Update the inobt based on a modification made to the finobt. Also ensure that
1113 * the records from both trees are equivalent post-modification.
1114 */
1115STATIC int
1116xfs_dialloc_ag_update_inobt(
1117 struct xfs_btree_cur *cur, /* inobt cursor */
1118 struct xfs_inobt_rec_incore *frec, /* finobt record */
1119 int offset) /* inode offset */
1120{
1121 struct xfs_inobt_rec_incore rec;
1122 int error;
1123 int i;
1124
1125 error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
1126 if (error)
1127 return error;
1128 XFS_WANT_CORRUPTED_RETURN(i == 1);
1129
1130 error = xfs_inobt_get_rec(cur, &rec, &i);
1131 if (error)
1132 return error;
1133 XFS_WANT_CORRUPTED_RETURN(i == 1);
1134 ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
1135 XFS_INODES_PER_CHUNK) == 0);
1136
1137 rec.ir_free &= ~XFS_INOBT_MASK(offset);
1138 rec.ir_freecount--;
1139
1140 XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
1141 (rec.ir_freecount == frec->ir_freecount));
1142
1143 error = xfs_inobt_update(cur, &rec);
1144 if (error)
1145 return error;
1146
1147 return 0;
1148}
1149
1150/*
1151 * Allocate an inode using the free inode btree, if available. Otherwise, fall
1152 * back to the inobt search algorithm.
1153 *
1154 * The caller selected an AG for us, and made sure that free inodes are
1155 * available.
1156 */
1157STATIC int
1158xfs_dialloc_ag(
1159 struct xfs_trans *tp,
1160 struct xfs_buf *agbp,
1161 xfs_ino_t parent,
1162 xfs_ino_t *inop)
1163{
1164 struct xfs_mount *mp = tp->t_mountp;
1165 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1166 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
1167 xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
1168 xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
1169 struct xfs_perag *pag;
1170 struct xfs_btree_cur *cur; /* finobt cursor */
1171 struct xfs_btree_cur *icur; /* inobt cursor */
1172 struct xfs_inobt_rec_incore rec;
1173 xfs_ino_t ino;
1174 int error;
1175 int offset;
1176 int i;
1177
1178 if (!xfs_sb_version_hasfinobt(&mp->m_sb))
1179 return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
1180
1181 pag = xfs_perag_get(mp, agno);
1182
1183 /*
1184 * If pagino is 0 (this is the root inode allocation) use newino.
1185 * This must work because we've just allocated some.
1186 */
1187 if (!pagino)
1188 pagino = be32_to_cpu(agi->agi_newino);
1189
1190 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
1191
1192 error = xfs_check_agi_freecount(cur, agi);
1193 if (error)
1194 goto error_cur;
1195
1196 /*
1197 * The search algorithm depends on whether we're in the same AG as the
1198 * parent. If so, find the closest available inode to the parent. If
1199 * not, consider the agi hint or find the first free inode in the AG.
1200 */
1201 if (agno == pagno)
1202 error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
1203 else
1204 error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
1205 if (error)
1206 goto error_cur;
1207
1208 offset = xfs_lowbit64(rec.ir_free);
1209 ASSERT(offset >= 0);
1210 ASSERT(offset < XFS_INODES_PER_CHUNK);
1211 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
1212 XFS_INODES_PER_CHUNK) == 0);
1213 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
1214
1215 /*
1216 * Modify or remove the finobt record.
1217 */
1218 rec.ir_free &= ~XFS_INOBT_MASK(offset);
1219 rec.ir_freecount--;
1220 if (rec.ir_freecount)
1221 error = xfs_inobt_update(cur, &rec);
1222 else
1223 error = xfs_btree_delete(cur, &i);
1224 if (error)
1225 goto error_cur;
1226
1227 /*
1228 * The finobt has now been updated appropriately. We haven't updated the
1229 * agi and superblock yet, so we can create an inobt cursor and validate
1230 * the original freecount. If all is well, make the equivalent update to
1231 * the inobt using the finobt record and offset information.
1232 */
1233 icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
1234
1235 error = xfs_check_agi_freecount(icur, agi);
1236 if (error)
1237 goto error_icur;
1238
1239 error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
1240 if (error)
1241 goto error_icur;
1242
1243 /*
1244 * Both trees have now been updated. We must update the perag and
1245 * superblock before we can check the freecount for each btree.
1246 */
1247 be32_add_cpu(&agi->agi_freecount, -1);
1248 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1249 pag->pagi_freecount--;
1250
1251 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
1252
1253 error = xfs_check_agi_freecount(icur, agi);
1254 if (error)
1255 goto error_icur;
1256 error = xfs_check_agi_freecount(cur, agi);
1257 if (error)
1258 goto error_icur;
1259
1260 xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
1261 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1262 xfs_perag_put(pag);
1263 *inop = ino;
1264 return 0;
1265
1266error_icur:
1267 xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
1268error_cur:
1269 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1270 xfs_perag_put(pag);
1271 return error;
1272}
1273
1274/*
943 * Allocate an inode on disk. 1275 * Allocate an inode on disk.
944 * 1276 *
945 * Mode is used to tell whether the new inode will need space, and whether it 1277 * Mode is used to tell whether the new inode will need space, and whether it
@@ -1098,78 +1430,34 @@ out_error:
1098 return XFS_ERROR(error); 1430 return XFS_ERROR(error);
1099} 1431}
1100 1432
1101/* 1433STATIC int
1102 * Free disk inode. Carefully avoids touching the incore inode, all 1434xfs_difree_inobt(
1103 * manipulations incore are the caller's responsibility. 1435 struct xfs_mount *mp,
1104 * The on-disk inode is not changed by this operation, only the 1436 struct xfs_trans *tp,
1105 * btree (free inode mask) is changed. 1437 struct xfs_buf *agbp,
1106 */ 1438 xfs_agino_t agino,
1107int 1439 struct xfs_bmap_free *flist,
1108xfs_difree( 1440 int *delete,
1109 xfs_trans_t *tp, /* transaction pointer */ 1441 xfs_ino_t *first_ino,
1110 xfs_ino_t inode, /* inode to be freed */ 1442 struct xfs_inobt_rec_incore *orec)
1111 xfs_bmap_free_t *flist, /* extents to free */
1112 int *delete, /* set if inode cluster was deleted */
1113 xfs_ino_t *first_ino) /* first inode in deleted cluster */
1114{ 1443{
1115 /* REFERENCED */ 1444 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1116 xfs_agblock_t agbno; /* block number containing inode */ 1445 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
1117 xfs_buf_t *agbp; /* buffer containing allocation group header */ 1446 struct xfs_perag *pag;
1118 xfs_agino_t agino; /* inode number relative to allocation group */ 1447 struct xfs_btree_cur *cur;
1119 xfs_agnumber_t agno; /* allocation group number */ 1448 struct xfs_inobt_rec_incore rec;
1120 xfs_agi_t *agi; /* allocation group header */ 1449 int ilen;
1121 xfs_btree_cur_t *cur; /* inode btree cursor */ 1450 int error;
1122 int error; /* error return value */ 1451 int i;
1123 int i; /* result code */ 1452 int off;
1124 int ilen; /* inodes in an inode cluster */
1125 xfs_mount_t *mp; /* mount structure for filesystem */
1126 int off; /* offset of inode in inode chunk */
1127 xfs_inobt_rec_incore_t rec; /* btree record */
1128 struct xfs_perag *pag;
1129
1130 mp = tp->t_mountp;
1131 1453
1132 /*
1133 * Break up inode number into its components.
1134 */
1135 agno = XFS_INO_TO_AGNO(mp, inode);
1136 if (agno >= mp->m_sb.sb_agcount) {
1137 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1138 __func__, agno, mp->m_sb.sb_agcount);
1139 ASSERT(0);
1140 return XFS_ERROR(EINVAL);
1141 }
1142 agino = XFS_INO_TO_AGINO(mp, inode);
1143 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1144 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1145 __func__, (unsigned long long)inode,
1146 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1147 ASSERT(0);
1148 return XFS_ERROR(EINVAL);
1149 }
1150 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1151 if (agbno >= mp->m_sb.sb_agblocks) {
1152 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1153 __func__, agbno, mp->m_sb.sb_agblocks);
1154 ASSERT(0);
1155 return XFS_ERROR(EINVAL);
1156 }
1157 /*
1158 * Get the allocation group header.
1159 */
1160 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1161 if (error) {
1162 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1163 __func__, error);
1164 return error;
1165 }
1166 agi = XFS_BUF_TO_AGI(agbp);
1167 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1454 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1168 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1455 ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
1456
1169 /* 1457 /*
1170 * Initialize the cursor. 1458 * Initialize the cursor.
1171 */ 1459 */
1172 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1460 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
1173 1461
1174 error = xfs_check_agi_freecount(cur, agi); 1462 error = xfs_check_agi_freecount(cur, agi);
1175 if (error) 1463 if (error)
@@ -1261,6 +1549,7 @@ xfs_difree(
1261 if (error) 1549 if (error)
1262 goto error0; 1550 goto error0;
1263 1551
1552 *orec = rec;
1264 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1553 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1265 return 0; 1554 return 0;
1266 1555
@@ -1269,6 +1558,182 @@ error0:
1269 return error; 1558 return error;
1270} 1559}
1271 1560
1561/*
1562 * Free an inode in the free inode btree.
1563 */
1564STATIC int
1565xfs_difree_finobt(
1566 struct xfs_mount *mp,
1567 struct xfs_trans *tp,
1568 struct xfs_buf *agbp,
1569 xfs_agino_t agino,
1570 struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
1571{
1572 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1573 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
1574 struct xfs_btree_cur *cur;
1575 struct xfs_inobt_rec_incore rec;
1576 int offset = agino - ibtrec->ir_startino;
1577 int error;
1578 int i;
1579
1580 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
1581
1582 error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
1583 if (error)
1584 goto error;
1585 if (i == 0) {
1586 /*
1587 * If the record does not exist in the finobt, we must have just
1588 * freed an inode in a previously fully allocated chunk. If not,
1589 * something is out of sync.
1590 */
1591 XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
1592
1593 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
1594 ibtrec->ir_free, &i);
1595 if (error)
1596 goto error;
1597 ASSERT(i == 1);
1598
1599 goto out;
1600 }
1601
1602 /*
1603 * Read and update the existing record. We could just copy the ibtrec
1604 * across here, but that would defeat the purpose of having redundant
1605 * metadata. By making the modifications independently, we can catch
1606 * corruptions that we wouldn't see if we just copied from one record
1607 * to another.
1608 */
1609 error = xfs_inobt_get_rec(cur, &rec, &i);
1610 if (error)
1611 goto error;
1612 XFS_WANT_CORRUPTED_GOTO(i == 1, error);
1613
1614 rec.ir_free |= XFS_INOBT_MASK(offset);
1615 rec.ir_freecount++;
1616
1617 XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
1618 (rec.ir_freecount == ibtrec->ir_freecount),
1619 error);
1620
1621 /*
1622 * The content of inobt records should always match between the inobt
1623 * and finobt. The lifecycle of records in the finobt is different from
1624 * the inobt in that the finobt only tracks records with at least one
1625 * free inode. Hence, if all of the inodes are free and we aren't
1626 * keeping inode chunks permanently on disk, remove the record.
1627 * Otherwise, update the record with the new information.
1628 */
1629 if (rec.ir_freecount == mp->m_ialloc_inos &&
1630 !(mp->m_flags & XFS_MOUNT_IKEEP)) {
1631 error = xfs_btree_delete(cur, &i);
1632 if (error)
1633 goto error;
1634 ASSERT(i == 1);
1635 } else {
1636 error = xfs_inobt_update(cur, &rec);
1637 if (error)
1638 goto error;
1639 }
1640
1641out:
1642 error = xfs_check_agi_freecount(cur, agi);
1643 if (error)
1644 goto error;
1645
1646 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1647 return 0;
1648
1649error:
1650 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1651 return error;
1652}
1653
1654/*
1655 * Free disk inode. Carefully avoids touching the incore inode, all
1656 * manipulations incore are the caller's responsibility.
1657 * The on-disk inode is not changed by this operation, only the
1658 * btree (free inode mask) is changed.
1659 */
1660int
1661xfs_difree(
1662 struct xfs_trans *tp, /* transaction pointer */
1663 xfs_ino_t inode, /* inode to be freed */
1664 struct xfs_bmap_free *flist, /* extents to free */
1665 int *delete,/* set if inode cluster was deleted */
1666 xfs_ino_t *first_ino)/* first inode in deleted cluster */
1667{
1668 /* REFERENCED */
1669 xfs_agblock_t agbno; /* block number containing inode */
1670 struct xfs_buf *agbp; /* buffer for allocation group header */
1671 xfs_agino_t agino; /* allocation group inode number */
1672 xfs_agnumber_t agno; /* allocation group number */
1673 int error; /* error return value */
1674 struct xfs_mount *mp; /* mount structure for filesystem */
1675 struct xfs_inobt_rec_incore rec;/* btree record */
1676
1677 mp = tp->t_mountp;
1678
1679 /*
1680 * Break up inode number into its components.
1681 */
1682 agno = XFS_INO_TO_AGNO(mp, inode);
1683 if (agno >= mp->m_sb.sb_agcount) {
1684 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1685 __func__, agno, mp->m_sb.sb_agcount);
1686 ASSERT(0);
1687 return XFS_ERROR(EINVAL);
1688 }
1689 agino = XFS_INO_TO_AGINO(mp, inode);
1690 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1691 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1692 __func__, (unsigned long long)inode,
1693 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1694 ASSERT(0);
1695 return XFS_ERROR(EINVAL);
1696 }
1697 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1698 if (agbno >= mp->m_sb.sb_agblocks) {
1699 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1700 __func__, agbno, mp->m_sb.sb_agblocks);
1701 ASSERT(0);
1702 return XFS_ERROR(EINVAL);
1703 }
1704 /*
1705 * Get the allocation group header.
1706 */
1707 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1708 if (error) {
1709 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1710 __func__, error);
1711 return error;
1712 }
1713
1714 /*
1715 * Fix up the inode allocation btree.
1716 */
1717 error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino,
1718 &rec);
1719 if (error)
1720 goto error0;
1721
1722 /*
1723 * Fix up the free inode btree.
1724 */
1725 if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
1726 error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
1727 if (error)
1728 goto error0;
1729 }
1730
1731 return 0;
1732
1733error0:
1734 return error;
1735}
1736
1272STATIC int 1737STATIC int
1273xfs_imap_lookup( 1738xfs_imap_lookup(
1274 struct xfs_mount *mp, 1739 struct xfs_mount *mp,
@@ -1300,7 +1765,7 @@ xfs_imap_lookup(
1300 * we have a record, we need to ensure it contains the inode number 1765 * we have a record, we need to ensure it contains the inode number
1301 * we are looking up. 1766 * we are looking up.
1302 */ 1767 */
1303 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1768 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
1304 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); 1769 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1305 if (!error) { 1770 if (!error) {
1306 if (i) 1771 if (i)
@@ -1488,7 +1953,16 @@ xfs_ialloc_compute_maxlevels(
1488} 1953}
1489 1954
1490/* 1955/*
1491 * Log specified fields for the ag hdr (inode section) 1956 * Log specified fields for the ag hdr (inode section). The growth of the agi
1957 * structure over time requires that we interpret the buffer as two logical
1958 * regions delineated by the end of the unlinked list. This is due to the size
1959 * of the hash table and its location in the middle of the agi.
1960 *
1961 * For example, a request to log a field before agi_unlinked and a field after
1962 * agi_unlinked could cause us to log the entire hash table and use an excessive
1963 * amount of log space. To avoid this behavior, log the region up through
1964 * agi_unlinked in one call and the region after agi_unlinked through the end of
1965 * the structure in another.
1492 */ 1966 */
1493void 1967void
1494xfs_ialloc_log_agi( 1968xfs_ialloc_log_agi(
@@ -1511,6 +1985,8 @@ xfs_ialloc_log_agi(
1511 offsetof(xfs_agi_t, agi_newino), 1985 offsetof(xfs_agi_t, agi_newino),
1512 offsetof(xfs_agi_t, agi_dirino), 1986 offsetof(xfs_agi_t, agi_dirino),
1513 offsetof(xfs_agi_t, agi_unlinked), 1987 offsetof(xfs_agi_t, agi_unlinked),
1988 offsetof(xfs_agi_t, agi_free_root),
1989 offsetof(xfs_agi_t, agi_free_level),
1514 sizeof(xfs_agi_t) 1990 sizeof(xfs_agi_t)
1515 }; 1991 };
1516#ifdef DEBUG 1992#ifdef DEBUG
@@ -1519,15 +1995,30 @@ xfs_ialloc_log_agi(
1519 agi = XFS_BUF_TO_AGI(bp); 1995 agi = XFS_BUF_TO_AGI(bp);
1520 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1996 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1521#endif 1997#endif
1998
1999 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
2000
1522 /* 2001 /*
1523 * Compute byte offsets for the first and last fields. 2002 * Compute byte offsets for the first and last fields in the first
2003 * region and log the agi buffer. This only logs up through
2004 * agi_unlinked.
1524 */ 2005 */
1525 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 2006 if (fields & XFS_AGI_ALL_BITS_R1) {
2007 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
2008 &first, &last);
2009 xfs_trans_log_buf(tp, bp, first, last);
2010 }
2011
1526 /* 2012 /*
1527 * Log the allocation group inode header buffer. 2013 * Mask off the bits in the first region and calculate the first and
2014 * last field offsets for any bits in the second region.
1528 */ 2015 */
1529 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); 2016 fields &= ~XFS_AGI_ALL_BITS_R1;
1530 xfs_trans_log_buf(tp, bp, first, last); 2017 if (fields) {
2018 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
2019 &first, &last);
2020 xfs_trans_log_buf(tp, bp, first, last);
2021 }
1531} 2022}
1532 2023
1533#ifdef DEBUG 2024#ifdef DEBUG
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 7e309b11e87d..726f83a681a5 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -49,7 +49,8 @@ xfs_inobt_dup_cursor(
49 struct xfs_btree_cur *cur) 49 struct xfs_btree_cur *cur)
50{ 50{
51 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, 51 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
52 cur->bc_private.a.agbp, cur->bc_private.a.agno); 52 cur->bc_private.a.agbp, cur->bc_private.a.agno,
53 cur->bc_btnum);
53} 54}
54 55
55STATIC void 56STATIC void
@@ -66,12 +67,26 @@ xfs_inobt_set_root(
66 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); 67 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
67} 68}
68 69
70STATIC void
71xfs_finobt_set_root(
72 struct xfs_btree_cur *cur,
73 union xfs_btree_ptr *nptr,
74 int inc) /* level change */
75{
76 struct xfs_buf *agbp = cur->bc_private.a.agbp;
77 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
78
79 agi->agi_free_root = nptr->s;
80 be32_add_cpu(&agi->agi_free_level, inc);
81 xfs_ialloc_log_agi(cur->bc_tp, agbp,
82 XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
83}
84
69STATIC int 85STATIC int
70xfs_inobt_alloc_block( 86xfs_inobt_alloc_block(
71 struct xfs_btree_cur *cur, 87 struct xfs_btree_cur *cur,
72 union xfs_btree_ptr *start, 88 union xfs_btree_ptr *start,
73 union xfs_btree_ptr *new, 89 union xfs_btree_ptr *new,
74 int length,
75 int *stat) 90 int *stat)
76{ 91{
77 xfs_alloc_arg_t args; /* block allocation args */ 92 xfs_alloc_arg_t args; /* block allocation args */
@@ -173,6 +188,17 @@ xfs_inobt_init_ptr_from_cur(
173 ptr->s = agi->agi_root; 188 ptr->s = agi->agi_root;
174} 189}
175 190
191STATIC void
192xfs_finobt_init_ptr_from_cur(
193 struct xfs_btree_cur *cur,
194 union xfs_btree_ptr *ptr)
195{
196 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
197
198 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
199 ptr->s = agi->agi_free_root;
200}
201
176STATIC __int64_t 202STATIC __int64_t
177xfs_inobt_key_diff( 203xfs_inobt_key_diff(
178 struct xfs_btree_cur *cur, 204 struct xfs_btree_cur *cur,
@@ -203,6 +229,7 @@ xfs_inobt_verify(
203 */ 229 */
204 switch (block->bb_magic) { 230 switch (block->bb_magic) {
205 case cpu_to_be32(XFS_IBT_CRC_MAGIC): 231 case cpu_to_be32(XFS_IBT_CRC_MAGIC):
232 case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
206 if (!xfs_sb_version_hascrc(&mp->m_sb)) 233 if (!xfs_sb_version_hascrc(&mp->m_sb))
207 return false; 234 return false;
208 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) 235 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
@@ -214,6 +241,7 @@ xfs_inobt_verify(
214 return false; 241 return false;
215 /* fall through */ 242 /* fall through */
216 case cpu_to_be32(XFS_IBT_MAGIC): 243 case cpu_to_be32(XFS_IBT_MAGIC):
244 case cpu_to_be32(XFS_FIBT_MAGIC):
217 break; 245 break;
218 default: 246 default:
219 return 0; 247 return 0;
@@ -317,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
317#endif 345#endif
318}; 346};
319 347
348static const struct xfs_btree_ops xfs_finobt_ops = {
349 .rec_len = sizeof(xfs_inobt_rec_t),
350 .key_len = sizeof(xfs_inobt_key_t),
351
352 .dup_cursor = xfs_inobt_dup_cursor,
353 .set_root = xfs_finobt_set_root,
354 .alloc_block = xfs_inobt_alloc_block,
355 .free_block = xfs_inobt_free_block,
356 .get_minrecs = xfs_inobt_get_minrecs,
357 .get_maxrecs = xfs_inobt_get_maxrecs,
358 .init_key_from_rec = xfs_inobt_init_key_from_rec,
359 .init_rec_from_key = xfs_inobt_init_rec_from_key,
360 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
361 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
362 .key_diff = xfs_inobt_key_diff,
363 .buf_ops = &xfs_inobt_buf_ops,
364#if defined(DEBUG) || defined(XFS_WARN)
365 .keys_inorder = xfs_inobt_keys_inorder,
366 .recs_inorder = xfs_inobt_recs_inorder,
367#endif
368};
369
320/* 370/*
321 * Allocate a new inode btree cursor. 371 * Allocate a new inode btree cursor.
322 */ 372 */
@@ -325,7 +375,8 @@ xfs_inobt_init_cursor(
325 struct xfs_mount *mp, /* file system mount point */ 375 struct xfs_mount *mp, /* file system mount point */
326 struct xfs_trans *tp, /* transaction pointer */ 376 struct xfs_trans *tp, /* transaction pointer */
327 struct xfs_buf *agbp, /* buffer for agi structure */ 377 struct xfs_buf *agbp, /* buffer for agi structure */
328 xfs_agnumber_t agno) /* allocation group number */ 378 xfs_agnumber_t agno, /* allocation group number */
379 xfs_btnum_t btnum) /* ialloc or free ino btree */
329{ 380{
330 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 381 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
331 struct xfs_btree_cur *cur; 382 struct xfs_btree_cur *cur;
@@ -334,11 +385,17 @@ xfs_inobt_init_cursor(
334 385
335 cur->bc_tp = tp; 386 cur->bc_tp = tp;
336 cur->bc_mp = mp; 387 cur->bc_mp = mp;
337 cur->bc_nlevels = be32_to_cpu(agi->agi_level); 388 cur->bc_btnum = btnum;
338 cur->bc_btnum = XFS_BTNUM_INO; 389 if (btnum == XFS_BTNUM_INO) {
390 cur->bc_nlevels = be32_to_cpu(agi->agi_level);
391 cur->bc_ops = &xfs_inobt_ops;
392 } else {
393 cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
394 cur->bc_ops = &xfs_finobt_ops;
395 }
396
339 cur->bc_blocklog = mp->m_sb.sb_blocklog; 397 cur->bc_blocklog = mp->m_sb.sb_blocklog;
340 398
341 cur->bc_ops = &xfs_inobt_ops;
342 if (xfs_sb_version_hascrc(&mp->m_sb)) 399 if (xfs_sb_version_hascrc(&mp->m_sb))
343 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; 400 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
344 401
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index f38b22011c4e..d7ebea72c2d0 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -58,7 +58,8 @@ struct xfs_mount;
58 ((index) - 1) * sizeof(xfs_inobt_ptr_t))) 58 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
59 59
60extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, 60extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
61 struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); 61 struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t,
62 xfs_btnum_t);
62extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); 63extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
63 64
64#endif /* __XFS_IALLOC_BTREE_H__ */ 65#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 98d35244eecc..c48df5f25b9f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -507,8 +507,7 @@ STATIC int
507xfs_inode_ag_walk( 507xfs_inode_ag_walk(
508 struct xfs_mount *mp, 508 struct xfs_mount *mp,
509 struct xfs_perag *pag, 509 struct xfs_perag *pag,
510 int (*execute)(struct xfs_inode *ip, 510 int (*execute)(struct xfs_inode *ip, int flags,
511 struct xfs_perag *pag, int flags,
512 void *args), 511 void *args),
513 int flags, 512 int flags,
514 void *args, 513 void *args,
@@ -582,7 +581,7 @@ restart:
582 for (i = 0; i < nr_found; i++) { 581 for (i = 0; i < nr_found; i++) {
583 if (!batch[i]) 582 if (!batch[i])
584 continue; 583 continue;
585 error = execute(batch[i], pag, flags, args); 584 error = execute(batch[i], flags, args);
586 IRELE(batch[i]); 585 IRELE(batch[i]);
587 if (error == EAGAIN) { 586 if (error == EAGAIN) {
588 skipped++; 587 skipped++;
@@ -636,8 +635,7 @@ xfs_eofblocks_worker(
636int 635int
637xfs_inode_ag_iterator( 636xfs_inode_ag_iterator(
638 struct xfs_mount *mp, 637 struct xfs_mount *mp,
639 int (*execute)(struct xfs_inode *ip, 638 int (*execute)(struct xfs_inode *ip, int flags,
640 struct xfs_perag *pag, int flags,
641 void *args), 639 void *args),
642 int flags, 640 int flags,
643 void *args) 641 void *args)
@@ -664,8 +662,7 @@ xfs_inode_ag_iterator(
664int 662int
665xfs_inode_ag_iterator_tag( 663xfs_inode_ag_iterator_tag(
666 struct xfs_mount *mp, 664 struct xfs_mount *mp,
667 int (*execute)(struct xfs_inode *ip, 665 int (*execute)(struct xfs_inode *ip, int flags,
668 struct xfs_perag *pag, int flags,
669 void *args), 666 void *args),
670 int flags, 667 int flags,
671 void *args, 668 void *args,
@@ -1209,7 +1206,6 @@ xfs_inode_match_id(
1209STATIC int 1206STATIC int
1210xfs_inode_free_eofblocks( 1207xfs_inode_free_eofblocks(
1211 struct xfs_inode *ip, 1208 struct xfs_inode *ip,
1212 struct xfs_perag *pag,
1213 int flags, 1209 int flags,
1214 void *args) 1210 void *args)
1215{ 1211{
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9ed68bb750f5..9cf017b899be 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -60,12 +60,10 @@ int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
60void xfs_eofblocks_worker(struct work_struct *); 60void xfs_eofblocks_worker(struct work_struct *);
61 61
62int xfs_inode_ag_iterator(struct xfs_mount *mp, 62int xfs_inode_ag_iterator(struct xfs_mount *mp,
63 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, 63 int (*execute)(struct xfs_inode *ip, int flags, void *args),
64 int flags, void *args),
65 int flags, void *args); 64 int flags, void *args);
66int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, 65int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
67 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, 66 int (*execute)(struct xfs_inode *ip, int flags, void *args),
68 int flags, void *args),
69 int flags, void *args, int tag); 67 int flags, void *args, int tag);
70 68
71static inline int 69static inline int
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5e7a38fa6ee6..6d6b44a508f9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -655,7 +655,6 @@ xfs_ialloc(
655 uint flags; 655 uint flags;
656 int error; 656 int error;
657 timespec_t tv; 657 timespec_t tv;
658 int filestreams = 0;
659 658
660 /* 659 /*
661 * Call the space management code to pick 660 * Call the space management code to pick
@@ -772,13 +771,6 @@ xfs_ialloc(
772 flags |= XFS_ILOG_DEV; 771 flags |= XFS_ILOG_DEV;
773 break; 772 break;
774 case S_IFREG: 773 case S_IFREG:
775 /*
776 * we can't set up filestreams until after the VFS inode
777 * is set up properly.
778 */
779 if (pip && xfs_inode_is_filestream(pip))
780 filestreams = 1;
781 /* fall through */
782 case S_IFDIR: 774 case S_IFDIR:
783 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 775 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
784 uint di_flags = 0; 776 uint di_flags = 0;
@@ -844,15 +836,6 @@ xfs_ialloc(
844 /* now that we have an i_mode we can setup inode ops and unlock */ 836 /* now that we have an i_mode we can setup inode ops and unlock */
845 xfs_setup_inode(ip); 837 xfs_setup_inode(ip);
846 838
847 /* now we have set up the vfs inode we can associate the filestream */
848 if (filestreams) {
849 error = xfs_filestream_associate(pip, ip);
850 if (error < 0)
851 return -error;
852 if (!error)
853 xfs_iflags_set(ip, XFS_IFILESTREAM);
854 }
855
856 *ipp = ip; 839 *ipp = ip;
857 return 0; 840 return 0;
858} 841}
@@ -1334,7 +1317,8 @@ int
1334xfs_create_tmpfile( 1317xfs_create_tmpfile(
1335 struct xfs_inode *dp, 1318 struct xfs_inode *dp,
1336 struct dentry *dentry, 1319 struct dentry *dentry,
1337 umode_t mode) 1320 umode_t mode,
1321 struct xfs_inode **ipp)
1338{ 1322{
1339 struct xfs_mount *mp = dp->i_mount; 1323 struct xfs_mount *mp = dp->i_mount;
1340 struct xfs_inode *ip = NULL; 1324 struct xfs_inode *ip = NULL;
@@ -1402,7 +1386,6 @@ xfs_create_tmpfile(
1402 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1386 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1403 1387
1404 ip->i_d.di_nlink--; 1388 ip->i_d.di_nlink--;
1405 d_tmpfile(dentry, VFS_I(ip));
1406 error = xfs_iunlink(tp, ip); 1389 error = xfs_iunlink(tp, ip);
1407 if (error) 1390 if (error)
1408 goto out_trans_abort; 1391 goto out_trans_abort;
@@ -1415,6 +1398,7 @@ xfs_create_tmpfile(
1415 xfs_qm_dqrele(gdqp); 1398 xfs_qm_dqrele(gdqp);
1416 xfs_qm_dqrele(pdqp); 1399 xfs_qm_dqrele(pdqp);
1417 1400
1401 *ipp = ip;
1418 return 0; 1402 return 0;
1419 1403
1420 out_trans_abort: 1404 out_trans_abort:
@@ -1698,16 +1682,6 @@ xfs_release(
1698 int truncated; 1682 int truncated;
1699 1683
1700 /* 1684 /*
1701 * If we are using filestreams, and we have an unlinked
1702 * file that we are processing the last close on, then nothing
1703 * will be able to reopen and write to this file. Purge this
1704 * inode from the filestreams cache so that it doesn't delay
1705 * teardown of the inode.
1706 */
1707 if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
1708 xfs_filestream_deassociate(ip);
1709
1710 /*
1711 * If we previously truncated this file and removed old data 1685 * If we previously truncated this file and removed old data
1712 * in the process, we want to initiate "early" writeout on 1686 * in the process, we want to initiate "early" writeout on
1713 * the last close. This is an attempt to combat the notorious 1687 * the last close. This is an attempt to combat the notorious
@@ -1837,9 +1811,33 @@ xfs_inactive_ifree(
1837 int error; 1811 int error;
1838 1812
1839 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1813 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1840 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); 1814
1815 /*
1816 * The ifree transaction might need to allocate blocks for record
1817 * insertion to the finobt. We don't want to fail here at ENOSPC, so
1818 * allow ifree to dip into the reserved block pool if necessary.
1819 *
1820 * Freeing large sets of inodes generally means freeing inode chunks,
1821 * directory and file data blocks, so this should be relatively safe.
1822 * Only under severe circumstances should it be possible to free enough
1823 * inodes to exhaust the reserve block pool via finobt expansion while
1824 * at the same time not creating free space in the filesystem.
1825 *
1826 * Send a warning if the reservation does happen to fail, as the inode
1827 * now remains allocated and sits on the unlinked list until the fs is
1828 * repaired.
1829 */
1830 tp->t_flags |= XFS_TRANS_RESERVE;
1831 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
1832 XFS_IFREE_SPACE_RES(mp), 0);
1841 if (error) { 1833 if (error) {
1842 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1834 if (error == ENOSPC) {
1835 xfs_warn_ratelimited(mp,
1836 "Failed to remove inode(s) from unlinked list. "
1837 "Please free space, unmount and run xfs_repair.");
1838 } else {
1839 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1840 }
1843 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); 1841 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
1844 return error; 1842 return error;
1845 } 1843 }
@@ -2663,13 +2661,7 @@ xfs_remove(
2663 if (error) 2661 if (error)
2664 goto std_return; 2662 goto std_return;
2665 2663
2666 /* 2664 if (is_dir && xfs_inode_is_filestream(ip))
2667 * If we are using filestreams, kill the stream association.
2668 * If the file is still open it may get a new one but that
2669 * will get killed on last close in xfs_close() so we don't
2670 * have to worry about that.
2671 */
2672 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
2673 xfs_filestream_deassociate(ip); 2665 xfs_filestream_deassociate(ip);
2674 2666
2675 return 0; 2667 return 0;
@@ -3371,9 +3363,9 @@ xfs_iflush_int(
3371 } 3363 }
3372 } 3364 }
3373 3365
3374 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 3366 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
3375 if (XFS_IFORK_Q(ip)) 3367 if (XFS_IFORK_Q(ip))
3376 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 3368 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
3377 xfs_inobp_check(mp, bp); 3369 xfs_inobp_check(mp, bp);
3378 3370
3379 /* 3371 /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 396cc1fafd0d..13aea548206c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -209,7 +209,6 @@ xfs_get_initial_prid(struct xfs_inode *dp)
209#define XFS_ISTALE (1 << 1) /* inode has been staled */ 209#define XFS_ISTALE (1 << 1) /* inode has been staled */
210#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ 210#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
211#define XFS_INEW (1 << 3) /* inode has just been allocated */ 211#define XFS_INEW (1 << 3) /* inode has just been allocated */
212#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
213#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ 212#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
214#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ 213#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
215#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ 214#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
@@ -225,8 +224,7 @@ xfs_get_initial_prid(struct xfs_inode *dp)
225 */ 224 */
226#define XFS_IRECLAIM_RESET_FLAGS \ 225#define XFS_IRECLAIM_RESET_FLAGS \
227 (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ 226 (XFS_IRECLAIMABLE | XFS_IRECLAIM | \
228 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ 227 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
229 XFS_IFILESTREAM);
230 228
231/* 229/*
232 * Synchronize processes attempting to flush the in-core inode back to disk. 230 * Synchronize processes attempting to flush the in-core inode back to disk.
@@ -334,7 +332,7 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
334int xfs_create(struct xfs_inode *dp, struct xfs_name *name, 332int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); 333 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, 334int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
337 umode_t mode); 335 umode_t mode, struct xfs_inode **ipp);
338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 336int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
339 struct xfs_inode *ip); 337 struct xfs_inode *ip);
340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 338int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c
index 73514c0486b7..b031e8d0d928 100644
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/xfs_inode_fork.c
@@ -798,8 +798,7 @@ xfs_iflush_fork(
798 xfs_inode_t *ip, 798 xfs_inode_t *ip,
799 xfs_dinode_t *dip, 799 xfs_dinode_t *dip,
800 xfs_inode_log_item_t *iip, 800 xfs_inode_log_item_t *iip,
801 int whichfork, 801 int whichfork)
802 xfs_buf_t *bp)
803{ 802{
804 char *cp; 803 char *cp;
805 xfs_ifork_t *ifp; 804 xfs_ifork_t *ifp;
diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h
index eb329a1ea888..7d3b1ed6dcbe 100644
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/xfs_inode_fork.h
@@ -127,8 +127,7 @@ typedef struct xfs_ifork {
127 127
128int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); 128int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
129void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, 129void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
130 struct xfs_inode_log_item *, int, 130 struct xfs_inode_log_item *, int);
131 struct xfs_buf *);
132void xfs_idestroy_fork(struct xfs_inode *, int); 131void xfs_idestroy_fork(struct xfs_inode *, int);
133void xfs_idata_realloc(struct xfs_inode *, int, int); 132void xfs_idata_realloc(struct xfs_inode *, int, int);
134void xfs_iroot_realloc(struct xfs_inode *, int, int); 133void xfs_iroot_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0b18776b075e..2d8f4fdf07f9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -543,10 +543,11 @@ xfs_attrmulti_by_handle(
543 543
544 ops = memdup_user(am_hreq.ops, size); 544 ops = memdup_user(am_hreq.ops, size);
545 if (IS_ERR(ops)) { 545 if (IS_ERR(ops)) {
546 error = PTR_ERR(ops); 546 error = -PTR_ERR(ops);
547 goto out_dput; 547 goto out_dput;
548 } 548 }
549 549
550 error = ENOMEM;
550 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 551 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
551 if (!attr_name) 552 if (!attr_name)
552 goto out_kfree_ops; 553 goto out_kfree_ops;
@@ -556,7 +557,7 @@ xfs_attrmulti_by_handle(
556 ops[i].am_error = strncpy_from_user((char *)attr_name, 557 ops[i].am_error = strncpy_from_user((char *)attr_name,
557 ops[i].am_attrname, MAXNAMELEN); 558 ops[i].am_attrname, MAXNAMELEN);
558 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 559 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
559 error = -ERANGE; 560 error = ERANGE;
560 if (ops[i].am_error < 0) 561 if (ops[i].am_error < 0)
561 break; 562 break;
562 563
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index a7992f8de9d3..944d5baa710a 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -424,10 +424,11 @@ xfs_compat_attrmulti_by_handle(
424 424
425 ops = memdup_user(compat_ptr(am_hreq.ops), size); 425 ops = memdup_user(compat_ptr(am_hreq.ops), size);
426 if (IS_ERR(ops)) { 426 if (IS_ERR(ops)) {
427 error = PTR_ERR(ops); 427 error = -PTR_ERR(ops);
428 goto out_dput; 428 goto out_dput;
429 } 429 }
430 430
431 error = ENOMEM;
431 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 432 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
432 if (!attr_name) 433 if (!attr_name)
433 goto out_kfree_ops; 434 goto out_kfree_ops;
@@ -438,7 +439,7 @@ xfs_compat_attrmulti_by_handle(
438 compat_ptr(ops[i].am_attrname), 439 compat_ptr(ops[i].am_attrname),
439 MAXNAMELEN); 440 MAXNAMELEN);
440 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 441 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
441 error = -ERANGE; 442 error = ERANGE;
442 if (ops[i].am_error < 0) 443 if (ops[i].am_error < 0)
443 break; 444 break;
444 445
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3b80ebae05f5..6c5eb4c551e3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -730,7 +730,7 @@ xfs_iomap_write_allocate(
730 */ 730 */
731 nimaps = 1; 731 nimaps = 1;
732 end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); 732 end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
733 error = xfs_bmap_last_offset(NULL, ip, &last_block, 733 error = xfs_bmap_last_offset(ip, &last_block,
734 XFS_DATA_FORK); 734 XFS_DATA_FORK);
735 if (error) 735 if (error)
736 goto trans_cancel; 736 goto trans_cancel;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 89b07e43ca28..205613a06068 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -72,8 +72,8 @@ xfs_initxattrs(
72 int error = 0; 72 int error = 0;
73 73
74 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 74 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
75 error = xfs_attr_set(ip, xattr->name, xattr->value, 75 error = -xfs_attr_set(ip, xattr->name, xattr->value,
76 xattr->value_len, ATTR_SECURE); 76 xattr->value_len, ATTR_SECURE);
77 if (error < 0) 77 if (error < 0)
78 break; 78 break;
79 } 79 }
@@ -93,8 +93,8 @@ xfs_init_security(
93 struct inode *dir, 93 struct inode *dir,
94 const struct qstr *qstr) 94 const struct qstr *qstr)
95{ 95{
96 return security_inode_init_security(inode, dir, qstr, 96 return -security_inode_init_security(inode, dir, qstr,
97 &xfs_initxattrs, NULL); 97 &xfs_initxattrs, NULL);
98} 98}
99 99
100static void 100static void
@@ -124,15 +124,15 @@ xfs_cleanup_inode(
124 xfs_dentry_to_name(&teardown, dentry, 0); 124 xfs_dentry_to_name(&teardown, dentry, 0);
125 125
126 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); 126 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
127 iput(inode);
128} 127}
129 128
130STATIC int 129STATIC int
131xfs_vn_mknod( 130xfs_generic_create(
132 struct inode *dir, 131 struct inode *dir,
133 struct dentry *dentry, 132 struct dentry *dentry,
134 umode_t mode, 133 umode_t mode,
135 dev_t rdev) 134 dev_t rdev,
135 bool tmpfile) /* unnamed file */
136{ 136{
137 struct inode *inode; 137 struct inode *inode;
138 struct xfs_inode *ip = NULL; 138 struct xfs_inode *ip = NULL;
@@ -156,8 +156,12 @@ xfs_vn_mknod(
156 if (error) 156 if (error)
157 return error; 157 return error;
158 158
159 xfs_dentry_to_name(&name, dentry, mode); 159 if (!tmpfile) {
160 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); 160 xfs_dentry_to_name(&name, dentry, mode);
161 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
162 } else {
163 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
164 }
161 if (unlikely(error)) 165 if (unlikely(error))
162 goto out_free_acl; 166 goto out_free_acl;
163 167
@@ -169,18 +173,22 @@ xfs_vn_mknod(
169 173
170#ifdef CONFIG_XFS_POSIX_ACL 174#ifdef CONFIG_XFS_POSIX_ACL
171 if (default_acl) { 175 if (default_acl) {
172 error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 176 error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
173 if (error) 177 if (error)
174 goto out_cleanup_inode; 178 goto out_cleanup_inode;
175 } 179 }
176 if (acl) { 180 if (acl) {
177 error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); 181 error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
178 if (error) 182 if (error)
179 goto out_cleanup_inode; 183 goto out_cleanup_inode;
180 } 184 }
181#endif 185#endif
182 186
183 d_instantiate(dentry, inode); 187 if (tmpfile)
188 d_tmpfile(dentry, inode);
189 else
190 d_instantiate(dentry, inode);
191
184 out_free_acl: 192 out_free_acl:
185 if (default_acl) 193 if (default_acl)
186 posix_acl_release(default_acl); 194 posix_acl_release(default_acl);
@@ -189,11 +197,23 @@ xfs_vn_mknod(
189 return -error; 197 return -error;
190 198
191 out_cleanup_inode: 199 out_cleanup_inode:
192 xfs_cleanup_inode(dir, inode, dentry); 200 if (!tmpfile)
201 xfs_cleanup_inode(dir, inode, dentry);
202 iput(inode);
193 goto out_free_acl; 203 goto out_free_acl;
194} 204}
195 205
196STATIC int 206STATIC int
207xfs_vn_mknod(
208 struct inode *dir,
209 struct dentry *dentry,
210 umode_t mode,
211 dev_t rdev)
212{
213 return xfs_generic_create(dir, dentry, mode, rdev, false);
214}
215
216STATIC int
197xfs_vn_create( 217xfs_vn_create(
198 struct inode *dir, 218 struct inode *dir,
199 struct dentry *dentry, 219 struct dentry *dentry,
@@ -353,6 +373,7 @@ xfs_vn_symlink(
353 373
354 out_cleanup_inode: 374 out_cleanup_inode:
355 xfs_cleanup_inode(dir, inode, dentry); 375 xfs_cleanup_inode(dir, inode, dentry);
376 iput(inode);
356 out: 377 out:
357 return -error; 378 return -error;
358} 379}
@@ -808,22 +829,34 @@ xfs_setattr_size(
808 */ 829 */
809 inode_dio_wait(inode); 830 inode_dio_wait(inode);
810 831
832 /*
833 * Do all the page cache truncate work outside the transaction context
834 * as the "lock" order is page lock->log space reservation. i.e.
835 * locking pages inside the transaction can ABBA deadlock with
836 * writeback. We have to do the VFS inode size update before we truncate
837 * the pagecache, however, to avoid racing with page faults beyond the
838 * new EOF they are not serialised against truncate operations except by
839 * page locks and size updates.
840 *
841 * Hence we are in a situation where a truncate can fail with ENOMEM
842 * from xfs_trans_reserve(), but having already truncated the in-memory
843 * version of the file (i.e. made user visible changes). There's not
844 * much we can do about this, except to hope that the caller sees ENOMEM
845 * and retries the truncate operation.
846 */
811 error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 847 error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
812 if (error) 848 if (error)
813 return error; 849 return error;
850 truncate_setsize(inode, newsize);
814 851
815 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 852 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
816 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 853 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
817 if (error) 854 if (error)
818 goto out_trans_cancel; 855 goto out_trans_cancel;
819 856
820 truncate_setsize(inode, newsize);
821
822 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 857 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
823 lock_flags |= XFS_ILOCK_EXCL; 858 lock_flags |= XFS_ILOCK_EXCL;
824
825 xfs_ilock(ip, XFS_ILOCK_EXCL); 859 xfs_ilock(ip, XFS_ILOCK_EXCL);
826
827 xfs_trans_ijoin(tp, ip, 0); 860 xfs_trans_ijoin(tp, ip, 0);
828 861
829 /* 862 /*
@@ -1053,11 +1086,7 @@ xfs_vn_tmpfile(
1053 struct dentry *dentry, 1086 struct dentry *dentry,
1054 umode_t mode) 1087 umode_t mode)
1055{ 1088{
1056 int error; 1089 return xfs_generic_create(dir, dentry, mode, 0, true);
1057
1058 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
1059
1060 return -error;
1061} 1090}
1062 1091
1063static const struct inode_operations xfs_inode_operations = { 1092static const struct inode_operations xfs_inode_operations = {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f46338285152..cb64f222d607 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -270,7 +270,8 @@ xfs_bulkstat(
270 /* 270 /*
271 * Allocate and initialize a btree cursor for ialloc btree. 271 * Allocate and initialize a btree cursor for ialloc btree.
272 */ 272 */
273 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); 273 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
274 XFS_BTNUM_INO);
274 irbp = irbuf; 275 irbp = irbuf;
275 irbufend = irbuf + nirbuf; 276 irbufend = irbuf + nirbuf;
276 end_of_ag = 0; 277 end_of_ag = 0;
@@ -621,7 +622,8 @@ xfs_inumbers(
621 agino = 0; 622 agino = 0;
622 continue; 623 continue;
623 } 624 }
624 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); 625 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
626 XFS_BTNUM_INO);
625 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, 627 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
626 &tmp); 628 &tmp);
627 if (error) { 629 if (error) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8497a00e399d..3554098692d8 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -616,11 +616,13 @@ xfs_log_mount(
616 int error = 0; 616 int error = 0;
617 int min_logfsbs; 617 int min_logfsbs;
618 618
619 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 619 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
620 xfs_notice(mp, "Mounting Filesystem"); 620 xfs_notice(mp, "Mounting V%d Filesystem",
621 else { 621 XFS_SB_VERSION_NUM(&mp->m_sb));
622 } else {
622 xfs_notice(mp, 623 xfs_notice(mp,
623"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent."); 624"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
625 XFS_SB_VERSION_NUM(&mp->m_sb));
624 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 626 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
625 } 627 }
626 628
@@ -1181,11 +1183,14 @@ xlog_iodone(xfs_buf_t *bp)
1181 /* log I/O is always issued ASYNC */ 1183 /* log I/O is always issued ASYNC */
1182 ASSERT(XFS_BUF_ISASYNC(bp)); 1184 ASSERT(XFS_BUF_ISASYNC(bp));
1183 xlog_state_done_syncing(iclog, aborted); 1185 xlog_state_done_syncing(iclog, aborted);
1186
1184 /* 1187 /*
1185 * do not reference the buffer (bp) here as we could race 1188 * drop the buffer lock now that we are done. Nothing references
1186 * with it being freed after writing the unmount record to the 1189 * the buffer after this, so an unmount waiting on this lock can now
1187 * log. 1190 * tear it down safely. As such, it is unsafe to reference the buffer
1191 * (bp) after the unlock as we could race with it being freed.
1188 */ 1192 */
1193 xfs_buf_unlock(bp);
1189} 1194}
1190 1195
1191/* 1196/*
@@ -1368,8 +1373,16 @@ xlog_alloc_log(
1368 bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); 1373 bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
1369 if (!bp) 1374 if (!bp)
1370 goto out_free_log; 1375 goto out_free_log;
1371 bp->b_iodone = xlog_iodone; 1376
1377 /*
1378 * The iclogbuf buffer locks are held over IO but we are not going to do
1379 * IO yet. Hence unlock the buffer so that the log IO path can grab it
1380 * when appropriately.
1381 */
1372 ASSERT(xfs_buf_islocked(bp)); 1382 ASSERT(xfs_buf_islocked(bp));
1383 xfs_buf_unlock(bp);
1384
1385 bp->b_iodone = xlog_iodone;
1373 log->l_xbuf = bp; 1386 log->l_xbuf = bp;
1374 1387
1375 spin_lock_init(&log->l_icloglock); 1388 spin_lock_init(&log->l_icloglock);
@@ -1398,6 +1411,9 @@ xlog_alloc_log(
1398 if (!bp) 1411 if (!bp)
1399 goto out_free_iclog; 1412 goto out_free_iclog;
1400 1413
1414 ASSERT(xfs_buf_islocked(bp));
1415 xfs_buf_unlock(bp);
1416
1401 bp->b_iodone = xlog_iodone; 1417 bp->b_iodone = xlog_iodone;
1402 iclog->ic_bp = bp; 1418 iclog->ic_bp = bp;
1403 iclog->ic_data = bp->b_addr; 1419 iclog->ic_data = bp->b_addr;
@@ -1422,7 +1438,6 @@ xlog_alloc_log(
1422 iclog->ic_callback_tail = &(iclog->ic_callback); 1438 iclog->ic_callback_tail = &(iclog->ic_callback);
1423 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1439 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1424 1440
1425 ASSERT(xfs_buf_islocked(iclog->ic_bp));
1426 init_waitqueue_head(&iclog->ic_force_wait); 1441 init_waitqueue_head(&iclog->ic_force_wait);
1427 init_waitqueue_head(&iclog->ic_write_wait); 1442 init_waitqueue_head(&iclog->ic_write_wait);
1428 1443
@@ -1631,6 +1646,12 @@ xlog_cksum(
1631 * we transition the iclogs to IOERROR state *after* flushing all existing 1646 * we transition the iclogs to IOERROR state *after* flushing all existing
1632 * iclogs to disk. This is because we don't want anymore new transactions to be 1647 * iclogs to disk. This is because we don't want anymore new transactions to be
1633 * started or completed afterwards. 1648 * started or completed afterwards.
1649 *
1650 * We lock the iclogbufs here so that we can serialise against IO completion
1651 * during unmount. We might be processing a shutdown triggered during unmount,
1652 * and that can occur asynchronously to the unmount thread, and hence we need to
1653 * ensure that completes before tearing down the iclogbufs. Hence we need to
1654 * hold the buffer lock across the log IO to acheive that.
1634 */ 1655 */
1635STATIC int 1656STATIC int
1636xlog_bdstrat( 1657xlog_bdstrat(
@@ -1638,6 +1659,7 @@ xlog_bdstrat(
1638{ 1659{
1639 struct xlog_in_core *iclog = bp->b_fspriv; 1660 struct xlog_in_core *iclog = bp->b_fspriv;
1640 1661
1662 xfs_buf_lock(bp);
1641 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1663 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1642 xfs_buf_ioerror(bp, EIO); 1664 xfs_buf_ioerror(bp, EIO);
1643 xfs_buf_stale(bp); 1665 xfs_buf_stale(bp);
@@ -1645,7 +1667,8 @@ xlog_bdstrat(
1645 /* 1667 /*
1646 * It would seem logical to return EIO here, but we rely on 1668 * It would seem logical to return EIO here, but we rely on
1647 * the log state machine to propagate I/O errors instead of 1669 * the log state machine to propagate I/O errors instead of
1648 * doing it here. 1670 * doing it here. Similarly, IO completion will unlock the
1671 * buffer, so we don't do it here.
1649 */ 1672 */
1650 return 0; 1673 return 0;
1651 } 1674 }
@@ -1847,14 +1870,28 @@ xlog_dealloc_log(
1847 xlog_cil_destroy(log); 1870 xlog_cil_destroy(log);
1848 1871
1849 /* 1872 /*
1850 * always need to ensure that the extra buffer does not point to memory 1873 * Cycle all the iclogbuf locks to make sure all log IO completion
1851 * owned by another log buffer before we free it. 1874 * is done before we tear down these buffers.
1875 */
1876 iclog = log->l_iclog;
1877 for (i = 0; i < log->l_iclog_bufs; i++) {
1878 xfs_buf_lock(iclog->ic_bp);
1879 xfs_buf_unlock(iclog->ic_bp);
1880 iclog = iclog->ic_next;
1881 }
1882
1883 /*
1884 * Always need to ensure that the extra buffer does not point to memory
1885 * owned by another log buffer before we free it. Also, cycle the lock
1886 * first to ensure we've completed IO on it.
1852 */ 1887 */
1888 xfs_buf_lock(log->l_xbuf);
1889 xfs_buf_unlock(log->l_xbuf);
1853 xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); 1890 xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
1854 xfs_buf_free(log->l_xbuf); 1891 xfs_buf_free(log->l_xbuf);
1855 1892
1856 iclog = log->l_iclog; 1893 iclog = log->l_iclog;
1857 for (i=0; i<log->l_iclog_bufs; i++) { 1894 for (i = 0; i < log->l_iclog_bufs; i++) {
1858 xfs_buf_free(iclog->ic_bp); 1895 xfs_buf_free(iclog->ic_bp);
1859 next_iclog = iclog->ic_next; 1896 next_iclog = iclog->ic_next;
1860 kmem_free(iclog); 1897 kmem_free(iclog);
@@ -3915,11 +3952,14 @@ xfs_log_force_umount(
3915 retval = xlog_state_ioerror(log); 3952 retval = xlog_state_ioerror(log);
3916 spin_unlock(&log->l_icloglock); 3953 spin_unlock(&log->l_icloglock);
3917 } 3954 }
3955
3918 /* 3956 /*
3919 * Wake up everybody waiting on xfs_log_force. 3957 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
3920 * Callback all log item committed functions as if the 3958 * as if the log writes were completed. The abort handling in the log
3921 * log writes were completed. 3959 * item committed callback functions will do this again under lock to
3960 * avoid races.
3922 */ 3961 */
3962 wake_up_all(&log->l_cilp->xc_commit_wait);
3923 xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); 3963 xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
3924 3964
3925#ifdef XFSERRORDEBUG 3965#ifdef XFSERRORDEBUG
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 7e5455391176..039c873e6fb2 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -385,7 +385,15 @@ xlog_cil_committed(
385 xfs_extent_busy_clear(mp, &ctx->busy_extents, 385 xfs_extent_busy_clear(mp, &ctx->busy_extents,
386 (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); 386 (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
387 387
388 /*
389 * If we are aborting the commit, wake up anyone waiting on the
390 * committing list. If we don't, then a shutdown we can leave processes
391 * waiting in xlog_cil_force_lsn() waiting on a sequence commit that
392 * will never happen because we aborted it.
393 */
388 spin_lock(&ctx->cil->xc_push_lock); 394 spin_lock(&ctx->cil->xc_push_lock);
395 if (abort)
396 wake_up_all(&ctx->cil->xc_commit_wait);
389 list_del(&ctx->committing); 397 list_del(&ctx->committing);
390 spin_unlock(&ctx->cil->xc_push_lock); 398 spin_unlock(&ctx->cil->xc_push_lock);
391 399
@@ -564,8 +572,18 @@ restart:
564 spin_lock(&cil->xc_push_lock); 572 spin_lock(&cil->xc_push_lock);
565 list_for_each_entry(new_ctx, &cil->xc_committing, committing) { 573 list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
566 /* 574 /*
575 * Avoid getting stuck in this loop because we were woken by the
576 * shutdown, but then went back to sleep once already in the
577 * shutdown state.
578 */
579 if (XLOG_FORCED_SHUTDOWN(log)) {
580 spin_unlock(&cil->xc_push_lock);
581 goto out_abort_free_ticket;
582 }
583
584 /*
567 * Higher sequences will wait for this one so skip them. 585 * Higher sequences will wait for this one so skip them.
568 * Don't wait for own own sequence, either. 586 * Don't wait for our own sequence, either.
569 */ 587 */
570 if (new_ctx->sequence >= ctx->sequence) 588 if (new_ctx->sequence >= ctx->sequence)
571 continue; 589 continue;
@@ -810,6 +828,13 @@ restart:
810 */ 828 */
811 spin_lock(&cil->xc_push_lock); 829 spin_lock(&cil->xc_push_lock);
812 list_for_each_entry(ctx, &cil->xc_committing, committing) { 830 list_for_each_entry(ctx, &cil->xc_committing, committing) {
831 /*
832 * Avoid getting stuck in this loop because we were woken by the
833 * shutdown, but then went back to sleep once already in the
834 * shutdown state.
835 */
836 if (XLOG_FORCED_SHUTDOWN(log))
837 goto out_shutdown;
813 if (ctx->sequence > sequence) 838 if (ctx->sequence > sequence)
814 continue; 839 continue;
815 if (!ctx->commit_lsn) { 840 if (!ctx->commit_lsn) {
@@ -833,14 +858,12 @@ restart:
833 * push sequence after the above wait loop and the CIL still contains 858 * push sequence after the above wait loop and the CIL still contains
834 * dirty objects. 859 * dirty objects.
835 * 860 *
836 * When the push occurs, it will empty the CIL and 861 * When the push occurs, it will empty the CIL and atomically increment
837	 atomically increment the current sequence past the push sequence and 862	 the current sequence past the push sequence and move it into the
838 * move it into the committing list. Of course, if the CIL is clean at 863 * committing list. Of course, if the CIL is clean at the time of the
839 * the time of the push, it won't have pushed the CIL at all, so in that 864 * push, it won't have pushed the CIL at all, so in that case we should
840 * case we should try the push for this sequence again from the start 865 * try the push for this sequence again from the start just in case.
841 * just in case.
842 */ 866 */
843
844 if (sequence == cil->xc_current_sequence && 867 if (sequence == cil->xc_current_sequence &&
845 !list_empty(&cil->xc_cil)) { 868 !list_empty(&cil->xc_cil)) {
846 spin_unlock(&cil->xc_push_lock); 869 spin_unlock(&cil->xc_push_lock);
@@ -849,6 +872,17 @@ restart:
849 872
850 spin_unlock(&cil->xc_push_lock); 873 spin_unlock(&cil->xc_push_lock);
851 return commit_lsn; 874 return commit_lsn;
875
876 /*
877 * We detected a shutdown in progress. We need to trigger the log force
878	 * to pass through its iclog state machine error handling, even though
879 * we are already in a shutdown state. Hence we can't return
880 * NULLCOMMITLSN here as that has special meaning to log forces (i.e.
881 * LSN is already stable), so we return a zero LSN instead.
882 */
883out_shutdown:
884 spin_unlock(&cil->xc_push_lock);
885 return 0;
852} 886}
853 887
854/* 888/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index bce53ac81096..981af0f6504b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2138,7 +2138,9 @@ xlog_recover_validate_buf_type(
2138 bp->b_ops = &xfs_allocbt_buf_ops; 2138 bp->b_ops = &xfs_allocbt_buf_ops;
2139 break; 2139 break;
2140 case XFS_IBT_CRC_MAGIC: 2140 case XFS_IBT_CRC_MAGIC:
2141 case XFS_FIBT_CRC_MAGIC:
2141 case XFS_IBT_MAGIC: 2142 case XFS_IBT_MAGIC:
2143 case XFS_FIBT_MAGIC:
2142 bp->b_ops = &xfs_inobt_buf_ops; 2144 bp->b_ops = &xfs_inobt_buf_ops;
2143 break; 2145 break;
2144 case XFS_BMAP_CRC_MAGIC: 2146 case XFS_BMAP_CRC_MAGIC:
@@ -3145,7 +3147,7 @@ xlog_recover_efd_pass2(
3145 } 3147 }
3146 lip = xfs_trans_ail_cursor_next(ailp, &cur); 3148 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3147 } 3149 }
3148 xfs_trans_ail_cursor_done(ailp, &cur); 3150 xfs_trans_ail_cursor_done(&cur);
3149 spin_unlock(&ailp->xa_lock); 3151 spin_unlock(&ailp->xa_lock);
3150 3152
3151 return 0; 3153 return 0;
@@ -3520,8 +3522,7 @@ out:
3520 3522
3521STATIC int 3523STATIC int
3522xlog_recover_unmount_trans( 3524xlog_recover_unmount_trans(
3523 struct xlog *log, 3525 struct xlog *log)
3524 struct xlog_recover *trans)
3525{ 3526{
3526 /* Do nothing now */ 3527 /* Do nothing now */
3527 xfs_warn(log->l_mp, "%s: Unmount LR", __func__); 3528 xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
@@ -3595,7 +3596,7 @@ xlog_recover_process_data(
3595 trans, pass); 3596 trans, pass);
3596 break; 3597 break;
3597 case XLOG_UNMOUNT_TRANS: 3598 case XLOG_UNMOUNT_TRANS:
3598 error = xlog_recover_unmount_trans(log, trans); 3599 error = xlog_recover_unmount_trans(log);
3599 break; 3600 break;
3600 case XLOG_WAS_CONT_TRANS: 3601 case XLOG_WAS_CONT_TRANS:
3601 error = xlog_recover_add_to_cont_trans(log, 3602 error = xlog_recover_add_to_cont_trans(log,
@@ -3757,7 +3758,7 @@ xlog_recover_process_efis(
3757 lip = xfs_trans_ail_cursor_next(ailp, &cur); 3758 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3758 } 3759 }
3759out: 3760out:
3760 xfs_trans_ail_cursor_done(ailp, &cur); 3761 xfs_trans_ail_cursor_done(&cur);
3761 spin_unlock(&ailp->xa_lock); 3762 spin_unlock(&ailp->xa_lock);
3762 return error; 3763 return error;
3763} 3764}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 993cb19e7d39..944f3d9456a8 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -743,8 +743,6 @@ xfs_mountfs(
743 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; 743 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
744 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) 744 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
745 mp->m_inode_cluster_size = new_size; 745 mp->m_inode_cluster_size = new_size;
746 xfs_info(mp, "Using inode cluster size of %d bytes",
747 mp->m_inode_cluster_size);
748 } 746 }
749 747
750 /* 748 /*
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 4aff56395732..f99b4933dc22 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -100,14 +100,20 @@
100 * likely result in a loop in one of the lists. That's a sure-fire recipe for 100 * likely result in a loop in one of the lists. That's a sure-fire recipe for
101 * an infinite loop in the code. 101 * an infinite loop in the code.
102 */ 102 */
103typedef struct xfs_mru_cache_elem 103struct xfs_mru_cache {
104{ 104 struct radix_tree_root store; /* Core storage data structure. */
105 struct list_head list_node; 105 struct list_head *lists; /* Array of lists, one per grp. */
106 unsigned long key; 106 struct list_head reap_list; /* Elements overdue for reaping. */
107 void *value; 107 spinlock_t lock; /* Lock to protect this struct. */
108} xfs_mru_cache_elem_t; 108 unsigned int grp_count; /* Number of discrete groups. */
109 unsigned int grp_time; /* Time period spanned by grps. */
110 unsigned int lru_grp; /* Group containing time zero. */
111 unsigned long time_zero; /* Time first element was added. */
112 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
113 struct delayed_work work; /* Workqueue data for reaping. */
114 unsigned int queued; /* work has been queued */
115};
109 116
110static kmem_zone_t *xfs_mru_elem_zone;
111static struct workqueue_struct *xfs_mru_reap_wq; 117static struct workqueue_struct *xfs_mru_reap_wq;
112 118
113/* 119/*
@@ -129,12 +135,12 @@ static struct workqueue_struct *xfs_mru_reap_wq;
129 */ 135 */
130STATIC unsigned long 136STATIC unsigned long
131_xfs_mru_cache_migrate( 137_xfs_mru_cache_migrate(
132 xfs_mru_cache_t *mru, 138 struct xfs_mru_cache *mru,
133 unsigned long now) 139 unsigned long now)
134{ 140{
135 unsigned int grp; 141 unsigned int grp;
136 unsigned int migrated = 0; 142 unsigned int migrated = 0;
137 struct list_head *lru_list; 143 struct list_head *lru_list;
138 144
139 /* Nothing to do if the data store is empty. */ 145 /* Nothing to do if the data store is empty. */
140 if (!mru->time_zero) 146 if (!mru->time_zero)
@@ -193,11 +199,11 @@ _xfs_mru_cache_migrate(
193 */ 199 */
194STATIC void 200STATIC void
195_xfs_mru_cache_list_insert( 201_xfs_mru_cache_list_insert(
196 xfs_mru_cache_t *mru, 202 struct xfs_mru_cache *mru,
197 xfs_mru_cache_elem_t *elem) 203 struct xfs_mru_cache_elem *elem)
198{ 204{
199 unsigned int grp = 0; 205 unsigned int grp = 0;
200 unsigned long now = jiffies; 206 unsigned long now = jiffies;
201 207
202 /* 208 /*
203 * If the data store is empty, initialise time zero, leave grp set to 209 * If the data store is empty, initialise time zero, leave grp set to
@@ -231,10 +237,10 @@ _xfs_mru_cache_list_insert(
231 */ 237 */
232STATIC void 238STATIC void
233_xfs_mru_cache_clear_reap_list( 239_xfs_mru_cache_clear_reap_list(
234 xfs_mru_cache_t *mru) __releases(mru->lock) __acquires(mru->lock) 240 struct xfs_mru_cache *mru)
235 241 __releases(mru->lock) __acquires(mru->lock)
236{ 242{
237 xfs_mru_cache_elem_t *elem, *next; 243 struct xfs_mru_cache_elem *elem, *next;
238 struct list_head tmp; 244 struct list_head tmp;
239 245
240 INIT_LIST_HEAD(&tmp); 246 INIT_LIST_HEAD(&tmp);
@@ -252,15 +258,8 @@ _xfs_mru_cache_clear_reap_list(
252 spin_unlock(&mru->lock); 258 spin_unlock(&mru->lock);
253 259
254 list_for_each_entry_safe(elem, next, &tmp, list_node) { 260 list_for_each_entry_safe(elem, next, &tmp, list_node) {
255
256 /* Remove the element from the reap list. */
257 list_del_init(&elem->list_node); 261 list_del_init(&elem->list_node);
258 262 mru->free_func(elem);
259 /* Call the client's free function with the key and value pointer. */
260 mru->free_func(elem->key, elem->value);
261
262 /* Free the element structure. */
263 kmem_zone_free(xfs_mru_elem_zone, elem);
264 } 263 }
265 264
266 spin_lock(&mru->lock); 265 spin_lock(&mru->lock);
@@ -277,7 +276,8 @@ STATIC void
277_xfs_mru_cache_reap( 276_xfs_mru_cache_reap(
278 struct work_struct *work) 277 struct work_struct *work)
279{ 278{
280 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); 279 struct xfs_mru_cache *mru =
280 container_of(work, struct xfs_mru_cache, work.work);
281 unsigned long now, next; 281 unsigned long now, next;
282 282
283 ASSERT(mru && mru->lists); 283 ASSERT(mru && mru->lists);
@@ -304,28 +304,16 @@ _xfs_mru_cache_reap(
304int 304int
305xfs_mru_cache_init(void) 305xfs_mru_cache_init(void)
306{ 306{
307 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
308 "xfs_mru_cache_elem");
309 if (!xfs_mru_elem_zone)
310 goto out;
311
312 xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); 307 xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
313 if (!xfs_mru_reap_wq) 308 if (!xfs_mru_reap_wq)
314 goto out_destroy_mru_elem_zone; 309 return -ENOMEM;
315
316 return 0; 310 return 0;
317
318 out_destroy_mru_elem_zone:
319 kmem_zone_destroy(xfs_mru_elem_zone);
320 out:
321 return -ENOMEM;
322} 311}
323 312
324void 313void
325xfs_mru_cache_uninit(void) 314xfs_mru_cache_uninit(void)
326{ 315{
327 destroy_workqueue(xfs_mru_reap_wq); 316 destroy_workqueue(xfs_mru_reap_wq);
328 kmem_zone_destroy(xfs_mru_elem_zone);
329} 317}
330 318
331/* 319/*
@@ -336,14 +324,14 @@ xfs_mru_cache_uninit(void)
336 */ 324 */
337int 325int
338xfs_mru_cache_create( 326xfs_mru_cache_create(
339 xfs_mru_cache_t **mrup, 327 struct xfs_mru_cache **mrup,
340 unsigned int lifetime_ms, 328 unsigned int lifetime_ms,
341 unsigned int grp_count, 329 unsigned int grp_count,
342 xfs_mru_cache_free_func_t free_func) 330 xfs_mru_cache_free_func_t free_func)
343{ 331{
344 xfs_mru_cache_t *mru = NULL; 332 struct xfs_mru_cache *mru = NULL;
345 int err = 0, grp; 333 int err = 0, grp;
346 unsigned int grp_time; 334 unsigned int grp_time;
347 335
348 if (mrup) 336 if (mrup)
349 *mrup = NULL; 337 *mrup = NULL;
@@ -400,7 +388,7 @@ exit:
400 */ 388 */
401static void 389static void
402xfs_mru_cache_flush( 390xfs_mru_cache_flush(
403 xfs_mru_cache_t *mru) 391 struct xfs_mru_cache *mru)
404{ 392{
405 if (!mru || !mru->lists) 393 if (!mru || !mru->lists)
406 return; 394 return;
@@ -420,7 +408,7 @@ xfs_mru_cache_flush(
420 408
421void 409void
422xfs_mru_cache_destroy( 410xfs_mru_cache_destroy(
423 xfs_mru_cache_t *mru) 411 struct xfs_mru_cache *mru)
424{ 412{
425 if (!mru || !mru->lists) 413 if (!mru || !mru->lists)
426 return; 414 return;
@@ -438,38 +426,30 @@ xfs_mru_cache_destroy(
438 */ 426 */
439int 427int
440xfs_mru_cache_insert( 428xfs_mru_cache_insert(
441 xfs_mru_cache_t *mru, 429 struct xfs_mru_cache *mru,
442 unsigned long key, 430 unsigned long key,
443 void *value) 431 struct xfs_mru_cache_elem *elem)
444{ 432{
445 xfs_mru_cache_elem_t *elem; 433 int error;
446 434
447 ASSERT(mru && mru->lists); 435 ASSERT(mru && mru->lists);
448 if (!mru || !mru->lists) 436 if (!mru || !mru->lists)
449 return EINVAL; 437 return EINVAL;
450 438
451 elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP); 439 if (radix_tree_preload(GFP_KERNEL))
452 if (!elem)
453 return ENOMEM; 440 return ENOMEM;
454 441
455 if (radix_tree_preload(GFP_KERNEL)) {
456 kmem_zone_free(xfs_mru_elem_zone, elem);
457 return ENOMEM;
458 }
459
460 INIT_LIST_HEAD(&elem->list_node); 442 INIT_LIST_HEAD(&elem->list_node);
461 elem->key = key; 443 elem->key = key;
462 elem->value = value;
463 444
464 spin_lock(&mru->lock); 445 spin_lock(&mru->lock);
465 446 error = -radix_tree_insert(&mru->store, key, elem);
466 radix_tree_insert(&mru->store, key, elem);
467 radix_tree_preload_end(); 447 radix_tree_preload_end();
468 _xfs_mru_cache_list_insert(mru, elem); 448 if (!error)
469 449 _xfs_mru_cache_list_insert(mru, elem);
470 spin_unlock(&mru->lock); 450 spin_unlock(&mru->lock);
471 451
472 return 0; 452 return error;
473} 453}
474 454
475/* 455/*
@@ -478,13 +458,12 @@ xfs_mru_cache_insert(
478 * the client data pointer for the removed element is returned, otherwise this 458 * the client data pointer for the removed element is returned, otherwise this
479 * function will return a NULL pointer. 459 * function will return a NULL pointer.
480 */ 460 */
481void * 461struct xfs_mru_cache_elem *
482xfs_mru_cache_remove( 462xfs_mru_cache_remove(
483 xfs_mru_cache_t *mru, 463 struct xfs_mru_cache *mru,
484 unsigned long key) 464 unsigned long key)
485{ 465{
486 xfs_mru_cache_elem_t *elem; 466 struct xfs_mru_cache_elem *elem;
487 void *value = NULL;
488 467
489 ASSERT(mru && mru->lists); 468 ASSERT(mru && mru->lists);
490 if (!mru || !mru->lists) 469 if (!mru || !mru->lists)
@@ -492,17 +471,11 @@ xfs_mru_cache_remove(
492 471
493 spin_lock(&mru->lock); 472 spin_lock(&mru->lock);
494 elem = radix_tree_delete(&mru->store, key); 473 elem = radix_tree_delete(&mru->store, key);
495 if (elem) { 474 if (elem)
496 value = elem->value;
497 list_del(&elem->list_node); 475 list_del(&elem->list_node);
498 }
499
500 spin_unlock(&mru->lock); 476 spin_unlock(&mru->lock);
501 477
502 if (elem) 478 return elem;
503 kmem_zone_free(xfs_mru_elem_zone, elem);
504
505 return value;
506} 479}
507 480
508/* 481/*
@@ -511,13 +484,14 @@ xfs_mru_cache_remove(
511 */ 484 */
512void 485void
513xfs_mru_cache_delete( 486xfs_mru_cache_delete(
514 xfs_mru_cache_t *mru, 487 struct xfs_mru_cache *mru,
515 unsigned long key) 488 unsigned long key)
516{ 489{
517 void *value = xfs_mru_cache_remove(mru, key); 490 struct xfs_mru_cache_elem *elem;
518 491
519 if (value) 492 elem = xfs_mru_cache_remove(mru, key);
520 mru->free_func(key, value); 493 if (elem)
494 mru->free_func(elem);
521} 495}
522 496
523/* 497/*
@@ -540,12 +514,12 @@ xfs_mru_cache_delete(
540 * status, we need to help it get it right by annotating the path that does 514 * status, we need to help it get it right by annotating the path that does
541 * not release the lock. 515 * not release the lock.
542 */ 516 */
543void * 517struct xfs_mru_cache_elem *
544xfs_mru_cache_lookup( 518xfs_mru_cache_lookup(
545 xfs_mru_cache_t *mru, 519 struct xfs_mru_cache *mru,
546 unsigned long key) 520 unsigned long key)
547{ 521{
548 xfs_mru_cache_elem_t *elem; 522 struct xfs_mru_cache_elem *elem;
549 523
550 ASSERT(mru && mru->lists); 524 ASSERT(mru && mru->lists);
551 if (!mru || !mru->lists) 525 if (!mru || !mru->lists)
@@ -560,7 +534,7 @@ xfs_mru_cache_lookup(
560 } else 534 } else
561 spin_unlock(&mru->lock); 535 spin_unlock(&mru->lock);
562 536
563 return elem ? elem->value : NULL; 537 return elem;
564} 538}
565 539
566/* 540/*
@@ -570,7 +544,8 @@ xfs_mru_cache_lookup(
570 */ 544 */
571void 545void
572xfs_mru_cache_done( 546xfs_mru_cache_done(
573 xfs_mru_cache_t *mru) __releases(mru->lock) 547 struct xfs_mru_cache *mru)
548 __releases(mru->lock)
574{ 549{
575 spin_unlock(&mru->lock); 550 spin_unlock(&mru->lock);
576} 551}
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 36dd3ec8b4eb..fb5245ba5ff7 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -18,24 +18,15 @@
18#ifndef __XFS_MRU_CACHE_H__ 18#ifndef __XFS_MRU_CACHE_H__
19#define __XFS_MRU_CACHE_H__ 19#define __XFS_MRU_CACHE_H__
20 20
21struct xfs_mru_cache;
21 22
22/* Function pointer type for callback to free a client's data pointer. */ 23struct xfs_mru_cache_elem {
23typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*); 24 struct list_head list_node;
25 unsigned long key;
26};
24 27
25typedef struct xfs_mru_cache 28/* Function pointer type for callback to free a client's data pointer. */
26{ 29typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);
27 struct radix_tree_root store; /* Core storage data structure. */
28 struct list_head *lists; /* Array of lists, one per grp. */
29 struct list_head reap_list; /* Elements overdue for reaping. */
30 spinlock_t lock; /* Lock to protect this struct. */
31 unsigned int grp_count; /* Number of discrete groups. */
32 unsigned int grp_time; /* Time period spanned by grps. */
33 unsigned int lru_grp; /* Group containing time zero. */
34 unsigned long time_zero; /* Time first element was added. */
35 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
36 struct delayed_work work; /* Workqueue data for reaping. */
37 unsigned int queued; /* work has been queued */
38} xfs_mru_cache_t;
39 30
40int xfs_mru_cache_init(void); 31int xfs_mru_cache_init(void);
41void xfs_mru_cache_uninit(void); 32void xfs_mru_cache_uninit(void);
@@ -44,10 +35,12 @@ int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
44 xfs_mru_cache_free_func_t free_func); 35 xfs_mru_cache_free_func_t free_func);
45void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); 36void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
46int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, 37int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
47 void *value); 38 struct xfs_mru_cache_elem *elem);
48void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); 39struct xfs_mru_cache_elem *
40xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
49void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); 41void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
50void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); 42struct xfs_mru_cache_elem *
43xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
51void xfs_mru_cache_done(struct xfs_mru_cache *mru); 44void xfs_mru_cache_done(struct xfs_mru_cache *mru);
52 45
53#endif /* __XFS_MRU_CACHE_H__ */ 46#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 348e4d2ed6e6..6d26759c779a 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -193,47 +193,6 @@ xfs_qm_dqpurge(
193} 193}
194 194
195/* 195/*
196 * Release the group or project dquot pointers the user dquots maybe carrying
197 * around as a hint, and proceed to purge the user dquot cache if requested.
198*/
199STATIC int
200xfs_qm_dqpurge_hints(
201 struct xfs_dquot *dqp,
202 void *data)
203{
204 struct xfs_dquot *gdqp = NULL;
205 struct xfs_dquot *pdqp = NULL;
206 uint flags = *((uint *)data);
207
208 xfs_dqlock(dqp);
209 if (dqp->dq_flags & XFS_DQ_FREEING) {
210 xfs_dqunlock(dqp);
211 return EAGAIN;
212 }
213
214 /* If this quota has a hint attached, prepare for releasing it now */
215 gdqp = dqp->q_gdquot;
216 if (gdqp)
217 dqp->q_gdquot = NULL;
218
219 pdqp = dqp->q_pdquot;
220 if (pdqp)
221 dqp->q_pdquot = NULL;
222
223 xfs_dqunlock(dqp);
224
225 if (gdqp)
226 xfs_qm_dqrele(gdqp);
227 if (pdqp)
228 xfs_qm_dqrele(pdqp);
229
230 if (flags & XFS_QMOPT_UQUOTA)
231 return xfs_qm_dqpurge(dqp, NULL);
232
233 return 0;
234}
235
236/*
237 * Purge the dquot cache. 196 * Purge the dquot cache.
238 */ 197 */
239void 198void
@@ -241,18 +200,8 @@ xfs_qm_dqpurge_all(
241 struct xfs_mount *mp, 200 struct xfs_mount *mp,
242 uint flags) 201 uint flags)
243{ 202{
244 /* 203 if (flags & XFS_QMOPT_UQUOTA)
245 * We have to release group/project dquot hint(s) from the user dquot 204 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
246 * at first if they are there, otherwise we would run into an infinite
247 * loop while walking through radix tree to purge other type of dquots
248 * since their refcount is not zero if the user dquot refers to them
249 * as hint.
250 *
251 * Call the special xfs_qm_dqpurge_hints() will end up go through the
252 * general xfs_qm_dqpurge() against user dquot cache if requested.
253 */
254 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
255
256 if (flags & XFS_QMOPT_GQUOTA) 205 if (flags & XFS_QMOPT_GQUOTA)
257 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); 206 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
258 if (flags & XFS_QMOPT_PQUOTA) 207 if (flags & XFS_QMOPT_PQUOTA)
@@ -409,7 +358,6 @@ xfs_qm_dqattach_one(
409 xfs_dqid_t id, 358 xfs_dqid_t id,
410 uint type, 359 uint type,
411 uint doalloc, 360 uint doalloc,
412 xfs_dquot_t *udqhint, /* hint */
413 xfs_dquot_t **IO_idqpp) 361 xfs_dquot_t **IO_idqpp)
414{ 362{
415 xfs_dquot_t *dqp; 363 xfs_dquot_t *dqp;
@@ -419,9 +367,9 @@ xfs_qm_dqattach_one(
419 error = 0; 367 error = 0;
420 368
421 /* 369 /*
422 * See if we already have it in the inode itself. IO_idqpp is 370 * See if we already have it in the inode itself. IO_idqpp is &i_udquot
423 * &i_udquot or &i_gdquot. This made the code look weird, but 371 * or &i_gdquot. This made the code look weird, but made the logic a lot
424 * made the logic a lot simpler. 372 * simpler.
425 */ 373 */
426 dqp = *IO_idqpp; 374 dqp = *IO_idqpp;
427 if (dqp) { 375 if (dqp) {
@@ -430,49 +378,10 @@ xfs_qm_dqattach_one(
430 } 378 }
431 379
432 /* 380 /*
433 * udqhint is the i_udquot field in inode, and is non-NULL only 381 * Find the dquot from somewhere. This bumps the reference count of
434 * when the type arg is group/project. Its purpose is to save a 382 * dquot and returns it locked. This can return ENOENT if dquot didn't
435 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside 383 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
436 * the user dquot. 384 * turned off suddenly.
437 */
438 if (udqhint) {
439 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
440 xfs_dqlock(udqhint);
441
442 /*
443 * No need to take dqlock to look at the id.
444 *
445 * The ID can't change until it gets reclaimed, and it won't
446 * be reclaimed as long as we have a ref from inode and we
447 * hold the ilock.
448 */
449 if (type == XFS_DQ_GROUP)
450 dqp = udqhint->q_gdquot;
451 else
452 dqp = udqhint->q_pdquot;
453 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
454 ASSERT(*IO_idqpp == NULL);
455
456 *IO_idqpp = xfs_qm_dqhold(dqp);
457 xfs_dqunlock(udqhint);
458 return 0;
459 }
460
461 /*
462 * We can't hold a dquot lock when we call the dqget code.
463 * We'll deadlock in no time, because of (not conforming to)
464 * lock ordering - the inodelock comes before any dquot lock,
465 * and we may drop and reacquire the ilock in xfs_qm_dqget().
466 */
467 xfs_dqunlock(udqhint);
468 }
469
470 /*
471 * Find the dquot from somewhere. This bumps the
472 * reference count of dquot and returns it locked.
473 * This can return ENOENT if dquot didn't exist on
474 * disk and we didn't ask it to allocate;
475 * ESRCH if quotas got turned off suddenly.
476 */ 385 */
477 error = xfs_qm_dqget(ip->i_mount, ip, id, type, 386 error = xfs_qm_dqget(ip->i_mount, ip, id, type,
478 doalloc | XFS_QMOPT_DOWARN, &dqp); 387 doalloc | XFS_QMOPT_DOWARN, &dqp);
@@ -490,48 +399,6 @@ xfs_qm_dqattach_one(
490 return 0; 399 return 0;
491} 400}
492 401
493
494/*
495 * Given a udquot and group/project type, attach the group/project
496 * dquot pointer to the udquot as a hint for future lookups.
497 */
498STATIC void
499xfs_qm_dqattach_hint(
500 struct xfs_inode *ip,
501 int type)
502{
503 struct xfs_dquot **dqhintp;
504 struct xfs_dquot *dqp;
505 struct xfs_dquot *udq = ip->i_udquot;
506
507 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
508
509 xfs_dqlock(udq);
510
511 if (type == XFS_DQ_GROUP) {
512 dqp = ip->i_gdquot;
513 dqhintp = &udq->q_gdquot;
514 } else {
515 dqp = ip->i_pdquot;
516 dqhintp = &udq->q_pdquot;
517 }
518
519 if (*dqhintp) {
520 struct xfs_dquot *tmp;
521
522 if (*dqhintp == dqp)
523 goto done;
524
525 tmp = *dqhintp;
526 *dqhintp = NULL;
527 xfs_qm_dqrele(tmp);
528 }
529
530 *dqhintp = xfs_qm_dqhold(dqp);
531done:
532 xfs_dqunlock(udq);
533}
534
535static bool 402static bool
536xfs_qm_need_dqattach( 403xfs_qm_need_dqattach(
537 struct xfs_inode *ip) 404 struct xfs_inode *ip)
@@ -562,7 +429,6 @@ xfs_qm_dqattach_locked(
562 uint flags) 429 uint flags)
563{ 430{
564 xfs_mount_t *mp = ip->i_mount; 431 xfs_mount_t *mp = ip->i_mount;
565 uint nquotas = 0;
566 int error = 0; 432 int error = 0;
567 433
568 if (!xfs_qm_need_dqattach(ip)) 434 if (!xfs_qm_need_dqattach(ip))
@@ -570,77 +436,39 @@ xfs_qm_dqattach_locked(
570 436
571 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 437 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
572 438
573 if (XFS_IS_UQUOTA_ON(mp)) { 439 if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
574 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, 440 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
575 flags & XFS_QMOPT_DQALLOC, 441 flags & XFS_QMOPT_DQALLOC,
576 NULL, &ip->i_udquot); 442 &ip->i_udquot);
577 if (error) 443 if (error)
578 goto done; 444 goto done;
579 nquotas++; 445 ASSERT(ip->i_udquot);
580 } 446 }
581 447
582 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 448 if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
583 if (XFS_IS_GQUOTA_ON(mp)) {
584 error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 449 error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
585 flags & XFS_QMOPT_DQALLOC, 450 flags & XFS_QMOPT_DQALLOC,
586 ip->i_udquot, &ip->i_gdquot); 451 &ip->i_gdquot);
587 /*
588 * Don't worry about the udquot that we may have
589 * attached above. It'll get detached, if not already.
590 */
591 if (error) 452 if (error)
592 goto done; 453 goto done;
593 nquotas++; 454 ASSERT(ip->i_gdquot);
594 } 455 }
595 456
596 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 457 if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
597 if (XFS_IS_PQUOTA_ON(mp)) {
598 error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, 458 error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
599 flags & XFS_QMOPT_DQALLOC, 459 flags & XFS_QMOPT_DQALLOC,
600 ip->i_udquot, &ip->i_pdquot); 460 &ip->i_pdquot);
601 /*
602 * Don't worry about the udquot that we may have
603 * attached above. It'll get detached, if not already.
604 */
605 if (error) 461 if (error)
606 goto done; 462 goto done;
607 nquotas++; 463 ASSERT(ip->i_pdquot);
608 } 464 }
609 465
466done:
610 /* 467 /*
611 * Attach this group/project quota to the user quota as a hint. 468 * Don't worry about the dquots that we may have attached before any
612 * This WON'T, in general, result in a thrash. 469 * error - they'll get detached later if it has not already been done.
613 */ 470 */
614 if (nquotas > 1 && ip->i_udquot) {
615 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
616 ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp));
617 ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp));
618
619 /*
620 * We do not have i_udquot locked at this point, but this check
621 * is OK since we don't depend on the i_gdquot to be accurate
622 * 100% all the time. It is just a hint, and this will
623 * succeed in general.
624 */
625 if (ip->i_udquot->q_gdquot != ip->i_gdquot)
626 xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP);
627
628 if (ip->i_udquot->q_pdquot != ip->i_pdquot)
629 xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ);
630 }
631
632 done:
633#ifdef DEBUG
634 if (!error) {
635 if (XFS_IS_UQUOTA_ON(mp))
636 ASSERT(ip->i_udquot);
637 if (XFS_IS_GQUOTA_ON(mp))
638 ASSERT(ip->i_gdquot);
639 if (XFS_IS_PQUOTA_ON(mp))
640 ASSERT(ip->i_pdquot);
641 }
642 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 471 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
643#endif
644 return error; 472 return error;
645} 473}
646 474
@@ -843,22 +671,17 @@ xfs_qm_init_quotainfo(
843 671
844 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 672 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
845 673
846 if ((error = list_lru_init(&qinf->qi_lru))) { 674 error = -list_lru_init(&qinf->qi_lru);
847 kmem_free(qinf); 675 if (error)
848 mp->m_quotainfo = NULL; 676 goto out_free_qinf;
849 return error;
850 }
851 677
852 /* 678 /*
853 * See if quotainodes are setup, and if not, allocate them, 679 * See if quotainodes are setup, and if not, allocate them,
854 * and change the superblock accordingly. 680 * and change the superblock accordingly.
855 */ 681 */
856 if ((error = xfs_qm_init_quotainos(mp))) { 682 error = xfs_qm_init_quotainos(mp);
857 list_lru_destroy(&qinf->qi_lru); 683 if (error)
858 kmem_free(qinf); 684 goto out_free_lru;
859 mp->m_quotainfo = NULL;
860 return error;
861 }
862 685
863 INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); 686 INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
864 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); 687 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
@@ -870,8 +693,7 @@ xfs_qm_init_quotainfo(
870 693
871 /* Precalc some constants */ 694 /* Precalc some constants */
872 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 695 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
873 qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp, 696 qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
874 qinf->qi_dqchunklen);
875 697
876 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); 698 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
877 699
@@ -918,7 +740,7 @@ xfs_qm_init_quotainfo(
918 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); 740 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
919 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); 741 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
920 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); 742 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
921 743
922 xfs_qm_dqdestroy(dqp); 744 xfs_qm_dqdestroy(dqp);
923 } else { 745 } else {
924 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; 746 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -935,6 +757,13 @@ xfs_qm_init_quotainfo(
935 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; 757 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
936 register_shrinker(&qinf->qi_shrinker); 758 register_shrinker(&qinf->qi_shrinker);
937 return 0; 759 return 0;
760
761out_free_lru:
762 list_lru_destroy(&qinf->qi_lru);
763out_free_qinf:
764 kmem_free(qinf);
765 mp->m_quotainfo = NULL;
766 return error;
938} 767}
939 768
940 769
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3daf5ea1eb8d..bbc813caba4c 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -278,9 +278,10 @@ xfs_qm_scall_trunc_qfiles(
278 xfs_mount_t *mp, 278 xfs_mount_t *mp,
279 uint flags) 279 uint flags)
280{ 280{
281 int error; 281 int error = EINVAL;
282 282
283 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 283 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 ||
284 (flags & ~XFS_DQ_ALLTYPES)) {
284 xfs_debug(mp, "%s: flags=%x m_qflags=%x", 285 xfs_debug(mp, "%s: flags=%x m_qflags=%x",
285 __func__, flags, mp->m_qflags); 286 __func__, flags, mp->m_qflags);
286 return XFS_ERROR(EINVAL); 287 return XFS_ERROR(EINVAL);
@@ -959,7 +960,6 @@ xfs_qm_export_flags(
959STATIC int 960STATIC int
960xfs_dqrele_inode( 961xfs_dqrele_inode(
961 struct xfs_inode *ip, 962 struct xfs_inode *ip,
962 struct xfs_perag *pag,
963 int flags, 963 int flags,
964 void *args) 964 void *args)
965{ 965{
diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h
index b3b2b1065c0f..137e20937077 100644
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/xfs_quota_defs.h
@@ -156,6 +156,6 @@ typedef __uint16_t xfs_qwarncnt_t;
156 156
157extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, 157extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
158 xfs_dqid_t id, uint type, uint flags, char *str); 158 xfs_dqid_t id, uint type, uint flags, char *str);
159extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks); 159extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
160 160
161#endif /* __XFS_QUOTA_H__ */ 161#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index af33cafe69b6..2ad1b9822e92 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -100,16 +100,36 @@ xfs_fs_set_xstate(
100 if (!XFS_IS_QUOTA_ON(mp)) 100 if (!XFS_IS_QUOTA_ON(mp))
101 return -EINVAL; 101 return -EINVAL;
102 return -xfs_qm_scall_quotaoff(mp, flags); 102 return -xfs_qm_scall_quotaoff(mp, flags);
103 case Q_XQUOTARM:
104 if (XFS_IS_QUOTA_ON(mp))
105 return -EINVAL;
106 return -xfs_qm_scall_trunc_qfiles(mp, flags);
107 } 103 }
108 104
109 return -EINVAL; 105 return -EINVAL;
110} 106}
111 107
112STATIC int 108STATIC int
109xfs_fs_rm_xquota(
110 struct super_block *sb,
111 unsigned int uflags)
112{
113 struct xfs_mount *mp = XFS_M(sb);
114 unsigned int flags = 0;
115
116 if (sb->s_flags & MS_RDONLY)
117 return -EROFS;
118
119 if (XFS_IS_QUOTA_ON(mp))
120 return -EINVAL;
121
122 if (uflags & FS_USER_QUOTA)
123 flags |= XFS_DQ_USER;
124 if (uflags & FS_GROUP_QUOTA)
125 flags |= XFS_DQ_GROUP;
 126	if (uflags & FS_PROJ_QUOTA)
 127	flags |= XFS_DQ_PROJ;
128
129 return -xfs_qm_scall_trunc_qfiles(mp, flags);
130}
131
132STATIC int
113xfs_fs_get_dqblk( 133xfs_fs_get_dqblk(
114 struct super_block *sb, 134 struct super_block *sb,
115 struct kqid qid, 135 struct kqid qid,
@@ -149,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = {
149 .get_xstatev = xfs_fs_get_xstatev, 169 .get_xstatev = xfs_fs_get_xstatev,
150 .get_xstate = xfs_fs_get_xstate, 170 .get_xstate = xfs_fs_get_xstate,
151 .set_xstate = xfs_fs_set_xstate, 171 .set_xstate = xfs_fs_set_xstate,
172 .rm_xquota = xfs_fs_rm_xquota,
152 .get_dqblk = xfs_fs_get_dqblk, 173 .get_dqblk = xfs_fs_get_dqblk,
153 .set_dqblk = xfs_fs_set_dqblk, 174 .set_dqblk = xfs_fs_set_dqblk,
154}; 175};
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 0c0e41bbe4e3..8baf61afae1d 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -201,10 +201,6 @@ xfs_mount_validate_sb(
201 * write validation, we don't need to check feature masks. 201 * write validation, we don't need to check feature masks.
202 */ 202 */
203 if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { 203 if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
204 xfs_alert(mp,
205"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
206"Use of these features in this kernel is at your own risk!");
207
208 if (xfs_sb_has_compat_feature(sbp, 204 if (xfs_sb_has_compat_feature(sbp,
209 XFS_SB_FEAT_COMPAT_UNKNOWN)) { 205 XFS_SB_FEAT_COMPAT_UNKNOWN)) {
210 xfs_warn(mp, 206 xfs_warn(mp,
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index f7b2fe77c5a5..950d1ea058b2 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -587,7 +587,9 @@ xfs_sb_has_compat_feature(
587 return (sbp->sb_features_compat & feature) != 0; 587 return (sbp->sb_features_compat & feature) != 0;
588} 588}
589 589
590#define XFS_SB_FEAT_RO_COMPAT_ALL 0 590#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
591#define XFS_SB_FEAT_RO_COMPAT_ALL \
592 (XFS_SB_FEAT_RO_COMPAT_FINOBT)
591#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL 593#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
592static inline bool 594static inline bool
593xfs_sb_has_ro_compat_feature( 595xfs_sb_has_ro_compat_feature(
@@ -641,6 +643,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
641 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); 643 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
642} 644}
643 645
646static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
647{
648 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
649 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
650}
651
644/* 652/*
645 * end of superblock version macros 653 * end of superblock version macros
646 */ 654 */
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
index 4484e5151395..82404da2ca67 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/xfs_shared.h
@@ -238,7 +238,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
238int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); 238int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
239int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, 239int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
240 uint32_t size, struct xfs_buf *bp); 240 uint32_t size, struct xfs_buf *bp);
241bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, 241bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
242 uint32_t size, struct xfs_buf *bp); 242 uint32_t size, struct xfs_buf *bp);
243void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, 243void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
244 struct xfs_inode *ip, struct xfs_ifork *ifp); 244 struct xfs_inode *ip, struct xfs_ifork *ifp);
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index ce372b7d5644..f2240383d4bb 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
59 { "abtc2", XFSSTAT_END_ABTC_V2 }, 59 { "abtc2", XFSSTAT_END_ABTC_V2 },
60 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 60 { "bmbt2", XFSSTAT_END_BMBT_V2 },
61 { "ibt2", XFSSTAT_END_IBT_V2 }, 61 { "ibt2", XFSSTAT_END_IBT_V2 },
62 { "fibt2", XFSSTAT_END_FIBT_V2 },
62 /* we print both series of quota information together */ 63 /* we print both series of quota information together */
63 { "qm", XFSSTAT_END_QM }, 64 { "qm", XFSSTAT_END_QM },
64 }; 65 };
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index c03ad38ceaeb..c8f238b8299a 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -183,7 +183,23 @@ struct xfsstats {
183 __uint32_t xs_ibt_2_alloc; 183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free; 184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves; 185 __uint32_t xs_ibt_2_moves;
186#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) 186#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15)
187 __uint32_t xs_fibt_2_lookup;
188 __uint32_t xs_fibt_2_compare;
189 __uint32_t xs_fibt_2_insrec;
190 __uint32_t xs_fibt_2_delrec;
191 __uint32_t xs_fibt_2_newroot;
192 __uint32_t xs_fibt_2_killroot;
193 __uint32_t xs_fibt_2_increment;
194 __uint32_t xs_fibt_2_decrement;
195 __uint32_t xs_fibt_2_lshift;
196 __uint32_t xs_fibt_2_rshift;
197 __uint32_t xs_fibt_2_split;
198 __uint32_t xs_fibt_2_join;
199 __uint32_t xs_fibt_2_alloc;
200 __uint32_t xs_fibt_2_free;
201 __uint32_t xs_fibt_2_moves;
202#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6)
187 __uint32_t xs_qm_dqreclaims; 203 __uint32_t xs_qm_dqreclaims;
188 __uint32_t xs_qm_dqreclaim_misses; 204 __uint32_t xs_qm_dqreclaim_misses;
189 __uint32_t xs_qm_dquot_dups; 205 __uint32_t xs_qm_dquot_dups;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 205376776377..8f0333b3f7a0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -765,20 +765,18 @@ xfs_open_devices(
765 * Setup xfs_mount buffer target pointers 765 * Setup xfs_mount buffer target pointers
766 */ 766 */
767 error = ENOMEM; 767 error = ENOMEM;
768 mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); 768 mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
769 if (!mp->m_ddev_targp) 769 if (!mp->m_ddev_targp)
770 goto out_close_rtdev; 770 goto out_close_rtdev;
771 771
772 if (rtdev) { 772 if (rtdev) {
773 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, 773 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
774 mp->m_fsname);
775 if (!mp->m_rtdev_targp) 774 if (!mp->m_rtdev_targp)
776 goto out_free_ddev_targ; 775 goto out_free_ddev_targ;
777 } 776 }
778 777
779 if (logdev && logdev != ddev) { 778 if (logdev && logdev != ddev) {
780 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, 779 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
781 mp->m_fsname);
782 if (!mp->m_logdev_targp) 780 if (!mp->m_logdev_targp)
783 goto out_free_rtdev_targ; 781 goto out_free_rtdev_targ;
784 } else { 782 } else {
@@ -811,8 +809,7 @@ xfs_setup_devices(
811{ 809{
812 int error; 810 int error;
813 811
814 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, 812 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
815 mp->m_sb.sb_sectsize);
816 if (error) 813 if (error)
817 return error; 814 return error;
818 815
@@ -822,14 +819,12 @@ xfs_setup_devices(
822 if (xfs_sb_version_hassector(&mp->m_sb)) 819 if (xfs_sb_version_hassector(&mp->m_sb))
823 log_sector_size = mp->m_sb.sb_logsectsize; 820 log_sector_size = mp->m_sb.sb_logsectsize;
824 error = xfs_setsize_buftarg(mp->m_logdev_targp, 821 error = xfs_setsize_buftarg(mp->m_logdev_targp,
825 mp->m_sb.sb_blocksize,
826 log_sector_size); 822 log_sector_size);
827 if (error) 823 if (error)
828 return error; 824 return error;
829 } 825 }
830 if (mp->m_rtdev_targp) { 826 if (mp->m_rtdev_targp) {
831 error = xfs_setsize_buftarg(mp->m_rtdev_targp, 827 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
832 mp->m_sb.sb_blocksize,
833 mp->m_sb.sb_sectsize); 828 mp->m_sb.sb_sectsize);
834 if (error) 829 if (error)
835 return error; 830 return error;
@@ -1433,11 +1428,11 @@ xfs_fs_fill_super(
1433 if (error) 1428 if (error)
1434 goto out_free_fsname; 1429 goto out_free_fsname;
1435 1430
1436 error = xfs_init_mount_workqueues(mp); 1431 error = -xfs_init_mount_workqueues(mp);
1437 if (error) 1432 if (error)
1438 goto out_close_devices; 1433 goto out_close_devices;
1439 1434
1440 error = xfs_icsb_init_counters(mp); 1435 error = -xfs_icsb_init_counters(mp);
1441 if (error) 1436 if (error)
1442 goto out_destroy_workqueues; 1437 goto out_destroy_workqueues;
1443 1438
@@ -1754,13 +1749,9 @@ init_xfs_fs(void)
1754 if (error) 1749 if (error)
1755 goto out_destroy_wq; 1750 goto out_destroy_wq;
1756 1751
1757 error = xfs_filestream_init();
1758 if (error)
1759 goto out_mru_cache_uninit;
1760
1761 error = xfs_buf_init(); 1752 error = xfs_buf_init();
1762 if (error) 1753 if (error)
1763 goto out_filestream_uninit; 1754 goto out_mru_cache_uninit;
1764 1755
1765 error = xfs_init_procfs(); 1756 error = xfs_init_procfs();
1766 if (error) 1757 if (error)
@@ -1787,8 +1778,6 @@ init_xfs_fs(void)
1787 xfs_cleanup_procfs(); 1778 xfs_cleanup_procfs();
1788 out_buf_terminate: 1779 out_buf_terminate:
1789 xfs_buf_terminate(); 1780 xfs_buf_terminate();
1790 out_filestream_uninit:
1791 xfs_filestream_uninit();
1792 out_mru_cache_uninit: 1781 out_mru_cache_uninit:
1793 xfs_mru_cache_uninit(); 1782 xfs_mru_cache_uninit();
1794 out_destroy_wq: 1783 out_destroy_wq:
@@ -1807,7 +1796,6 @@ exit_xfs_fs(void)
1807 xfs_sysctl_unregister(); 1796 xfs_sysctl_unregister();
1808 xfs_cleanup_procfs(); 1797 xfs_cleanup_procfs();
1809 xfs_buf_terminate(); 1798 xfs_buf_terminate();
1810 xfs_filestream_uninit();
1811 xfs_mru_cache_uninit(); 1799 xfs_mru_cache_uninit();
1812 xfs_destroy_workqueues(); 1800 xfs_destroy_workqueues();
1813 xfs_destroy_zones(); 1801 xfs_destroy_zones();
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 52979aa90986..0816b4018dfc 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -92,7 +92,7 @@ xfs_readlink_bmap(
92 92
93 cur_chunk = bp->b_addr; 93 cur_chunk = bp->b_addr;
94 if (xfs_sb_version_hascrc(&mp->m_sb)) { 94 if (xfs_sb_version_hascrc(&mp->m_sb)) {
95 if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset, 95 if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
96 byte_cnt, bp)) { 96 byte_cnt, bp)) {
97 error = EFSCORRUPTED; 97 error = EFSCORRUPTED;
98 xfs_alert(mp, 98 xfs_alert(mp,
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index 9b32052ff65e..23c2f2577c8d 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -80,7 +80,6 @@ xfs_symlink_hdr_set(
80 */ 80 */
81bool 81bool
82xfs_symlink_hdr_ok( 82xfs_symlink_hdr_ok(
83 struct xfs_mount *mp,
84 xfs_ino_t ino, 83 xfs_ino_t ino,
85 uint32_t offset, 84 uint32_t offset,
86 uint32_t size, 85 uint32_t size,
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index dee3279c095e..1e85bcd0e418 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -46,6 +46,7 @@
46#include "xfs_log_recover.h" 46#include "xfs_log_recover.h"
47#include "xfs_inode_item.h" 47#include "xfs_inode_item.h"
48#include "xfs_bmap_btree.h" 48#include "xfs_bmap_btree.h"
49#include "xfs_filestream.h"
49 50
50/* 51/*
51 * We include this last to have the helpers above available for the trace 52 * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a4ae41c179a8..6910458915cf 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -538,6 +538,64 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
538DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); 538DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
539DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); 539DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);
540 540
541DECLARE_EVENT_CLASS(xfs_filestream_class,
542 TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
543 TP_ARGS(ip, agno),
544 TP_STRUCT__entry(
545 __field(dev_t, dev)
546 __field(xfs_ino_t, ino)
547 __field(xfs_agnumber_t, agno)
548 __field(int, streams)
549 ),
550 TP_fast_assign(
551 __entry->dev = VFS_I(ip)->i_sb->s_dev;
552 __entry->ino = ip->i_ino;
553 __entry->agno = agno;
554 __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
555 ),
556 TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
557 MAJOR(__entry->dev), MINOR(__entry->dev),
558 __entry->ino,
559 __entry->agno,
560 __entry->streams)
561)
562#define DEFINE_FILESTREAM_EVENT(name) \
563DEFINE_EVENT(xfs_filestream_class, name, \
564 TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \
565 TP_ARGS(ip, agno))
566DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
567DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
568DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
569
570TRACE_EVENT(xfs_filestream_pick,
571 TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno,
572 xfs_extlen_t free, int nscan),
573 TP_ARGS(ip, agno, free, nscan),
574 TP_STRUCT__entry(
575 __field(dev_t, dev)
576 __field(xfs_ino_t, ino)
577 __field(xfs_agnumber_t, agno)
578 __field(int, streams)
579 __field(xfs_extlen_t, free)
580 __field(int, nscan)
581 ),
582 TP_fast_assign(
583 __entry->dev = VFS_I(ip)->i_sb->s_dev;
584 __entry->ino = ip->i_ino;
585 __entry->agno = agno;
586 __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
587 __entry->free = free;
588 __entry->nscan = nscan;
589 ),
590 TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d",
591 MAJOR(__entry->dev), MINOR(__entry->dev),
592 __entry->ino,
593 __entry->agno,
594 __entry->streams,
595 __entry->free,
596 __entry->nscan)
597);
598
541DECLARE_EVENT_CLASS(xfs_lock_class, 599DECLARE_EVENT_CLASS(xfs_lock_class,
542 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, 600 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
543 unsigned long caller_ip), 601 unsigned long caller_ip),
@@ -603,6 +661,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
603DEFINE_INODE_EVENT(xfs_inactive_symlink); 661DEFINE_INODE_EVENT(xfs_inactive_symlink);
604DEFINE_INODE_EVENT(xfs_alloc_file_space); 662DEFINE_INODE_EVENT(xfs_alloc_file_space);
605DEFINE_INODE_EVENT(xfs_free_file_space); 663DEFINE_INODE_EVENT(xfs_free_file_space);
664DEFINE_INODE_EVENT(xfs_zero_file_space);
606DEFINE_INODE_EVENT(xfs_collapse_file_space); 665DEFINE_INODE_EVENT(xfs_collapse_file_space);
607DEFINE_INODE_EVENT(xfs_readdir); 666DEFINE_INODE_EVENT(xfs_readdir);
608#ifdef CONFIG_XFS_POSIX_ACL 667#ifdef CONFIG_XFS_POSIX_ACL
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 54a57326d85b..d03932564ccb 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -827,7 +827,7 @@ xfs_trans_committed_bulk(
827 xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); 827 xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
828 828
829 spin_lock(&ailp->xa_lock); 829 spin_lock(&ailp->xa_lock);
830 xfs_trans_ail_cursor_done(ailp, &cur); 830 xfs_trans_ail_cursor_done(&cur);
831 spin_unlock(&ailp->xa_lock); 831 spin_unlock(&ailp->xa_lock);
832} 832}
833 833
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index a7287354e535..cb0f3a84cc68 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -173,7 +173,6 @@ xfs_trans_ail_cursor_next(
173 */ 173 */
174void 174void
175xfs_trans_ail_cursor_done( 175xfs_trans_ail_cursor_done(
176 struct xfs_ail *ailp,
177 struct xfs_ail_cursor *cur) 176 struct xfs_ail_cursor *cur)
178{ 177{
179 cur->item = NULL; 178 cur->item = NULL;
@@ -368,7 +367,7 @@ xfsaild_push(
368 * If the AIL is empty or our push has reached the end we are 367 * If the AIL is empty or our push has reached the end we are
369 * done now. 368 * done now.
370 */ 369 */
371 xfs_trans_ail_cursor_done(ailp, &cur); 370 xfs_trans_ail_cursor_done(&cur);
372 spin_unlock(&ailp->xa_lock); 371 spin_unlock(&ailp->xa_lock);
373 goto out_done; 372 goto out_done;
374 } 373 }
@@ -453,7 +452,7 @@ xfsaild_push(
453 break; 452 break;
454 lsn = lip->li_lsn; 453 lsn = lip->li_lsn;
455 } 454 }
456 xfs_trans_ail_cursor_done(ailp, &cur); 455 xfs_trans_ail_cursor_done(&cur);
457 spin_unlock(&ailp->xa_lock); 456 spin_unlock(&ailp->xa_lock);
458 457
459 if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) 458 if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 12e86af9d9b9..bd1281862ad7 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -133,8 +133,7 @@ struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
133 xfs_lsn_t lsn); 133 xfs_lsn_t lsn);
134struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, 134struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
135 struct xfs_ail_cursor *cur); 135 struct xfs_ail_cursor *cur);
136void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 136void xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);
137 struct xfs_ail_cursor *cur);
138 137
139#if BITS_PER_LONG != 64 138#if BITS_PER_LONG != 64
140static inline void 139static inline void
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index ae368165244d..52b6c3e3203e 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -106,6 +106,47 @@ xfs_calc_inode_res(
106} 106}
107 107
108/* 108/*
109 * The free inode btree is a conditional feature and the log reservation
110 * requirements differ slightly from that of the traditional inode allocation
111 * btree. The finobt tracks records for inode chunks with at least one free
112 * inode. A record can be removed from the tree for an inode allocation
113 * or free and thus the finobt reservation is unconditional across:
114 *
115 * - inode allocation
116 * - inode free
117 * - inode chunk allocation
118 *
119 * The 'modify' param indicates to include the record modification scenario. The
120 * 'alloc' param indicates to include the reservation for free space btree
121 * modifications on behalf of finobt modifications. This is required only for
122 * transactions that do not already account for free space btree modifications.
123 *
124 * the free inode btree: max depth * block size
125 * the allocation btrees: 2 trees * (max depth - 1) * block size
126 * the free inode btree entry: block size
127 */
128STATIC uint
129xfs_calc_finobt_res(
130 struct xfs_mount *mp,
131 int alloc,
132 int modify)
133{
134 uint res;
135
136 if (!xfs_sb_version_hasfinobt(&mp->m_sb))
137 return 0;
138
139 res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
140 if (alloc)
141 res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
142 XFS_FSB_TO_B(mp, 1));
143 if (modify)
144 res += (uint)XFS_FSB_TO_B(mp, 1);
145
146 return res;
147}
148
149/*
109 * Various log reservation values. 150 * Various log reservation values.
110 * 151 *
111 * These are based on the size of the file system block because that is what 152 * These are based on the size of the file system block because that is what
@@ -302,6 +343,7 @@ xfs_calc_remove_reservation(
302 * the superblock for the nlink flag: sector size 343 * the superblock for the nlink flag: sector size
303 * the directory btree: (max depth + v2) * dir block size 344 * the directory btree: (max depth + v2) * dir block size
304 * the directory inode's bmap btree: (max depth + v2) * block size 345 * the directory inode's bmap btree: (max depth + v2) * block size
346 * the finobt (record modification and allocation btrees)
305 */ 347 */
306STATIC uint 348STATIC uint
307xfs_calc_create_resv_modify( 349xfs_calc_create_resv_modify(
@@ -310,7 +352,8 @@ xfs_calc_create_resv_modify(
310 return xfs_calc_inode_res(mp, 2) + 352 return xfs_calc_inode_res(mp, 2) +
311 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 353 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
312 (uint)XFS_FSB_TO_B(mp, 1) + 354 (uint)XFS_FSB_TO_B(mp, 1) +
313 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); 355 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
356 xfs_calc_finobt_res(mp, 1, 1);
314} 357}
315 358
316/* 359/*
@@ -348,6 +391,7 @@ __xfs_calc_create_reservation(
348 * the superblock for the nlink flag: sector size 391 * the superblock for the nlink flag: sector size
349 * the inode btree: max depth * blocksize 392 * the inode btree: max depth * blocksize
350 * the allocation btrees: 2 trees * (max depth - 1) * block size 393 * the allocation btrees: 2 trees * (max depth - 1) * block size
394 * the finobt (record insertion)
351 */ 395 */
352STATIC uint 396STATIC uint
353xfs_calc_icreate_resv_alloc( 397xfs_calc_icreate_resv_alloc(
@@ -357,7 +401,8 @@ xfs_calc_icreate_resv_alloc(
357 mp->m_sb.sb_sectsize + 401 mp->m_sb.sb_sectsize +
358 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 402 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
359 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 403 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
360 XFS_FSB_TO_B(mp, 1)); 404 XFS_FSB_TO_B(mp, 1)) +
405 xfs_calc_finobt_res(mp, 0, 0);
361} 406}
362 407
363STATIC uint 408STATIC uint
@@ -425,6 +470,7 @@ xfs_calc_symlink_reservation(
425 * the on disk inode before ours in the agi hash list: inode cluster size 470 * the on disk inode before ours in the agi hash list: inode cluster size
426 * the inode btree: max depth * blocksize 471 * the inode btree: max depth * blocksize
427 * the allocation btrees: 2 trees * (max depth - 1) * block size 472 * the allocation btrees: 2 trees * (max depth - 1) * block size
473 * the finobt (record insertion, removal or modification)
428 */ 474 */
429STATIC uint 475STATIC uint
430xfs_calc_ifree_reservation( 476xfs_calc_ifree_reservation(
@@ -439,7 +485,8 @@ xfs_calc_ifree_reservation(
439 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 485 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
440 mp->m_in_maxlevels, 0) + 486 mp->m_in_maxlevels, 0) +
441 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 487 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
442 XFS_FSB_TO_B(mp, 1)); 488 XFS_FSB_TO_B(mp, 1)) +
489 xfs_calc_finobt_res(mp, 0, 1);
443} 490}
444 491
445/* 492/*
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index af5dbe06cb65..df4c1f81884c 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -47,7 +47,9 @@
47#define XFS_DIRREMOVE_SPACE_RES(mp) \ 47#define XFS_DIRREMOVE_SPACE_RES(mp) \
48 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) 48 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
49#define XFS_IALLOC_SPACE_RES(mp) \ 49#define XFS_IALLOC_SPACE_RES(mp) \
50 ((mp)->m_ialloc_blks + (mp)->m_in_maxlevels - 1) 50 ((mp)->m_ialloc_blks + \
 51	((xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1) * \
 52	((mp)->m_in_maxlevels - 1)))
51 53
52/* 54/*
53 * Space reservation values for various transactions. 55 * Space reservation values for various transactions.
@@ -82,5 +84,8 @@
82 (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) 84 (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
83#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ 85#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
84 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) 86 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
87#define XFS_IFREE_SPACE_RES(mp) \
88 (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0)
89
85 90
86#endif /* __XFS_TRANS_SPACE_H__ */ 91#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 82bbc34d54a3..65c6e6650b1a 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -134,7 +134,7 @@ typedef enum {
134 134
135typedef enum { 135typedef enum {
136 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, 136 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
137 XFS_BTNUM_MAX 137 XFS_BTNUM_FINOi, XFS_BTNUM_MAX
138} xfs_btnum_t; 138} xfs_btnum_t;
139 139
140struct xfs_name { 140struct xfs_name {