about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ocfs2
diff options
context:
space:
mode:
author: Mark Fasheh <mfasheh@suse.com> 2009-01-29 21:17:46 -0500
committer: Mark Fasheh <mfasheh@suse.com> 2009-04-03 14:39:16 -0400
commit: e7c17e43090afe558c40bfb66637744c27bd2aeb (patch)
tree: 4c164c4ec59eaa7599c91f865a261517454ba45f /fs/ocfs2
parent: 4ed8a6bb083bfcc21f1ed66a474b03c0386e4b34 (diff)
ocfs2: Introduce dir free space list
The only operation which doesn't get faster with directory indexing is insert, which still has to walk the entire unindexed directory portion to find a free block. This patch provides an improvement in directory insert performance by maintaining a singly linked list of directory leaf blocks which have space for additional dirents.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Acked-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- fs/ocfs2/dir.c 558
-rw-r--r-- fs/ocfs2/dir.h 8
-rw-r--r-- fs/ocfs2/journal.h 12
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 5
4 files changed, 490 insertions, 93 deletions
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 47de64988b11..52df9125a79d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -80,22 +80,36 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
80 struct ocfs2_alloc_context *data_ac, 80 struct ocfs2_alloc_context *data_ac,
81 struct ocfs2_alloc_context *meta_ac, 81 struct ocfs2_alloc_context *meta_ac,
82 struct buffer_head **new_bh); 82 struct buffer_head **new_bh);
83static int ocfs2_dir_indexed(struct inode *inode);
83 84
84/* 85/*
85 * These are distinct checks because future versions of the file system will 86 * These are distinct checks because future versions of the file system will
86 * want to have a trailing dirent structure independent of indexing. 87 * want to have a trailing dirent structure independent of indexing.
87 */ 88 */
88static int ocfs2_dir_has_trailer(struct inode *dir) 89static int ocfs2_supports_dir_trailer(struct inode *dir)
89{ 90{
91 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
92
90 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 93 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
91 return 0; 94 return 0;
92 95
93 return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb)); 96 return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir);
94} 97}
95 98
96static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb) 99/*
100 * "new' here refers to the point at which we're creating a new
101 * directory via "mkdir()", but also when we're expanding an inline
102 * directory. In either case, we don't yet have the indexing bit set
103 * on the directory, so the standard checks will fail in when metaecc
104 * is turned off. Only directory-initialization type functions should
105 * use this then. Everything else wants ocfs2_supports_dir_trailer()
106 */
107static int ocfs2_new_dir_wants_trailer(struct inode *dir)
97{ 108{
98 return ocfs2_meta_ecc(osb); 109 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
110
111 return ocfs2_meta_ecc(osb) ||
112 ocfs2_supports_indexed_dirs(osb);
99} 113}
100 114
101static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) 115static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
@@ -127,7 +141,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir,
127{ 141{
128 unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); 142 unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
129 143
130 if (!ocfs2_dir_has_trailer(dir)) 144 if (!ocfs2_supports_dir_trailer(dir))
131 return 0; 145 return 0;
132 146
133 if (offset != toff) 147 if (offset != toff)
@@ -137,7 +151,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir,
137} 151}
138 152
139static void ocfs2_init_dir_trailer(struct inode *inode, 153static void ocfs2_init_dir_trailer(struct inode *inode,
140 struct buffer_head *bh) 154 struct buffer_head *bh, u16 rec_len)
141{ 155{
142 struct ocfs2_dir_block_trailer *trailer; 156 struct ocfs2_dir_block_trailer *trailer;
143 157
@@ -147,6 +161,42 @@ static void ocfs2_init_dir_trailer(struct inode *inode,
147 cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); 161 cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
148 trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); 162 trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
149 trailer->db_blkno = cpu_to_le64(bh->b_blocknr); 163 trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
164 trailer->db_free_rec_len = cpu_to_le16(rec_len);
165}
166/*
167 * Link an unindexed block with a dir trailer structure into the index free
168 * list. This function will modify dirdata_bh, but assumes you've already
169 * passed it to the journal.
170 */
171static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
172 struct buffer_head *dx_root_bh,
173 struct buffer_head *dirdata_bh)
174{
175 int ret;
176 struct ocfs2_dx_root_block *dx_root;
177 struct ocfs2_dir_block_trailer *trailer;
178
179 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
180 OCFS2_JOURNAL_ACCESS_WRITE);
181 if (ret) {
182 mlog_errno(ret);
183 goto out;
184 }
185 trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
186 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
187
188 trailer->db_free_next = dx_root->dr_free_blk;
189 dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
190
191 ocfs2_journal_dirty(handle, dx_root_bh);
192
193out:
194 return ret;
195}
196
197static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res)
198{
199 return res->dl_prev_leaf_bh == NULL;
150} 200}
151 201
152void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) 202void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
@@ -154,6 +204,7 @@ void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
154 brelse(res->dl_dx_root_bh); 204 brelse(res->dl_dx_root_bh);
155 brelse(res->dl_leaf_bh); 205 brelse(res->dl_leaf_bh);
156 brelse(res->dl_dx_leaf_bh); 206 brelse(res->dl_dx_leaf_bh);
207 brelse(res->dl_prev_leaf_bh);
157} 208}
158 209
159static int ocfs2_dir_indexed(struct inode *inode) 210static int ocfs2_dir_indexed(struct inode *inode)
@@ -484,7 +535,7 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
484 } 535 }
485 536
486 if (!(flags & OCFS2_BH_READAHEAD) && 537 if (!(flags & OCFS2_BH_READAHEAD) &&
487 ocfs2_dir_has_trailer(inode)) { 538 ocfs2_supports_dir_trailer(inode)) {
488 rc = ocfs2_check_dir_trailer(inode, tmp); 539 rc = ocfs2_check_dir_trailer(inode, tmp);
489 if (rc) { 540 if (rc) {
490 if (!*bh) 541 if (!*bh)
@@ -1150,6 +1201,47 @@ bail:
1150 return status; 1201 return status;
1151} 1202}
1152 1203
1204static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de)
1205{
1206 unsigned int hole;
1207
1208 if (le64_to_cpu(de->inode) == 0)
1209 hole = le16_to_cpu(de->rec_len);
1210 else
1211 hole = le16_to_cpu(de->rec_len) -
1212 OCFS2_DIR_REC_LEN(de->name_len);
1213
1214 return hole;
1215}
1216
1217static int ocfs2_find_max_rec_len(struct super_block *sb,
1218 struct buffer_head *dirblock_bh)
1219{
1220 int size, this_hole, largest_hole = 0;
1221 char *trailer, *de_buf, *limit, *start = dirblock_bh->b_data;
1222 struct ocfs2_dir_entry *de;
1223
1224 trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb);
1225 size = ocfs2_dir_trailer_blk_off(sb);
1226 limit = start + size;
1227 de_buf = start;
1228 de = (struct ocfs2_dir_entry *)de_buf;
1229 do {
1230 if (de_buf != trailer) {
1231 this_hole = ocfs2_figure_dirent_hole(de);
1232 if (this_hole > largest_hole)
1233 largest_hole = this_hole;
1234 }
1235
1236 de_buf += le16_to_cpu(de->rec_len);
1237 de = (struct ocfs2_dir_entry *)de_buf;
1238 } while (de_buf < limit);
1239
1240 if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
1241 return largest_hole;
1242 return 0;
1243}
1244
1153static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, 1245static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
1154 int index) 1246 int index)
1155{ 1247{
@@ -1171,14 +1263,26 @@ clear:
1171static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, 1263static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1172 struct ocfs2_dir_lookup_result *lookup) 1264 struct ocfs2_dir_lookup_result *lookup)
1173{ 1265{
1174 int ret, index; 1266 int ret, index, max_rec_len, add_to_free_list = 0;
1175 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; 1267 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
1176 struct buffer_head *leaf_bh = lookup->dl_leaf_bh; 1268 struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
1177 struct ocfs2_dx_leaf *dx_leaf; 1269 struct ocfs2_dx_leaf *dx_leaf;
1178 struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry; 1270 struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
1271 struct ocfs2_dir_block_trailer *trailer;
1179 struct ocfs2_dx_root_block *dx_root; 1272 struct ocfs2_dx_root_block *dx_root;
1180 struct ocfs2_dx_entry_list *entry_list; 1273 struct ocfs2_dx_entry_list *entry_list;
1181 1274
1275 /*
1276 * This function gets a bit messy because we might have to
1277 * modify the root block, regardless of whether the indexed
1278 * entries are stored inline.
1279 */
1280
1281 /*
1282 * *Only* set 'entry_list' here, based on where we're looking
1283 * for the indexed entries. Later, we might still want to
1284 * journal both blocks, based on free list state.
1285 */
1182 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; 1286 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
1183 if (ocfs2_dx_root_inline(dx_root)) { 1287 if (ocfs2_dx_root_inline(dx_root)) {
1184 entry_list = &dx_root->dr_entries; 1288 entry_list = &dx_root->dr_entries;
@@ -1203,6 +1307,15 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1203 } 1307 }
1204 1308
1205 /* 1309 /*
1310 * We know that removal of this dirent will leave enough room
1311 * for a new one, so add this block to the free list if it
1312 * isn't already there.
1313 */
1314 trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
1315 if (trailer->db_free_rec_len == 0)
1316 add_to_free_list = 1;
1317
1318 /*
1206 * Add the block holding our index into the journal before 1319 * Add the block holding our index into the journal before
1207 * removing the unindexed entry. If we get an error return 1320 * removing the unindexed entry. If we get an error return
1208 * from __ocfs2_delete_entry(), then it hasn't removed the 1321 * from __ocfs2_delete_entry(), then it hasn't removed the
@@ -1212,14 +1325,16 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1212 * We're also careful to journal the root tree block here if 1325 * We're also careful to journal the root tree block here if
1213 * we're going to be adding to the start of the free list. 1326 * we're going to be adding to the start of the free list.
1214 */ 1327 */
1215 if (ocfs2_dx_root_inline(dx_root)) { 1328 if (add_to_free_list || ocfs2_dx_root_inline(dx_root)) {
1216 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 1329 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
1217 OCFS2_JOURNAL_ACCESS_WRITE); 1330 OCFS2_JOURNAL_ACCESS_WRITE);
1218 if (ret) { 1331 if (ret) {
1219 mlog_errno(ret); 1332 mlog_errno(ret);
1220 goto out; 1333 goto out;
1221 } 1334 }
1222 } else { 1335 }
1336
1337 if (!ocfs2_dx_root_inline(dx_root)) {
1223 ret = ocfs2_journal_access_dl(handle, dir, 1338 ret = ocfs2_journal_access_dl(handle, dir,
1224 lookup->dl_dx_leaf_bh, 1339 lookup->dl_dx_leaf_bh,
1225 OCFS2_JOURNAL_ACCESS_WRITE); 1340 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1239,6 +1354,17 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1239 goto out; 1354 goto out;
1240 } 1355 }
1241 1356
1357 max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, leaf_bh);
1358 trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1359 if (add_to_free_list) {
1360 trailer->db_free_next = dx_root->dr_free_blk;
1361 dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr);
1362 ocfs2_journal_dirty(handle, dx_root_bh);
1363 }
1364
1365 /* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */
1366 ocfs2_journal_dirty(handle, leaf_bh);
1367
1242 ocfs2_dx_list_remove_entry(entry_list, index); 1368 ocfs2_dx_list_remove_entry(entry_list, index);
1243 1369
1244 if (ocfs2_dx_root_inline(dx_root)) 1370 if (ocfs2_dx_root_inline(dx_root))
@@ -1422,6 +1548,59 @@ static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
1422 lookup->dl_dx_leaf_bh); 1548 lookup->dl_dx_leaf_bh);
1423} 1549}
1424 1550
1551static void ocfs2_remove_block_from_free_list(struct inode *dir,
1552 handle_t *handle,
1553 struct ocfs2_dir_lookup_result *lookup)
1554{
1555 struct ocfs2_dir_block_trailer *trailer, *prev;
1556 struct ocfs2_dx_root_block *dx_root;
1557 struct buffer_head *bh;
1558
1559 trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1560
1561 if (ocfs2_free_list_at_root(lookup)) {
1562 bh = lookup->dl_dx_root_bh;
1563 dx_root = (struct ocfs2_dx_root_block *)bh->b_data;
1564 dx_root->dr_free_blk = trailer->db_free_next;
1565 } else {
1566 bh = lookup->dl_prev_leaf_bh;
1567 prev = ocfs2_trailer_from_bh(bh, dir->i_sb);
1568 prev->db_free_next = trailer->db_free_next;
1569 }
1570
1571 trailer->db_free_rec_len = cpu_to_le16(0);
1572 trailer->db_free_next = cpu_to_le64(0);
1573
1574 ocfs2_journal_dirty(handle, bh);
1575 ocfs2_journal_dirty(handle, lookup->dl_leaf_bh);
1576}
1577
1578/*
1579 * This expects that a journal write has been reserved on
1580 * lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh
1581 */
1582static void ocfs2_recalc_free_list(struct inode *dir, handle_t *handle,
1583 struct ocfs2_dir_lookup_result *lookup)
1584{
1585 int max_rec_len;
1586 struct ocfs2_dir_block_trailer *trailer;
1587
1588 /* Walk dl_leaf_bh to figure out what the new free rec_len is. */
1589 max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, lookup->dl_leaf_bh);
1590 if (max_rec_len) {
1591 /*
1592 * There's still room in this block, so no need to remove it
1593 * from the free list. In this case, we just want to update
1594 * the rec len accounting.
1595 */
1596 trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1597 trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1598 ocfs2_journal_dirty(handle, lookup->dl_leaf_bh);
1599 } else {
1600 ocfs2_remove_block_from_free_list(dir, handle, lookup);
1601 }
1602}
1603
1425/* we don't always have a dentry for what we want to add, so people 1604/* we don't always have a dentry for what we want to add, so people
1426 * like orphan dir can call this instead. 1605 * like orphan dir can call this instead.
1427 * 1606 *
@@ -1450,7 +1629,31 @@ int __ocfs2_add_entry(handle_t *handle,
1450 if (!namelen) 1629 if (!namelen)
1451 return -EINVAL; 1630 return -EINVAL;
1452 1631
1453 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1632 if (ocfs2_dir_indexed(dir)) {
1633 struct buffer_head *bh;
1634
1635 /*
1636 * An indexed dir may require that we update the free space
1637 * list. Reserve a write to the previous node in the list so
1638 * that we don't fail later.
1639 *
1640 * XXX: This can be either a dx_root_block, or an unindexed
1641 * directory tree leaf block.
1642 */
1643 if (ocfs2_free_list_at_root(lookup)) {
1644 bh = lookup->dl_dx_root_bh;
1645 retval = ocfs2_journal_access_dr(handle, dir, bh,
1646 OCFS2_JOURNAL_ACCESS_WRITE);
1647 } else {
1648 bh = lookup->dl_prev_leaf_bh;
1649 retval = ocfs2_journal_access_db(handle, dir, bh,
1650 OCFS2_JOURNAL_ACCESS_WRITE);
1651 }
1652 if (retval) {
1653 mlog_errno(retval);
1654 return retval;
1655 }
1656 } else if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1454 data_start = di->id2.i_data.id_data; 1657 data_start = di->id2.i_data.id_data;
1455 size = i_size_read(dir); 1658 size = i_size_read(dir);
1456 1659
@@ -1533,6 +1736,9 @@ int __ocfs2_add_entry(handle_t *handle,
1533 de->name_len = namelen; 1736 de->name_len = namelen;
1534 memcpy(de->name, name, namelen); 1737 memcpy(de->name, name, namelen);
1535 1738
1739 if (ocfs2_dir_indexed(dir))
1740 ocfs2_recalc_free_list(dir, handle, lookup);
1741
1536 dir->i_version++; 1742 dir->i_version++;
1537 status = ocfs2_journal_dirty(handle, insert_bh); 1743 status = ocfs2_journal_dirty(handle, insert_bh);
1538 retval = 0; 1744 retval = 0;
@@ -2056,7 +2262,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2056 2262
2057 mlog_entry_void(); 2263 mlog_entry_void();
2058 2264
2059 if (ocfs2_supports_dir_trailer(osb)) 2265 if (ocfs2_new_dir_wants_trailer(inode))
2060 size = ocfs2_dir_trailer_blk_off(parent->i_sb); 2266 size = ocfs2_dir_trailer_blk_off(parent->i_sb);
2061 2267
2062 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, 2268 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
@@ -2077,8 +2283,19 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2077 memset(new_bh->b_data, 0, osb->sb->s_blocksize); 2283 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
2078 2284
2079 de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); 2285 de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
2080 if (ocfs2_supports_dir_trailer(osb)) 2286 if (ocfs2_new_dir_wants_trailer(inode)) {
2081 ocfs2_init_dir_trailer(inode, new_bh); 2287 int size = le16_to_cpu(de->rec_len);
2288
2289 /*
2290 * Figure out the size of the hole left over after
2291 * insertion of '.' and '..'. The trailer wants this
2292 * information.
2293 */
2294 size -= OCFS2_DIR_REC_LEN(2);
2295 size -= sizeof(struct ocfs2_dir_block_trailer);
2296
2297 ocfs2_init_dir_trailer(inode, new_bh, size);
2298 }
2082 2299
2083 status = ocfs2_journal_dirty(handle, new_bh); 2300 status = ocfs2_journal_dirty(handle, new_bh);
2084 if (status < 0) { 2301 if (status < 0) {
@@ -2110,6 +2327,7 @@ bail:
2110static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, 2327static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2111 handle_t *handle, struct inode *dir, 2328 handle_t *handle, struct inode *dir,
2112 struct buffer_head *di_bh, 2329 struct buffer_head *di_bh,
2330 struct buffer_head *dirdata_bh,
2113 struct ocfs2_alloc_context *meta_ac, 2331 struct ocfs2_alloc_context *meta_ac,
2114 int dx_inline, 2332 int dx_inline,
2115 struct buffer_head **ret_dx_root_bh) 2333 struct buffer_head **ret_dx_root_bh)
@@ -2121,6 +2339,8 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2121 unsigned int num_bits; 2339 unsigned int num_bits;
2122 struct buffer_head *dx_root_bh = NULL; 2340 struct buffer_head *dx_root_bh = NULL;
2123 struct ocfs2_dx_root_block *dx_root; 2341 struct ocfs2_dx_root_block *dx_root;
2342 struct ocfs2_dir_block_trailer *trailer =
2343 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
2124 2344
2125 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, 2345 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit,
2126 &num_bits, &dr_blkno); 2346 &num_bits, &dr_blkno);
@@ -2155,6 +2375,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2155 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); 2375 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
2156 dx_root->dr_blkno = cpu_to_le64(dr_blkno); 2376 dx_root->dr_blkno = cpu_to_le64(dr_blkno);
2157 dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno); 2377 dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
2378 if (le16_to_cpu(trailer->db_free_rec_len))
2379 dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
2380 else
2381 dx_root->dr_free_blk = cpu_to_le64(0);
2158 2382
2159 if (dx_inline) { 2383 if (dx_inline) {
2160 dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE; 2384 dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
@@ -2361,7 +2585,7 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
2361 goto out; 2585 goto out;
2362 } 2586 }
2363 2587
2364 ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, 2588 ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, leaf_bh,
2365 meta_ac, 1, &dx_root_bh); 2589 meta_ac, 1, &dx_root_bh);
2366 if (ret) { 2590 if (ret) {
2367 mlog_errno(ret); 2591 mlog_errno(ret);
@@ -2371,6 +2595,7 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
2371 entry_list = &dx_root->dr_entries; 2595 entry_list = &dx_root->dr_entries;
2372 2596
2373 /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */ 2597 /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */
2598 ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo);
2374 ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr); 2599 ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
2375 2600
2376 ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo); 2601 ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo);
@@ -2446,7 +2671,8 @@ inc:
2446out: 2671out:
2447 return ret; 2672 return ret;
2448} 2673}
2449 /* 2674
2675/*
2450 * XXX: This expects dx_root_bh to already be part of the transaction. 2676 * XXX: This expects dx_root_bh to already be part of the transaction.
2451 */ 2677 */
2452static void ocfs2_dx_dir_index_root_block(struct inode *dir, 2678static void ocfs2_dx_dir_index_root_block(struct inode *dir,
@@ -2521,18 +2747,26 @@ static int ocfs2_new_dx_should_be_inline(struct inode *dir,
2521 * expansion from an inline directory to one with extents. The first dir block 2747 * expansion from an inline directory to one with extents. The first dir block
2522 * in that case is taken from the inline data portion of the inode block. 2748 * in that case is taken from the inline data portion of the inode block.
2523 * 2749 *
2750 * This will also return the largest amount of contiguous space for a dirent
2751 * in the block. That value is *not* necessarily the last dirent, even after
2752 * expansion. The directory indexing code wants this value for free space
2753 * accounting. We do this here since we're already walking the entire dir
2754 * block.
2755 *
2524 * We add the dir trailer if this filesystem wants it. 2756 * We add the dir trailer if this filesystem wants it.
2525 */ 2757 */
2526static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, 2758static unsigned int ocfs2_expand_last_dirent(char *start, unsigned int old_size,
2527 struct super_block *sb) 2759 struct inode *dir)
2528{ 2760{
2761 struct super_block *sb = dir->i_sb;
2529 struct ocfs2_dir_entry *de; 2762 struct ocfs2_dir_entry *de;
2530 struct ocfs2_dir_entry *prev_de; 2763 struct ocfs2_dir_entry *prev_de;
2531 char *de_buf, *limit; 2764 char *de_buf, *limit;
2532 unsigned int new_size = sb->s_blocksize; 2765 unsigned int new_size = sb->s_blocksize;
2533 unsigned int bytes; 2766 unsigned int bytes, this_hole;
2767 unsigned int largest_hole = 0;
2534 2768
2535 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) 2769 if (ocfs2_new_dir_wants_trailer(dir))
2536 new_size = ocfs2_dir_trailer_blk_off(sb); 2770 new_size = ocfs2_dir_trailer_blk_off(sb);
2537 2771
2538 bytes = new_size - old_size; 2772 bytes = new_size - old_size;
@@ -2541,12 +2775,26 @@ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
2541 de_buf = start; 2775 de_buf = start;
2542 de = (struct ocfs2_dir_entry *)de_buf; 2776 de = (struct ocfs2_dir_entry *)de_buf;
2543 do { 2777 do {
2778 this_hole = ocfs2_figure_dirent_hole(de);
2779 if (this_hole > largest_hole)
2780 largest_hole = this_hole;
2781
2544 prev_de = de; 2782 prev_de = de;
2545 de_buf += le16_to_cpu(de->rec_len); 2783 de_buf += le16_to_cpu(de->rec_len);
2546 de = (struct ocfs2_dir_entry *)de_buf; 2784 de = (struct ocfs2_dir_entry *)de_buf;
2547 } while (de_buf < limit); 2785 } while (de_buf < limit);
2548 2786
2549 le16_add_cpu(&prev_de->rec_len, bytes); 2787 le16_add_cpu(&prev_de->rec_len, bytes);
2788
2789 /* We need to double check this after modification of the final
2790 * dirent. */
2791 this_hole = ocfs2_figure_dirent_hole(prev_de);
2792 if (this_hole > largest_hole)
2793 largest_hole = this_hole;
2794
2795 if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
2796 return largest_hole;
2797 return 0;
2550} 2798}
2551 2799
2552/* 2800/*
@@ -2703,9 +2951,16 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2703 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); 2951 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
2704 memset(dirdata_bh->b_data + i_size_read(dir), 0, 2952 memset(dirdata_bh->b_data + i_size_read(dir), 0,
2705 sb->s_blocksize - i_size_read(dir)); 2953 sb->s_blocksize - i_size_read(dir));
2706 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb); 2954 i = ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), dir);
2707 if (ocfs2_supports_dir_trailer(osb)) 2955 if (ocfs2_new_dir_wants_trailer(dir)) {
2708 ocfs2_init_dir_trailer(dir, dirdata_bh); 2956 /*
2957 * Prepare the dir trailer up front. It will otherwise look
2958 * like a valid dirent. Even if inserting the index fails
2959 * (unlikely), then all we'll have done is given first dir
2960 * block a small amount of fragmentation.
2961 */
2962 ocfs2_init_dir_trailer(dir, dirdata_bh, i);
2963 }
2709 2964
2710 ret = ocfs2_journal_dirty(handle, dirdata_bh); 2965 ret = ocfs2_journal_dirty(handle, dirdata_bh);
2711 if (ret) { 2966 if (ret) {
@@ -2781,7 +3036,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2781 3036
2782 if (ocfs2_supports_indexed_dirs(osb)) { 3037 if (ocfs2_supports_indexed_dirs(osb)) {
2783 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, 3038 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
2784 meta_ac, dx_inline, 3039 dirdata_bh, meta_ac, dx_inline,
2785 &dx_root_bh); 3040 &dx_root_bh);
2786 if (ret) { 3041 if (ret) {
2787 mlog_errno(ret); 3042 mlog_errno(ret);
@@ -2933,6 +3188,8 @@ bail:
2933 * is to be turned into an extent based one. The size of the dirent to 3188 * is to be turned into an extent based one. The size of the dirent to
2934 * insert might be larger than the space gained by growing to just one 3189 * insert might be larger than the space gained by growing to just one
2935 * block, so we may have to grow the inode by two blocks in that case. 3190 * block, so we may have to grow the inode by two blocks in that case.
3191 *
3192 * If the directory is already indexed, dx_root_bh must be provided.
2936 */ 3193 */
2937static int ocfs2_extend_dir(struct ocfs2_super *osb, 3194static int ocfs2_extend_dir(struct ocfs2_super *osb,
2938 struct inode *dir, 3195 struct inode *dir,
@@ -2953,10 +3210,17 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
2953 struct ocfs2_dir_entry * de; 3210 struct ocfs2_dir_entry * de;
2954 struct super_block *sb = osb->sb; 3211 struct super_block *sb = osb->sb;
2955 struct ocfs2_extent_tree et; 3212 struct ocfs2_extent_tree et;
3213 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
2956 3214
2957 mlog_entry_void(); 3215 mlog_entry_void();
2958 3216
2959 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 3217 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
3218 /*
3219 * This would be a code error as an inline directory should
3220 * never have an index root.
3221 */
3222 BUG_ON(dx_root_bh);
3223
2960 status = ocfs2_expand_inline_dir(dir, parent_fe_bh, 3224 status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
2961 blocks_wanted, lookup, 3225 blocks_wanted, lookup,
2962 &new_bh); 3226 &new_bh);
@@ -2965,6 +3229,10 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
2965 goto bail; 3229 goto bail;
2966 } 3230 }
2967 3231
3232 /* Expansion from inline to an indexed directory will
3233 * have given us this. */
3234 dx_root_bh = lookup->dl_dx_root_bh;
3235
2968 if (blocks_wanted == 1) { 3236 if (blocks_wanted == 1) {
2969 /* 3237 /*
2970 * If the new dirent will fit inside the space 3238 * If the new dirent will fit inside the space
@@ -3028,6 +3296,10 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3028 } 3296 }
3029 3297
3030do_extend: 3298do_extend:
3299 if (ocfs2_dir_indexed(dir))
3300 credits++; /* For attaching the new dirent block to the
3301 * dx_root */
3302
3031 down_write(&OCFS2_I(dir)->ip_alloc_sem); 3303 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3032 drop_alloc_sem = 1; 3304 drop_alloc_sem = 1;
3033 3305
@@ -3058,9 +3330,19 @@ do_extend:
3058 3330
3059 de = (struct ocfs2_dir_entry *) new_bh->b_data; 3331 de = (struct ocfs2_dir_entry *) new_bh->b_data;
3060 de->inode = 0; 3332 de->inode = 0;
3061 if (ocfs2_dir_has_trailer(dir)) { 3333 if (ocfs2_supports_dir_trailer(dir)) {
3062 de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); 3334 de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
3063 ocfs2_init_dir_trailer(dir, new_bh); 3335
3336 ocfs2_init_dir_trailer(dir, new_bh, le16_to_cpu(de->rec_len));
3337
3338 if (ocfs2_dir_indexed(dir)) {
3339 status = ocfs2_dx_dir_link_trailer(dir, handle,
3340 dx_root_bh, new_bh);
3341 if (status) {
3342 mlog_errno(status);
3343 goto bail;
3344 }
3345 }
3064 } else { 3346 } else {
3065 de->rec_len = cpu_to_le16(sb->s_blocksize); 3347 de->rec_len = cpu_to_le16(sb->s_blocksize);
3066 } 3348 }
@@ -3116,7 +3398,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
3116 * This calculates how many free bytes we'd have in block zero, should 3398 * This calculates how many free bytes we'd have in block zero, should
3117 * this function force expansion to an extent tree. 3399 * this function force expansion to an extent tree.
3118 */ 3400 */
3119 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) 3401 if (ocfs2_new_dir_wants_trailer(dir))
3120 free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); 3402 free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
3121 else 3403 else
3122 free_space = dir->i_sb->s_blocksize - i_size_read(dir); 3404 free_space = dir->i_sb->s_blocksize - i_size_read(dir);
@@ -3647,6 +3929,127 @@ out:
3647 return ret; 3929 return ret;
3648} 3930}
3649 3931
3932static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
3933 struct buffer_head *di_bh,
3934 struct buffer_head *dx_root_bh,
3935 const char *name, int namelen,
3936 struct ocfs2_dir_lookup_result *lookup)
3937{
3938 int ret, rebalanced = 0;
3939 struct ocfs2_dx_root_block *dx_root;
3940 struct buffer_head *dx_leaf_bh = NULL;
3941 struct ocfs2_dx_leaf *dx_leaf;
3942 u64 blkno;
3943 u32 leaf_cpos;
3944
3945 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3946
3947restart_search:
3948 ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
3949 &leaf_cpos, &blkno);
3950 if (ret) {
3951 mlog_errno(ret);
3952 goto out;
3953 }
3954
3955 ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh);
3956 if (ret) {
3957 mlog_errno(ret);
3958 goto out;
3959 }
3960
3961 dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
3962
3963 if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >=
3964 le16_to_cpu(dx_leaf->dl_list.de_count)) {
3965 if (rebalanced) {
3966 /*
3967 * Rebalancing should have provided us with
3968 * space in an appropriate leaf.
3969 *
3970 * XXX: Is this an abnormal condition then?
3971 * Should we print a message here?
3972 */
3973 ret = -ENOSPC;
3974 goto out;
3975 }
3976
3977 ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh,
3978 &lookup->dl_hinfo, leaf_cpos,
3979 blkno);
3980 if (ret) {
3981 if (ret != -ENOSPC)
3982 mlog_errno(ret);
3983 goto out;
3984 }
3985
3986 /*
3987 * Restart the lookup. The rebalance might have
3988 * changed which block our item fits into. Mark our
3989 * progress, so we only execute this once.
3990 */
3991 brelse(dx_leaf_bh);
3992 dx_leaf_bh = NULL;
3993 rebalanced = 1;
3994 goto restart_search;
3995 }
3996
3997 lookup->dl_dx_leaf_bh = dx_leaf_bh;
3998 dx_leaf_bh = NULL;
3999
4000out:
4001 brelse(dx_leaf_bh);
4002 return ret;
4003}
4004
4005static int ocfs2_search_dx_free_list(struct inode *dir,
4006 struct buffer_head *dx_root_bh,
4007 int namelen,
4008 struct ocfs2_dir_lookup_result *lookup)
4009{
4010 int ret = -ENOSPC;
4011 struct buffer_head *leaf_bh = NULL, *prev_leaf_bh = NULL;
4012 struct ocfs2_dir_block_trailer *db;
4013 u64 next_block;
4014 int rec_len = OCFS2_DIR_REC_LEN(namelen);
4015 struct ocfs2_dx_root_block *dx_root;
4016
4017 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4018 next_block = le64_to_cpu(dx_root->dr_free_blk);
4019
4020 while (next_block) {
4021 brelse(prev_leaf_bh);
4022 prev_leaf_bh = leaf_bh;
4023 leaf_bh = NULL;
4024
4025 ret = ocfs2_read_dir_block_direct(dir, next_block, &leaf_bh);
4026 if (ret) {
4027 mlog_errno(ret);
4028 goto out;
4029 }
4030
4031 db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
4032 if (rec_len <= le16_to_cpu(db->db_free_rec_len)) {
4033 lookup->dl_leaf_bh = leaf_bh;
4034 lookup->dl_prev_leaf_bh = prev_leaf_bh;
4035 leaf_bh = NULL;
4036 prev_leaf_bh = NULL;
4037 break;
4038 }
4039
4040 next_block = le64_to_cpu(db->db_free_next);
4041 }
4042
4043 if (!next_block)
4044 ret = -ENOSPC;
4045
4046out:
4047
4048 brelse(leaf_bh);
4049 brelse(prev_leaf_bh);
4050 return ret;
4051}
4052
3650static int ocfs2_expand_inline_dx_root(struct inode *dir, 4053static int ocfs2_expand_inline_dx_root(struct inode *dir,
3651 struct buffer_head *dx_root_bh) 4054 struct buffer_head *dx_root_bh)
3652{ 4055{
@@ -3779,19 +4182,18 @@ static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
3779 return 0; 4182 return 0;
3780} 4183}
3781 4184
3782static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, 4185static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir,
3783 struct buffer_head *di_bh, const char *name, 4186 struct buffer_head *di_bh,
3784 int namelen, 4187 const char *name,
3785 struct ocfs2_dir_lookup_result *lookup) 4188 int namelen,
4189 struct ocfs2_dir_lookup_result *lookup)
3786{ 4190{
3787 int ret, rebalanced = 0; 4191 int ret, free_dx_root = 1;
4192 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
3788 struct buffer_head *dx_root_bh = NULL; 4193 struct buffer_head *dx_root_bh = NULL;
3789 struct ocfs2_dx_root_block *dx_root; 4194 struct buffer_head *leaf_bh = NULL;
3790 struct buffer_head *dx_leaf_bh = NULL;
3791 struct ocfs2_dx_leaf *dx_leaf;
3792 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 4195 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3793 u64 blkno; 4196 struct ocfs2_dx_root_block *dx_root;
3794 u32 leaf_cpos;
3795 4197
3796 ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); 4198 ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
3797 if (ret) { 4199 if (ret) {
@@ -3818,65 +4220,55 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
3818 } 4220 }
3819 } 4221 }
3820 4222
3821restart_search: 4223 /*
3822 ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo, 4224 * Insert preparation for an indexed directory is split into two
3823 &leaf_cpos, &blkno); 4225 * steps. The call to find_dir_space_dx reserves room in the index for
4226 * an additional item. If we run out of space there, it's a real error
4227 * we can't continue on.
4228 */
4229 ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name,
4230 namelen, lookup);
3824 if (ret) { 4231 if (ret) {
3825 mlog_errno(ret); 4232 mlog_errno(ret);
3826 goto out; 4233 goto out;
3827 } 4234 }
3828 4235
3829 ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh); 4236search_el:
3830 if (ret) { 4237 /*
4238 * Next, we need to find space in the unindexed tree. This call
4239 * searches using the free space linked list. If the unindexed tree
4240 * lacks sufficient space, we'll expand it below. The expansion code
4241 * is smart enough to add any new blocks to the free space list.
4242 */
4243 ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup);
4244 if (ret && ret != -ENOSPC) {
3831 mlog_errno(ret); 4245 mlog_errno(ret);
3832 goto out; 4246 goto out;
3833 } 4247 }
3834 4248
3835 dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; 4249 /* Do this up here - ocfs2_extend_dir might need the dx_root */
4250 lookup->dl_dx_root_bh = dx_root_bh;
4251 free_dx_root = 0;
3836 4252
3837 if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >= 4253 if (ret == -ENOSPC) {
3838 le16_to_cpu(dx_leaf->dl_list.de_count)) { 4254 ret = ocfs2_extend_dir(osb, dir, di_bh, 1, lookup, &leaf_bh);
3839 if (rebalanced) {
3840 /*
3841 * Rebalancing should have provided us with
3842 * space in an appropriate leaf.
3843 *
3844 * XXX: Is this an abnormal condition then?
3845 * Should we print a message here?
3846 */
3847 ret = -ENOSPC;
3848 goto out;
3849 }
3850 4255
3851 ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh,
3852 &lookup->dl_hinfo, leaf_cpos,
3853 blkno);
3854 if (ret) { 4256 if (ret) {
3855 if (ret != -ENOSPC) 4257 mlog_errno(ret);
3856 mlog_errno(ret);
3857 goto out; 4258 goto out;
3858 } 4259 }
3859 4260
3860 /* 4261 /*
3861 * Restart the lookup. The rebalance might have 4262 * We make the assumption here that new leaf blocks are added
3862 * changed which block our item fits into. Mark our 4263 * to the front of our free list.
3863 * progress, so we only execute this once.
3864 */ 4264 */
3865 brelse(dx_leaf_bh); 4265 lookup->dl_prev_leaf_bh = NULL;
3866 dx_leaf_bh = NULL; 4266 lookup->dl_leaf_bh = leaf_bh;
3867 rebalanced = 1;
3868 goto restart_search;
3869 } 4267 }
3870 4268
3871search_el:
3872 lookup->dl_dx_leaf_bh = dx_leaf_bh;
3873 dx_leaf_bh = NULL;
3874 lookup->dl_dx_root_bh = dx_root_bh;
3875 dx_root_bh = NULL;
3876
3877out: 4269out:
3878 brelse(dx_leaf_bh); 4270 if (free_dx_root)
3879 brelse(dx_root_bh); 4271 brelse(dx_root_bh);
3880 return ret; 4272 return ret;
3881} 4273}
3882 4274
@@ -3921,17 +4313,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
3921 ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo); 4313 ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo);
3922 4314
3923 if (ocfs2_dir_indexed(dir)) { 4315 if (ocfs2_dir_indexed(dir)) {
3924 ret = ocfs2_find_dir_space_dx(osb, dir, parent_fe_bh, name, 4316 ret = ocfs2_prepare_dx_dir_for_insert(dir, parent_fe_bh,
3925 namelen, lookup); 4317 name, namelen, lookup);
3926 if (ret) { 4318 if (ret)
3927 mlog_errno(ret); 4319 mlog_errno(ret);
3928 goto out; 4320 goto out;
3929 }
3930
3931 /*
3932 * We intentionally fall through so that the unindexed
3933 * tree can also be prepared.
3934 */
3935 } 4321 }
3936 4322
3937 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 4323 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h
index 07b0416cdd42..e683f3deb645 100644
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -39,11 +39,19 @@ struct ocfs2_dir_lookup_result {
39 39
40 struct buffer_head *dl_dx_root_bh; /* Root of indexed 40 struct buffer_head *dl_dx_root_bh; /* Root of indexed
41 * tree */ 41 * tree */
42
42 struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */ 43 struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */
43 struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in 44 struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in
44 * indexed leaf */ 45 * indexed leaf */
45 struct ocfs2_dx_hinfo dl_hinfo; /* Name hash results */ 46 struct ocfs2_dx_hinfo dl_hinfo; /* Name hash results */
47
48 struct buffer_head *dl_prev_leaf_bh;/* Previous entry in
49 * dir free space
50 * list. NULL if
51 * previous entry is
52 * dx root block. */
46}; 53};
54
47void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res); 55void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res);
48 56
49int ocfs2_find_entry(const char *name, int namelen, 57int ocfs2_find_entry(const char *name, int namelen,
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5585dde91344..582e27e57f34 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -385,8 +385,8 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
385} 385}
386 386
387/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 387/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
388 * bitmap block for the new bit) */ 388 * bitmap block for the new bit) dx_root update for free list */
389#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) 389#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
390 390
391static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 391static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
392{ 392{
@@ -420,19 +420,19 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
420#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) 420#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
421 421
422/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota 422/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
423 * update on dir + index leaf */ 423 * update on dir + index leaf + dx root update for free list */
424static inline int ocfs2_link_credits(struct super_block *sb) 424static inline int ocfs2_link_credits(struct super_block *sb)
425{ 425{
426 return 2*OCFS2_INODE_UPDATE_CREDITS + 2 + 426 return 2*OCFS2_INODE_UPDATE_CREDITS + 3 +
427 ocfs2_quota_trans_credits(sb); 427 ocfs2_quota_trans_credits(sb);
428} 428}
429 429
430/* inode + dir inode (if we unlink a dir), + dir entry block + orphan 430/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
431 * dir inode link + dir inode index leaf */ 431 * dir inode link + dir inode index leaf + dir index root */
432static inline int ocfs2_unlink_credits(struct super_block *sb) 432static inline int ocfs2_unlink_credits(struct super_block *sb)
433{ 433{
434 /* The quota update from ocfs2_link_credits is unused here... */ 434 /* The quota update from ocfs2_link_credits is unused here... */
435 return 2 * OCFS2_INODE_UPDATE_CREDITS + 2 + ocfs2_link_credits(sb); 435 return 2 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_link_credits(sb);
436} 436}
437 437
438/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + 438/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 1d1c54ea5bc4..d549b1799a61 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -416,6 +416,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
416#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \ 416#define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \
417 OCFS2_DIR_ROUND) & \ 417 OCFS2_DIR_ROUND) & \
418 ~OCFS2_DIR_ROUND) 418 ~OCFS2_DIR_ROUND)
419#define OCFS2_DIR_MIN_REC_LEN OCFS2_DIR_REC_LEN(1)
419 420
420#define OCFS2_LINK_MAX 32000 421#define OCFS2_LINK_MAX 32000
421 422
@@ -842,7 +843,9 @@ struct ocfs2_dx_root_block {
842 __le16 dr_reserved1; 843 __le16 dr_reserved1;
843 __le64 dr_dir_blkno; /* Pointer to parent inode */ 844 __le64 dr_dir_blkno; /* Pointer to parent inode */
844 __le64 dr_reserved2; 845 __le64 dr_reserved2;
845 __le64 dr_reserved3[16]; 846 __le64 dr_free_blk; /* Pointer to head of free
847 * unindexed block list. */
848 __le64 dr_reserved3[15];
846 union { 849 union {
847 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 850 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
848 * bits for maximum space 851 * bits for maximum space