diff options
author | Mark Fasheh <mfasheh@suse.com> | 2009-01-29 21:17:46 -0500 |
---|---|---|
committer | Mark Fasheh <mfasheh@suse.com> | 2009-04-03 14:39:16 -0400 |
commit | e7c17e43090afe558c40bfb66637744c27bd2aeb (patch) | |
tree | 4c164c4ec59eaa7599c91f865a261517454ba45f /fs/ocfs2 | |
parent | 4ed8a6bb083bfcc21f1ed66a474b03c0386e4b34 (diff) |
ocfs2: Introduce dir free space list
The only operation which doesn't get faster with directory indexing is
insert, which still has to walk the entire unindexed directory portion to
find a free block. This patch provides an improvement in directory insert
performance by maintaining a singly linked list of directory leaf blocks
which have space for additional dirents.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Acked-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/dir.c | 558 | ||||
-rw-r--r-- | fs/ocfs2/dir.h | 8 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 12 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 5 |
4 files changed, 490 insertions, 93 deletions
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 47de64988b11..52df9125a79d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -80,22 +80,36 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
80 | struct ocfs2_alloc_context *data_ac, | 80 | struct ocfs2_alloc_context *data_ac, |
81 | struct ocfs2_alloc_context *meta_ac, | 81 | struct ocfs2_alloc_context *meta_ac, |
82 | struct buffer_head **new_bh); | 82 | struct buffer_head **new_bh); |
83 | static int ocfs2_dir_indexed(struct inode *inode); | ||
83 | 84 | ||
84 | /* | 85 | /* |
85 | * These are distinct checks because future versions of the file system will | 86 | * These are distinct checks because future versions of the file system will |
86 | * want to have a trailing dirent structure independent of indexing. | 87 | * want to have a trailing dirent structure independent of indexing. |
87 | */ | 88 | */ |
88 | static int ocfs2_dir_has_trailer(struct inode *dir) | 89 | static int ocfs2_supports_dir_trailer(struct inode *dir) |
89 | { | 90 | { |
91 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
92 | |||
90 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 93 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
91 | return 0; | 94 | return 0; |
92 | 95 | ||
93 | return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb)); | 96 | return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir); |
94 | } | 97 | } |
95 | 98 | ||
96 | static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb) | 99 | /* |
100 | * "new" here refers to the point at which we're creating a new | ||
101 | * directory via "mkdir()", but also when we're expanding an inline | ||
102 | * directory. In either case, we don't yet have the indexing bit set | ||
103 | * on the directory, so the standard checks will fail when metaecc | ||
104 | * is turned off. Only directory-initialization type functions should | ||
105 | * use this then. Everything else wants ocfs2_supports_dir_trailer() | ||
106 | */ | ||
107 | static int ocfs2_new_dir_wants_trailer(struct inode *dir) | ||
97 | { | 108 | { |
98 | return ocfs2_meta_ecc(osb); | 109 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
110 | |||
111 | return ocfs2_meta_ecc(osb) || | ||
112 | ocfs2_supports_indexed_dirs(osb); | ||
99 | } | 113 | } |
100 | 114 | ||
101 | static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) | 115 | static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) |
@@ -127,7 +141,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir, | |||
127 | { | 141 | { |
128 | unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); | 142 | unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); |
129 | 143 | ||
130 | if (!ocfs2_dir_has_trailer(dir)) | 144 | if (!ocfs2_supports_dir_trailer(dir)) |
131 | return 0; | 145 | return 0; |
132 | 146 | ||
133 | if (offset != toff) | 147 | if (offset != toff) |
@@ -137,7 +151,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir, | |||
137 | } | 151 | } |
138 | 152 | ||
139 | static void ocfs2_init_dir_trailer(struct inode *inode, | 153 | static void ocfs2_init_dir_trailer(struct inode *inode, |
140 | struct buffer_head *bh) | 154 | struct buffer_head *bh, u16 rec_len) |
141 | { | 155 | { |
142 | struct ocfs2_dir_block_trailer *trailer; | 156 | struct ocfs2_dir_block_trailer *trailer; |
143 | 157 | ||
@@ -147,6 +161,42 @@ static void ocfs2_init_dir_trailer(struct inode *inode, | |||
147 | cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); | 161 | cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); |
148 | trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); | 162 | trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); |
149 | trailer->db_blkno = cpu_to_le64(bh->b_blocknr); | 163 | trailer->db_blkno = cpu_to_le64(bh->b_blocknr); |
164 | trailer->db_free_rec_len = cpu_to_le16(rec_len); | ||
165 | } | ||
166 | /* | ||
167 | * Link an unindexed block with a dir trailer structure into the index free | ||
168 | * list. This function will modify dirdata_bh, but assumes you've already | ||
169 | * passed it to the journal. | ||
170 | */ | ||
171 | static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle, | ||
172 | struct buffer_head *dx_root_bh, | ||
173 | struct buffer_head *dirdata_bh) | ||
174 | { | ||
175 | int ret; | ||
176 | struct ocfs2_dx_root_block *dx_root; | ||
177 | struct ocfs2_dir_block_trailer *trailer; | ||
178 | |||
179 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
180 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
181 | if (ret) { | ||
182 | mlog_errno(ret); | ||
183 | goto out; | ||
184 | } | ||
185 | trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); | ||
186 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
187 | |||
188 | trailer->db_free_next = dx_root->dr_free_blk; | ||
189 | dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr); | ||
190 | |||
191 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
192 | |||
193 | out: | ||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res) | ||
198 | { | ||
199 | return res->dl_prev_leaf_bh == NULL; | ||
150 | } | 200 | } |
151 | 201 | ||
152 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) | 202 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) |
@@ -154,6 +204,7 @@ void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) | |||
154 | brelse(res->dl_dx_root_bh); | 204 | brelse(res->dl_dx_root_bh); |
155 | brelse(res->dl_leaf_bh); | 205 | brelse(res->dl_leaf_bh); |
156 | brelse(res->dl_dx_leaf_bh); | 206 | brelse(res->dl_dx_leaf_bh); |
207 | brelse(res->dl_prev_leaf_bh); | ||
157 | } | 208 | } |
158 | 209 | ||
159 | static int ocfs2_dir_indexed(struct inode *inode) | 210 | static int ocfs2_dir_indexed(struct inode *inode) |
@@ -484,7 +535,7 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, | |||
484 | } | 535 | } |
485 | 536 | ||
486 | if (!(flags & OCFS2_BH_READAHEAD) && | 537 | if (!(flags & OCFS2_BH_READAHEAD) && |
487 | ocfs2_dir_has_trailer(inode)) { | 538 | ocfs2_supports_dir_trailer(inode)) { |
488 | rc = ocfs2_check_dir_trailer(inode, tmp); | 539 | rc = ocfs2_check_dir_trailer(inode, tmp); |
489 | if (rc) { | 540 | if (rc) { |
490 | if (!*bh) | 541 | if (!*bh) |
@@ -1150,6 +1201,47 @@ bail: | |||
1150 | return status; | 1201 | return status; |
1151 | } | 1202 | } |
1152 | 1203 | ||
1204 | static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de) | ||
1205 | { | ||
1206 | unsigned int hole; | ||
1207 | |||
1208 | if (le64_to_cpu(de->inode) == 0) | ||
1209 | hole = le16_to_cpu(de->rec_len); | ||
1210 | else | ||
1211 | hole = le16_to_cpu(de->rec_len) - | ||
1212 | OCFS2_DIR_REC_LEN(de->name_len); | ||
1213 | |||
1214 | return hole; | ||
1215 | } | ||
1216 | |||
1217 | static int ocfs2_find_max_rec_len(struct super_block *sb, | ||
1218 | struct buffer_head *dirblock_bh) | ||
1219 | { | ||
1220 | int size, this_hole, largest_hole = 0; | ||
1221 | char *trailer, *de_buf, *limit, *start = dirblock_bh->b_data; | ||
1222 | struct ocfs2_dir_entry *de; | ||
1223 | |||
1224 | trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb); | ||
1225 | size = ocfs2_dir_trailer_blk_off(sb); | ||
1226 | limit = start + size; | ||
1227 | de_buf = start; | ||
1228 | de = (struct ocfs2_dir_entry *)de_buf; | ||
1229 | do { | ||
1230 | if (de_buf != trailer) { | ||
1231 | this_hole = ocfs2_figure_dirent_hole(de); | ||
1232 | if (this_hole > largest_hole) | ||
1233 | largest_hole = this_hole; | ||
1234 | } | ||
1235 | |||
1236 | de_buf += le16_to_cpu(de->rec_len); | ||
1237 | de = (struct ocfs2_dir_entry *)de_buf; | ||
1238 | } while (de_buf < limit); | ||
1239 | |||
1240 | if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) | ||
1241 | return largest_hole; | ||
1242 | return 0; | ||
1243 | } | ||
1244 | |||
1153 | static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, | 1245 | static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, |
1154 | int index) | 1246 | int index) |
1155 | { | 1247 | { |
@@ -1171,14 +1263,26 @@ clear: | |||
1171 | static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | 1263 | static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, |
1172 | struct ocfs2_dir_lookup_result *lookup) | 1264 | struct ocfs2_dir_lookup_result *lookup) |
1173 | { | 1265 | { |
1174 | int ret, index; | 1266 | int ret, index, max_rec_len, add_to_free_list = 0; |
1175 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | 1267 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; |
1176 | struct buffer_head *leaf_bh = lookup->dl_leaf_bh; | 1268 | struct buffer_head *leaf_bh = lookup->dl_leaf_bh; |
1177 | struct ocfs2_dx_leaf *dx_leaf; | 1269 | struct ocfs2_dx_leaf *dx_leaf; |
1178 | struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry; | 1270 | struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry; |
1271 | struct ocfs2_dir_block_trailer *trailer; | ||
1179 | struct ocfs2_dx_root_block *dx_root; | 1272 | struct ocfs2_dx_root_block *dx_root; |
1180 | struct ocfs2_dx_entry_list *entry_list; | 1273 | struct ocfs2_dx_entry_list *entry_list; |
1181 | 1274 | ||
1275 | /* | ||
1276 | * This function gets a bit messy because we might have to | ||
1277 | * modify the root block, regardless of whether the indexed | ||
1278 | * entries are stored inline. | ||
1279 | */ | ||
1280 | |||
1281 | /* | ||
1282 | * *Only* set 'entry_list' here, based on where we're looking | ||
1283 | * for the indexed entries. Later, we might still want to | ||
1284 | * journal both blocks, based on free list state. | ||
1285 | */ | ||
1182 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | 1286 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; |
1183 | if (ocfs2_dx_root_inline(dx_root)) { | 1287 | if (ocfs2_dx_root_inline(dx_root)) { |
1184 | entry_list = &dx_root->dr_entries; | 1288 | entry_list = &dx_root->dr_entries; |
@@ -1203,6 +1307,15 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | |||
1203 | } | 1307 | } |
1204 | 1308 | ||
1205 | /* | 1309 | /* |
1310 | * We know that removal of this dirent will leave enough room | ||
1311 | * for a new one, so add this block to the free list if it | ||
1312 | * isn't already there. | ||
1313 | */ | ||
1314 | trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb); | ||
1315 | if (trailer->db_free_rec_len == 0) | ||
1316 | add_to_free_list = 1; | ||
1317 | |||
1318 | /* | ||
1206 | * Add the block holding our index into the journal before | 1319 | * Add the block holding our index into the journal before |
1207 | * removing the unindexed entry. If we get an error return | 1320 | * removing the unindexed entry. If we get an error return |
1208 | * from __ocfs2_delete_entry(), then it hasn't removed the | 1321 | * from __ocfs2_delete_entry(), then it hasn't removed the |
@@ -1212,14 +1325,16 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | |||
1212 | * We're also careful to journal the root tree block here if | 1325 | * We're also careful to journal the root tree block here if |
1213 | * we're going to be adding to the start of the free list. | 1326 | * we're going to be adding to the start of the free list. |
1214 | */ | 1327 | */ |
1215 | if (ocfs2_dx_root_inline(dx_root)) { | 1328 | if (add_to_free_list || ocfs2_dx_root_inline(dx_root)) { |
1216 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | 1329 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, |
1217 | OCFS2_JOURNAL_ACCESS_WRITE); | 1330 | OCFS2_JOURNAL_ACCESS_WRITE); |
1218 | if (ret) { | 1331 | if (ret) { |
1219 | mlog_errno(ret); | 1332 | mlog_errno(ret); |
1220 | goto out; | 1333 | goto out; |
1221 | } | 1334 | } |
1222 | } else { | 1335 | } |
1336 | |||
1337 | if (!ocfs2_dx_root_inline(dx_root)) { | ||
1223 | ret = ocfs2_journal_access_dl(handle, dir, | 1338 | ret = ocfs2_journal_access_dl(handle, dir, |
1224 | lookup->dl_dx_leaf_bh, | 1339 | lookup->dl_dx_leaf_bh, |
1225 | OCFS2_JOURNAL_ACCESS_WRITE); | 1340 | OCFS2_JOURNAL_ACCESS_WRITE); |
@@ -1239,6 +1354,17 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | |||
1239 | goto out; | 1354 | goto out; |
1240 | } | 1355 | } |
1241 | 1356 | ||
1357 | max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, leaf_bh); | ||
1358 | trailer->db_free_rec_len = cpu_to_le16(max_rec_len); | ||
1359 | if (add_to_free_list) { | ||
1360 | trailer->db_free_next = dx_root->dr_free_blk; | ||
1361 | dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr); | ||
1362 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
1363 | } | ||
1364 | |||
1365 | /* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */ | ||
1366 | ocfs2_journal_dirty(handle, leaf_bh); | ||
1367 | |||
1242 | ocfs2_dx_list_remove_entry(entry_list, index); | 1368 | ocfs2_dx_list_remove_entry(entry_list, index); |
1243 | 1369 | ||
1244 | if (ocfs2_dx_root_inline(dx_root)) | 1370 | if (ocfs2_dx_root_inline(dx_root)) |
@@ -1422,6 +1548,59 @@ static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle, | |||
1422 | lookup->dl_dx_leaf_bh); | 1548 | lookup->dl_dx_leaf_bh); |
1423 | } | 1549 | } |
1424 | 1550 | ||
1551 | static void ocfs2_remove_block_from_free_list(struct inode *dir, | ||
1552 | handle_t *handle, | ||
1553 | struct ocfs2_dir_lookup_result *lookup) | ||
1554 | { | ||
1555 | struct ocfs2_dir_block_trailer *trailer, *prev; | ||
1556 | struct ocfs2_dx_root_block *dx_root; | ||
1557 | struct buffer_head *bh; | ||
1558 | |||
1559 | trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb); | ||
1560 | |||
1561 | if (ocfs2_free_list_at_root(lookup)) { | ||
1562 | bh = lookup->dl_dx_root_bh; | ||
1563 | dx_root = (struct ocfs2_dx_root_block *)bh->b_data; | ||
1564 | dx_root->dr_free_blk = trailer->db_free_next; | ||
1565 | } else { | ||
1566 | bh = lookup->dl_prev_leaf_bh; | ||
1567 | prev = ocfs2_trailer_from_bh(bh, dir->i_sb); | ||
1568 | prev->db_free_next = trailer->db_free_next; | ||
1569 | } | ||
1570 | |||
1571 | trailer->db_free_rec_len = cpu_to_le16(0); | ||
1572 | trailer->db_free_next = cpu_to_le64(0); | ||
1573 | |||
1574 | ocfs2_journal_dirty(handle, bh); | ||
1575 | ocfs2_journal_dirty(handle, lookup->dl_leaf_bh); | ||
1576 | } | ||
1577 | |||
1578 | /* | ||
1579 | * This expects that a journal write has been reserved on | ||
1580 | * lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh | ||
1581 | */ | ||
1582 | static void ocfs2_recalc_free_list(struct inode *dir, handle_t *handle, | ||
1583 | struct ocfs2_dir_lookup_result *lookup) | ||
1584 | { | ||
1585 | int max_rec_len; | ||
1586 | struct ocfs2_dir_block_trailer *trailer; | ||
1587 | |||
1588 | /* Walk dl_leaf_bh to figure out what the new free rec_len is. */ | ||
1589 | max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, lookup->dl_leaf_bh); | ||
1590 | if (max_rec_len) { | ||
1591 | /* | ||
1592 | * There's still room in this block, so no need to remove it | ||
1593 | * from the free list. In this case, we just want to update | ||
1594 | * the rec len accounting. | ||
1595 | */ | ||
1596 | trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb); | ||
1597 | trailer->db_free_rec_len = cpu_to_le16(max_rec_len); | ||
1598 | ocfs2_journal_dirty(handle, lookup->dl_leaf_bh); | ||
1599 | } else { | ||
1600 | ocfs2_remove_block_from_free_list(dir, handle, lookup); | ||
1601 | } | ||
1602 | } | ||
1603 | |||
1425 | /* we don't always have a dentry for what we want to add, so people | 1604 | /* we don't always have a dentry for what we want to add, so people |
1426 | * like orphan dir can call this instead. | 1605 | * like orphan dir can call this instead. |
1427 | * | 1606 | * |
@@ -1450,7 +1629,31 @@ int __ocfs2_add_entry(handle_t *handle, | |||
1450 | if (!namelen) | 1629 | if (!namelen) |
1451 | return -EINVAL; | 1630 | return -EINVAL; |
1452 | 1631 | ||
1453 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 1632 | if (ocfs2_dir_indexed(dir)) { |
1633 | struct buffer_head *bh; | ||
1634 | |||
1635 | /* | ||
1636 | * An indexed dir may require that we update the free space | ||
1637 | * list. Reserve a write to the previous node in the list so | ||
1638 | * that we don't fail later. | ||
1639 | * | ||
1640 | * XXX: This can be either a dx_root_block, or an unindexed | ||
1641 | * directory tree leaf block. | ||
1642 | */ | ||
1643 | if (ocfs2_free_list_at_root(lookup)) { | ||
1644 | bh = lookup->dl_dx_root_bh; | ||
1645 | retval = ocfs2_journal_access_dr(handle, dir, bh, | ||
1646 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1647 | } else { | ||
1648 | bh = lookup->dl_prev_leaf_bh; | ||
1649 | retval = ocfs2_journal_access_db(handle, dir, bh, | ||
1650 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1651 | } | ||
1652 | if (retval) { | ||
1653 | mlog_errno(retval); | ||
1654 | return retval; | ||
1655 | } | ||
1656 | } else if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
1454 | data_start = di->id2.i_data.id_data; | 1657 | data_start = di->id2.i_data.id_data; |
1455 | size = i_size_read(dir); | 1658 | size = i_size_read(dir); |
1456 | 1659 | ||
@@ -1533,6 +1736,9 @@ int __ocfs2_add_entry(handle_t *handle, | |||
1533 | de->name_len = namelen; | 1736 | de->name_len = namelen; |
1534 | memcpy(de->name, name, namelen); | 1737 | memcpy(de->name, name, namelen); |
1535 | 1738 | ||
1739 | if (ocfs2_dir_indexed(dir)) | ||
1740 | ocfs2_recalc_free_list(dir, handle, lookup); | ||
1741 | |||
1536 | dir->i_version++; | 1742 | dir->i_version++; |
1537 | status = ocfs2_journal_dirty(handle, insert_bh); | 1743 | status = ocfs2_journal_dirty(handle, insert_bh); |
1538 | retval = 0; | 1744 | retval = 0; |
@@ -2056,7 +2262,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
2056 | 2262 | ||
2057 | mlog_entry_void(); | 2263 | mlog_entry_void(); |
2058 | 2264 | ||
2059 | if (ocfs2_supports_dir_trailer(osb)) | 2265 | if (ocfs2_new_dir_wants_trailer(inode)) |
2060 | size = ocfs2_dir_trailer_blk_off(parent->i_sb); | 2266 | size = ocfs2_dir_trailer_blk_off(parent->i_sb); |
2061 | 2267 | ||
2062 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, | 2268 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, |
@@ -2077,8 +2283,19 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
2077 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); | 2283 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); |
2078 | 2284 | ||
2079 | de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); | 2285 | de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); |
2080 | if (ocfs2_supports_dir_trailer(osb)) | 2286 | if (ocfs2_new_dir_wants_trailer(inode)) { |
2081 | ocfs2_init_dir_trailer(inode, new_bh); | 2287 | int size = le16_to_cpu(de->rec_len); |
2288 | |||
2289 | /* | ||
2290 | * Figure out the size of the hole left over after | ||
2291 | * insertion of '.' and '..'. The trailer wants this | ||
2292 | * information. | ||
2293 | */ | ||
2294 | size -= OCFS2_DIR_REC_LEN(2); | ||
2295 | size -= sizeof(struct ocfs2_dir_block_trailer); | ||
2296 | |||
2297 | ocfs2_init_dir_trailer(inode, new_bh, size); | ||
2298 | } | ||
2082 | 2299 | ||
2083 | status = ocfs2_journal_dirty(handle, new_bh); | 2300 | status = ocfs2_journal_dirty(handle, new_bh); |
2084 | if (status < 0) { | 2301 | if (status < 0) { |
@@ -2110,6 +2327,7 @@ bail: | |||
2110 | static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | 2327 | static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, |
2111 | handle_t *handle, struct inode *dir, | 2328 | handle_t *handle, struct inode *dir, |
2112 | struct buffer_head *di_bh, | 2329 | struct buffer_head *di_bh, |
2330 | struct buffer_head *dirdata_bh, | ||
2113 | struct ocfs2_alloc_context *meta_ac, | 2331 | struct ocfs2_alloc_context *meta_ac, |
2114 | int dx_inline, | 2332 | int dx_inline, |
2115 | struct buffer_head **ret_dx_root_bh) | 2333 | struct buffer_head **ret_dx_root_bh) |
@@ -2121,6 +2339,8 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | |||
2121 | unsigned int num_bits; | 2339 | unsigned int num_bits; |
2122 | struct buffer_head *dx_root_bh = NULL; | 2340 | struct buffer_head *dx_root_bh = NULL; |
2123 | struct ocfs2_dx_root_block *dx_root; | 2341 | struct ocfs2_dx_root_block *dx_root; |
2342 | struct ocfs2_dir_block_trailer *trailer = | ||
2343 | ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); | ||
2124 | 2344 | ||
2125 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, | 2345 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, |
2126 | &num_bits, &dr_blkno); | 2346 | &num_bits, &dr_blkno); |
@@ -2155,6 +2375,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | |||
2155 | dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); | 2375 | dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); |
2156 | dx_root->dr_blkno = cpu_to_le64(dr_blkno); | 2376 | dx_root->dr_blkno = cpu_to_le64(dr_blkno); |
2157 | dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno); | 2377 | dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno); |
2378 | if (le16_to_cpu(trailer->db_free_rec_len)) | ||
2379 | dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr); | ||
2380 | else | ||
2381 | dx_root->dr_free_blk = cpu_to_le64(0); | ||
2158 | 2382 | ||
2159 | if (dx_inline) { | 2383 | if (dx_inline) { |
2160 | dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE; | 2384 | dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE; |
@@ -2361,7 +2585,7 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, | |||
2361 | goto out; | 2585 | goto out; |
2362 | } | 2586 | } |
2363 | 2587 | ||
2364 | ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, | 2588 | ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, leaf_bh, |
2365 | meta_ac, 1, &dx_root_bh); | 2589 | meta_ac, 1, &dx_root_bh); |
2366 | if (ret) { | 2590 | if (ret) { |
2367 | mlog_errno(ret); | 2591 | mlog_errno(ret); |
@@ -2371,6 +2595,7 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, | |||
2371 | entry_list = &dx_root->dr_entries; | 2595 | entry_list = &dx_root->dr_entries; |
2372 | 2596 | ||
2373 | /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */ | 2597 | /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */ |
2598 | ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo); | ||
2374 | ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr); | 2599 | ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr); |
2375 | 2600 | ||
2376 | ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo); | 2601 | ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo); |
@@ -2446,7 +2671,8 @@ inc: | |||
2446 | out: | 2671 | out: |
2447 | return ret; | 2672 | return ret; |
2448 | } | 2673 | } |
2449 | /* | 2674 | |
2675 | /* | ||
2450 | * XXX: This expects dx_root_bh to already be part of the transaction. | 2676 | * XXX: This expects dx_root_bh to already be part of the transaction. |
2451 | */ | 2677 | */ |
2452 | static void ocfs2_dx_dir_index_root_block(struct inode *dir, | 2678 | static void ocfs2_dx_dir_index_root_block(struct inode *dir, |
@@ -2521,18 +2747,26 @@ static int ocfs2_new_dx_should_be_inline(struct inode *dir, | |||
2521 | * expansion from an inline directory to one with extents. The first dir block | 2747 | * expansion from an inline directory to one with extents. The first dir block |
2522 | * in that case is taken from the inline data portion of the inode block. | 2748 | * in that case is taken from the inline data portion of the inode block. |
2523 | * | 2749 | * |
2750 | * This will also return the largest amount of contiguous space for a dirent | ||
2751 | * in the block. That value is *not* necessarily the last dirent, even after | ||
2752 | * expansion. The directory indexing code wants this value for free space | ||
2753 | * accounting. We do this here since we're already walking the entire dir | ||
2754 | * block. | ||
2755 | * | ||
2524 | * We add the dir trailer if this filesystem wants it. | 2756 | * We add the dir trailer if this filesystem wants it. |
2525 | */ | 2757 | */ |
2526 | static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | 2758 | static unsigned int ocfs2_expand_last_dirent(char *start, unsigned int old_size, |
2527 | struct super_block *sb) | 2759 | struct inode *dir) |
2528 | { | 2760 | { |
2761 | struct super_block *sb = dir->i_sb; | ||
2529 | struct ocfs2_dir_entry *de; | 2762 | struct ocfs2_dir_entry *de; |
2530 | struct ocfs2_dir_entry *prev_de; | 2763 | struct ocfs2_dir_entry *prev_de; |
2531 | char *de_buf, *limit; | 2764 | char *de_buf, *limit; |
2532 | unsigned int new_size = sb->s_blocksize; | 2765 | unsigned int new_size = sb->s_blocksize; |
2533 | unsigned int bytes; | 2766 | unsigned int bytes, this_hole; |
2767 | unsigned int largest_hole = 0; | ||
2534 | 2768 | ||
2535 | if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) | 2769 | if (ocfs2_new_dir_wants_trailer(dir)) |
2536 | new_size = ocfs2_dir_trailer_blk_off(sb); | 2770 | new_size = ocfs2_dir_trailer_blk_off(sb); |
2537 | 2771 | ||
2538 | bytes = new_size - old_size; | 2772 | bytes = new_size - old_size; |
@@ -2541,12 +2775,26 @@ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | |||
2541 | de_buf = start; | 2775 | de_buf = start; |
2542 | de = (struct ocfs2_dir_entry *)de_buf; | 2776 | de = (struct ocfs2_dir_entry *)de_buf; |
2543 | do { | 2777 | do { |
2778 | this_hole = ocfs2_figure_dirent_hole(de); | ||
2779 | if (this_hole > largest_hole) | ||
2780 | largest_hole = this_hole; | ||
2781 | |||
2544 | prev_de = de; | 2782 | prev_de = de; |
2545 | de_buf += le16_to_cpu(de->rec_len); | 2783 | de_buf += le16_to_cpu(de->rec_len); |
2546 | de = (struct ocfs2_dir_entry *)de_buf; | 2784 | de = (struct ocfs2_dir_entry *)de_buf; |
2547 | } while (de_buf < limit); | 2785 | } while (de_buf < limit); |
2548 | 2786 | ||
2549 | le16_add_cpu(&prev_de->rec_len, bytes); | 2787 | le16_add_cpu(&prev_de->rec_len, bytes); |
2788 | |||
2789 | /* We need to double check this after modification of the final | ||
2790 | * dirent. */ | ||
2791 | this_hole = ocfs2_figure_dirent_hole(prev_de); | ||
2792 | if (this_hole > largest_hole) | ||
2793 | largest_hole = this_hole; | ||
2794 | |||
2795 | if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) | ||
2796 | return largest_hole; | ||
2797 | return 0; | ||
2550 | } | 2798 | } |
2551 | 2799 | ||
2552 | /* | 2800 | /* |
@@ -2703,9 +2951,16 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
2703 | memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); | 2951 | memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); |
2704 | memset(dirdata_bh->b_data + i_size_read(dir), 0, | 2952 | memset(dirdata_bh->b_data + i_size_read(dir), 0, |
2705 | sb->s_blocksize - i_size_read(dir)); | 2953 | sb->s_blocksize - i_size_read(dir)); |
2706 | ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb); | 2954 | i = ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), dir); |
2707 | if (ocfs2_supports_dir_trailer(osb)) | 2955 | if (ocfs2_new_dir_wants_trailer(dir)) { |
2708 | ocfs2_init_dir_trailer(dir, dirdata_bh); | 2956 | /* |
2957 | * Prepare the dir trailer up front. It will otherwise look | ||
2958 | * like a valid dirent. Even if inserting the index fails | ||
2959 | * (unlikely), then all we'll have done is given the first dir | ||
2960 | * block a small amount of fragmentation. | ||
2961 | */ | ||
2962 | ocfs2_init_dir_trailer(dir, dirdata_bh, i); | ||
2963 | } | ||
2709 | 2964 | ||
2710 | ret = ocfs2_journal_dirty(handle, dirdata_bh); | 2965 | ret = ocfs2_journal_dirty(handle, dirdata_bh); |
2711 | if (ret) { | 2966 | if (ret) { |
@@ -2781,7 +3036,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
2781 | 3036 | ||
2782 | if (ocfs2_supports_indexed_dirs(osb)) { | 3037 | if (ocfs2_supports_indexed_dirs(osb)) { |
2783 | ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, | 3038 | ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, |
2784 | meta_ac, dx_inline, | 3039 | dirdata_bh, meta_ac, dx_inline, |
2785 | &dx_root_bh); | 3040 | &dx_root_bh); |
2786 | if (ret) { | 3041 | if (ret) { |
2787 | mlog_errno(ret); | 3042 | mlog_errno(ret); |
@@ -2933,6 +3188,8 @@ bail: | |||
2933 | * is to be turned into an extent based one. The size of the dirent to | 3188 | * is to be turned into an extent based one. The size of the dirent to |
2934 | * insert might be larger than the space gained by growing to just one | 3189 | * insert might be larger than the space gained by growing to just one |
2935 | * block, so we may have to grow the inode by two blocks in that case. | 3190 | * block, so we may have to grow the inode by two blocks in that case. |
3191 | * | ||
3192 | * If the directory is already indexed, dx_root_bh must be provided. | ||
2936 | */ | 3193 | */ |
2937 | static int ocfs2_extend_dir(struct ocfs2_super *osb, | 3194 | static int ocfs2_extend_dir(struct ocfs2_super *osb, |
2938 | struct inode *dir, | 3195 | struct inode *dir, |
@@ -2953,10 +3210,17 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
2953 | struct ocfs2_dir_entry * de; | 3210 | struct ocfs2_dir_entry * de; |
2954 | struct super_block *sb = osb->sb; | 3211 | struct super_block *sb = osb->sb; |
2955 | struct ocfs2_extent_tree et; | 3212 | struct ocfs2_extent_tree et; |
3213 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | ||
2956 | 3214 | ||
2957 | mlog_entry_void(); | 3215 | mlog_entry_void(); |
2958 | 3216 | ||
2959 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 3217 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
3218 | /* | ||
3219 | * This would be a code error as an inline directory should | ||
3220 | * never have an index root. | ||
3221 | */ | ||
3222 | BUG_ON(dx_root_bh); | ||
3223 | |||
2960 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, | 3224 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, |
2961 | blocks_wanted, lookup, | 3225 | blocks_wanted, lookup, |
2962 | &new_bh); | 3226 | &new_bh); |
@@ -2965,6 +3229,10 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
2965 | goto bail; | 3229 | goto bail; |
2966 | } | 3230 | } |
2967 | 3231 | ||
3232 | /* Expansion from inline to an indexed directory will | ||
3233 | * have given us this. */ | ||
3234 | dx_root_bh = lookup->dl_dx_root_bh; | ||
3235 | |||
2968 | if (blocks_wanted == 1) { | 3236 | if (blocks_wanted == 1) { |
2969 | /* | 3237 | /* |
2970 | * If the new dirent will fit inside the space | 3238 | * If the new dirent will fit inside the space |
@@ -3028,6 +3296,10 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
3028 | } | 3296 | } |
3029 | 3297 | ||
3030 | do_extend: | 3298 | do_extend: |
3299 | if (ocfs2_dir_indexed(dir)) | ||
3300 | credits++; /* For attaching the new dirent block to the | ||
3301 | * dx_root */ | ||
3302 | |||
3031 | down_write(&OCFS2_I(dir)->ip_alloc_sem); | 3303 | down_write(&OCFS2_I(dir)->ip_alloc_sem); |
3032 | drop_alloc_sem = 1; | 3304 | drop_alloc_sem = 1; |
3033 | 3305 | ||
@@ -3058,9 +3330,19 @@ do_extend: | |||
3058 | 3330 | ||
3059 | de = (struct ocfs2_dir_entry *) new_bh->b_data; | 3331 | de = (struct ocfs2_dir_entry *) new_bh->b_data; |
3060 | de->inode = 0; | 3332 | de->inode = 0; |
3061 | if (ocfs2_dir_has_trailer(dir)) { | 3333 | if (ocfs2_supports_dir_trailer(dir)) { |
3062 | de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); | 3334 | de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); |
3063 | ocfs2_init_dir_trailer(dir, new_bh); | 3335 | |
3336 | ocfs2_init_dir_trailer(dir, new_bh, le16_to_cpu(de->rec_len)); | ||
3337 | |||
3338 | if (ocfs2_dir_indexed(dir)) { | ||
3339 | status = ocfs2_dx_dir_link_trailer(dir, handle, | ||
3340 | dx_root_bh, new_bh); | ||
3341 | if (status) { | ||
3342 | mlog_errno(status); | ||
3343 | goto bail; | ||
3344 | } | ||
3345 | } | ||
3064 | } else { | 3346 | } else { |
3065 | de->rec_len = cpu_to_le16(sb->s_blocksize); | 3347 | de->rec_len = cpu_to_le16(sb->s_blocksize); |
3066 | } | 3348 | } |
@@ -3116,7 +3398,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, | |||
3116 | * This calculates how many free bytes we'd have in block zero, should | 3398 | * This calculates how many free bytes we'd have in block zero, should |
3117 | * this function force expansion to an extent tree. | 3399 | * this function force expansion to an extent tree. |
3118 | */ | 3400 | */ |
3119 | if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) | 3401 | if (ocfs2_new_dir_wants_trailer(dir)) |
3120 | free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); | 3402 | free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); |
3121 | else | 3403 | else |
3122 | free_space = dir->i_sb->s_blocksize - i_size_read(dir); | 3404 | free_space = dir->i_sb->s_blocksize - i_size_read(dir); |
@@ -3647,6 +3929,127 @@ out: | |||
3647 | return ret; | 3929 | return ret; |
3648 | } | 3930 | } |
3649 | 3931 | ||
3932 | static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, | ||
3933 | struct buffer_head *di_bh, | ||
3934 | struct buffer_head *dx_root_bh, | ||
3935 | const char *name, int namelen, | ||
3936 | struct ocfs2_dir_lookup_result *lookup) | ||
3937 | { | ||
3938 | int ret, rebalanced = 0; | ||
3939 | struct ocfs2_dx_root_block *dx_root; | ||
3940 | struct buffer_head *dx_leaf_bh = NULL; | ||
3941 | struct ocfs2_dx_leaf *dx_leaf; | ||
3942 | u64 blkno; | ||
3943 | u32 leaf_cpos; | ||
3944 | |||
3945 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
3946 | |||
3947 | restart_search: | ||
3948 | ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo, | ||
3949 | &leaf_cpos, &blkno); | ||
3950 | if (ret) { | ||
3951 | mlog_errno(ret); | ||
3952 | goto out; | ||
3953 | } | ||
3954 | |||
3955 | ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh); | ||
3956 | if (ret) { | ||
3957 | mlog_errno(ret); | ||
3958 | goto out; | ||
3959 | } | ||
3960 | |||
3961 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
3962 | |||
3963 | if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >= | ||
3964 | le16_to_cpu(dx_leaf->dl_list.de_count)) { | ||
3965 | if (rebalanced) { | ||
3966 | /* | ||
3967 | * Rebalancing should have provided us with | ||
3968 | * space in an appropriate leaf. | ||
3969 | * | ||
3970 | * XXX: Is this an abnormal condition then? | ||
3971 | * Should we print a message here? | ||
3972 | */ | ||
3973 | ret = -ENOSPC; | ||
3974 | goto out; | ||
3975 | } | ||
3976 | |||
3977 | ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh, | ||
3978 | &lookup->dl_hinfo, leaf_cpos, | ||
3979 | blkno); | ||
3980 | if (ret) { | ||
3981 | if (ret != -ENOSPC) | ||
3982 | mlog_errno(ret); | ||
3983 | goto out; | ||
3984 | } | ||
3985 | |||
3986 | /* | ||
3987 | * Restart the lookup. The rebalance might have | ||
3988 | * changed which block our item fits into. Mark our | ||
3989 | * progress, so we only execute this once. | ||
3990 | */ | ||
3991 | brelse(dx_leaf_bh); | ||
3992 | dx_leaf_bh = NULL; | ||
3993 | rebalanced = 1; | ||
3994 | goto restart_search; | ||
3995 | } | ||
3996 | |||
3997 | lookup->dl_dx_leaf_bh = dx_leaf_bh; | ||
3998 | dx_leaf_bh = NULL; | ||
3999 | |||
4000 | out: | ||
4001 | brelse(dx_leaf_bh); | ||
4002 | return ret; | ||
4003 | } | ||
4004 | |||
4005 | static int ocfs2_search_dx_free_list(struct inode *dir, | ||
4006 | struct buffer_head *dx_root_bh, | ||
4007 | int namelen, | ||
4008 | struct ocfs2_dir_lookup_result *lookup) | ||
4009 | { | ||
4010 | int ret = -ENOSPC; | ||
4011 | struct buffer_head *leaf_bh = NULL, *prev_leaf_bh = NULL; | ||
4012 | struct ocfs2_dir_block_trailer *db; | ||
4013 | u64 next_block; | ||
4014 | int rec_len = OCFS2_DIR_REC_LEN(namelen); | ||
4015 | struct ocfs2_dx_root_block *dx_root; | ||
4016 | |||
4017 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4018 | next_block = le64_to_cpu(dx_root->dr_free_blk); | ||
4019 | |||
4020 | while (next_block) { | ||
4021 | brelse(prev_leaf_bh); | ||
4022 | prev_leaf_bh = leaf_bh; | ||
4023 | leaf_bh = NULL; | ||
4024 | |||
4025 | ret = ocfs2_read_dir_block_direct(dir, next_block, &leaf_bh); | ||
4026 | if (ret) { | ||
4027 | mlog_errno(ret); | ||
4028 | goto out; | ||
4029 | } | ||
4030 | |||
4031 | db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb); | ||
4032 | if (rec_len <= le16_to_cpu(db->db_free_rec_len)) { | ||
4033 | lookup->dl_leaf_bh = leaf_bh; | ||
4034 | lookup->dl_prev_leaf_bh = prev_leaf_bh; | ||
4035 | leaf_bh = NULL; | ||
4036 | prev_leaf_bh = NULL; | ||
4037 | break; | ||
4038 | } | ||
4039 | |||
4040 | next_block = le64_to_cpu(db->db_free_next); | ||
4041 | } | ||
4042 | |||
4043 | if (!next_block) | ||
4044 | ret = -ENOSPC; | ||
4045 | |||
4046 | out: | ||
4047 | |||
4048 | brelse(leaf_bh); | ||
4049 | brelse(prev_leaf_bh); | ||
4050 | return ret; | ||
4051 | } | ||
4052 | |||
3650 | static int ocfs2_expand_inline_dx_root(struct inode *dir, | 4053 | static int ocfs2_expand_inline_dx_root(struct inode *dir, |
3651 | struct buffer_head *dx_root_bh) | 4054 | struct buffer_head *dx_root_bh) |
3652 | { | 4055 | { |
@@ -3779,19 +4182,18 @@ static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh) | |||
3779 | return 0; | 4182 | return 0; |
3780 | } | 4183 | } |
3781 | 4184 | ||
3782 | static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, | 4185 | static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir, |
3783 | struct buffer_head *di_bh, const char *name, | 4186 | struct buffer_head *di_bh, |
3784 | int namelen, | 4187 | const char *name, |
3785 | struct ocfs2_dir_lookup_result *lookup) | 4188 | int namelen, |
4189 | struct ocfs2_dir_lookup_result *lookup) | ||
3786 | { | 4190 | { |
3787 | int ret, rebalanced = 0; | 4191 | int ret, free_dx_root = 1; |
4192 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
3788 | struct buffer_head *dx_root_bh = NULL; | 4193 | struct buffer_head *dx_root_bh = NULL; |
3789 | struct ocfs2_dx_root_block *dx_root; | 4194 | struct buffer_head *leaf_bh = NULL; |
3790 | struct buffer_head *dx_leaf_bh = NULL; | ||
3791 | struct ocfs2_dx_leaf *dx_leaf; | ||
3792 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 4195 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
3793 | u64 blkno; | 4196 | struct ocfs2_dx_root_block *dx_root; |
3794 | u32 leaf_cpos; | ||
3795 | 4197 | ||
3796 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | 4198 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); |
3797 | if (ret) { | 4199 | if (ret) { |
@@ -3818,65 +4220,55 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, | |||
3818 | } | 4220 | } |
3819 | } | 4221 | } |
3820 | 4222 | ||
3821 | restart_search: | 4223 | /* |
3822 | ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo, | 4224 | * Insert preparation for an indexed directory is split into two |
3823 | &leaf_cpos, &blkno); | 4225 | * steps. The call to find_dir_space_dx reserves room in the index for |
4226 | * an additional item. If we run out of space there, it's a real error | ||
4227 | * and we can't continue. | ||
4228 | */ | ||
4229 | ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name, | ||
4230 | namelen, lookup); | ||
3824 | if (ret) { | 4231 | if (ret) { |
3825 | mlog_errno(ret); | 4232 | mlog_errno(ret); |
3826 | goto out; | 4233 | goto out; |
3827 | } | 4234 | } |
3828 | 4235 | ||
3829 | ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh); | 4236 | search_el: |
3830 | if (ret) { | 4237 | /* |
4238 | * Next, we need to find space in the unindexed tree. This call | ||
4239 | * searches using the free space linked list. If the unindexed tree | ||
4240 | * lacks sufficient space, we'll expand it below. The expansion code | ||
4241 | * is smart enough to add any new blocks to the free space list. | ||
4242 | */ | ||
4243 | ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup); | ||
4244 | if (ret && ret != -ENOSPC) { | ||
3831 | mlog_errno(ret); | 4245 | mlog_errno(ret); |
3832 | goto out; | 4246 | goto out; |
3833 | } | 4247 | } |
3834 | 4248 | ||
3835 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | 4249 | /* Do this up here - ocfs2_extend_dir might need the dx_root */ |
4250 | lookup->dl_dx_root_bh = dx_root_bh; | ||
4251 | free_dx_root = 0; | ||
3836 | 4252 | ||
3837 | if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >= | 4253 | if (ret == -ENOSPC) { |
3838 | le16_to_cpu(dx_leaf->dl_list.de_count)) { | 4254 | ret = ocfs2_extend_dir(osb, dir, di_bh, 1, lookup, &leaf_bh); |
3839 | if (rebalanced) { | ||
3840 | /* | ||
3841 | * Rebalancing should have provided us with | ||
3842 | * space in an appropriate leaf. | ||
3843 | * | ||
3844 | * XXX: Is this an abnormal condition then? | ||
3845 | * Should we print a message here? | ||
3846 | */ | ||
3847 | ret = -ENOSPC; | ||
3848 | goto out; | ||
3849 | } | ||
3850 | 4255 | ||
3851 | ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh, | ||
3852 | &lookup->dl_hinfo, leaf_cpos, | ||
3853 | blkno); | ||
3854 | if (ret) { | 4256 | if (ret) { |
3855 | if (ret != -ENOSPC) | 4257 | mlog_errno(ret); |
3856 | mlog_errno(ret); | ||
3857 | goto out; | 4258 | goto out; |
3858 | } | 4259 | } |
3859 | 4260 | ||
3860 | /* | 4261 | /* |
3861 | * Restart the lookup. The rebalance might have | 4262 | * We make the assumption here that new leaf blocks are added |
3862 | * changed which block our item fits into. Mark our | 4263 | * to the front of our free list. |
3863 | * progress, so we only execute this once. | ||
3864 | */ | 4264 | */ |
3865 | brelse(dx_leaf_bh); | 4265 | lookup->dl_prev_leaf_bh = NULL; |
3866 | dx_leaf_bh = NULL; | 4266 | lookup->dl_leaf_bh = leaf_bh; |
3867 | rebalanced = 1; | ||
3868 | goto restart_search; | ||
3869 | } | 4267 | } |
3870 | 4268 | ||
3871 | search_el: | ||
3872 | lookup->dl_dx_leaf_bh = dx_leaf_bh; | ||
3873 | dx_leaf_bh = NULL; | ||
3874 | lookup->dl_dx_root_bh = dx_root_bh; | ||
3875 | dx_root_bh = NULL; | ||
3876 | |||
3877 | out: | 4269 | out: |
3878 | brelse(dx_leaf_bh); | 4270 | if (free_dx_root) |
3879 | brelse(dx_root_bh); | 4271 | brelse(dx_root_bh); |
3880 | return ret; | 4272 | return ret; |
3881 | } | 4273 | } |
3882 | 4274 | ||
@@ -3921,17 +4313,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
3921 | ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo); | 4313 | ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo); |
3922 | 4314 | ||
3923 | if (ocfs2_dir_indexed(dir)) { | 4315 | if (ocfs2_dir_indexed(dir)) { |
3924 | ret = ocfs2_find_dir_space_dx(osb, dir, parent_fe_bh, name, | 4316 | ret = ocfs2_prepare_dx_dir_for_insert(dir, parent_fe_bh, |
3925 | namelen, lookup); | 4317 | name, namelen, lookup); |
3926 | if (ret) { | 4318 | if (ret) |
3927 | mlog_errno(ret); | 4319 | mlog_errno(ret); |
3928 | goto out; | 4320 | goto out; |
3929 | } | ||
3930 | |||
3931 | /* | ||
3932 | * We intentionally fall through so that the unindexed | ||
3933 | * tree can also be prepared. | ||
3934 | */ | ||
3935 | } | 4321 | } |
3936 | 4322 | ||
3937 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 4323 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h index 07b0416cdd42..e683f3deb645 100644 --- a/fs/ocfs2/dir.h +++ b/fs/ocfs2/dir.h | |||
@@ -39,11 +39,19 @@ struct ocfs2_dir_lookup_result { | |||
39 | 39 | ||
40 | struct buffer_head *dl_dx_root_bh; /* Root of indexed | 40 | struct buffer_head *dl_dx_root_bh; /* Root of indexed |
41 | * tree */ | 41 | * tree */ |
42 | |||
42 | struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */ | 43 | struct buffer_head *dl_dx_leaf_bh; /* Indexed leaf block */ |
43 | struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in | 44 | struct ocfs2_dx_entry *dl_dx_entry; /* Target dx_entry in |
44 | * indexed leaf */ | 45 | * indexed leaf */ |
45 | struct ocfs2_dx_hinfo dl_hinfo; /* Name hash results */ | 46 | struct ocfs2_dx_hinfo dl_hinfo; /* Name hash results */ |
47 | |||
48 | struct buffer_head *dl_prev_leaf_bh;/* Previous entry in | ||
49 | * dir free space | ||
50 | * list. NULL if | ||
51 | * previous entry is | ||
52 | * dx root block. */ | ||
46 | }; | 53 | }; |
54 | |||
47 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res); | 55 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res); |
48 | 56 | ||
49 | int ocfs2_find_entry(const char *name, int namelen, | 57 | int ocfs2_find_entry(const char *name, int namelen, |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 5585dde91344..582e27e57f34 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -385,8 +385,8 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb) | |||
385 | } | 385 | } |
386 | 386 | ||
387 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + | 387 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + |
388 | * bitmap block for the new bit) */ | 388 | * bitmap block for the new bit), dx_root update for free list */ |
389 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) | 389 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) |
390 | 390 | ||
391 | static inline int ocfs2_add_dir_index_credits(struct super_block *sb) | 391 | static inline int ocfs2_add_dir_index_credits(struct super_block *sb) |
392 | { | 392 | { |
@@ -420,19 +420,19 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir, | |||
420 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) | 420 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) |
421 | 421 | ||
422 | /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota | 422 | /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota |
423 | * update on dir + index leaf */ | 423 | * update on dir + index leaf + dx root update for free list */ |
424 | static inline int ocfs2_link_credits(struct super_block *sb) | 424 | static inline int ocfs2_link_credits(struct super_block *sb) |
425 | { | 425 | { |
426 | return 2*OCFS2_INODE_UPDATE_CREDITS + 2 + | 426 | return 2*OCFS2_INODE_UPDATE_CREDITS + 3 + |
427 | ocfs2_quota_trans_credits(sb); | 427 | ocfs2_quota_trans_credits(sb); |
428 | } | 428 | } |
429 | 429 | ||
430 | /* inode + dir inode (if we unlink a dir), + dir entry block + orphan | 430 | /* inode + dir inode (if we unlink a dir), + dir entry block + orphan |
431 | * dir inode link + dir inode index leaf */ | 431 | * dir inode link + dir inode index leaf + dir index root */ |
432 | static inline int ocfs2_unlink_credits(struct super_block *sb) | 432 | static inline int ocfs2_unlink_credits(struct super_block *sb) |
433 | { | 433 | { |
434 | /* The quota update from ocfs2_link_credits is unused here... */ | 434 | /* The quota update from ocfs2_link_credits is unused here... */ |
435 | return 2 * OCFS2_INODE_UPDATE_CREDITS + 2 + ocfs2_link_credits(sb); | 435 | return 2 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_link_credits(sb); |
436 | } | 436 | } |
437 | 437 | ||
438 | /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + | 438 | /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 1d1c54ea5bc4..d549b1799a61 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -416,6 +416,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | |||
416 | #define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \ | 416 | #define OCFS2_DIR_REC_LEN(name_len) (((name_len) + OCFS2_DIR_MEMBER_LEN + \ |
417 | OCFS2_DIR_ROUND) & \ | 417 | OCFS2_DIR_ROUND) & \ |
418 | ~OCFS2_DIR_ROUND) | 418 | ~OCFS2_DIR_ROUND) |
419 | #define OCFS2_DIR_MIN_REC_LEN OCFS2_DIR_REC_LEN(1) | ||
419 | 420 | ||
420 | #define OCFS2_LINK_MAX 32000 | 421 | #define OCFS2_LINK_MAX 32000 |
421 | 422 | ||
@@ -842,7 +843,9 @@ struct ocfs2_dx_root_block { | |||
842 | __le16 dr_reserved1; | 843 | __le16 dr_reserved1; |
843 | __le64 dr_dir_blkno; /* Pointer to parent inode */ | 844 | __le64 dr_dir_blkno; /* Pointer to parent inode */ |
844 | __le64 dr_reserved2; | 845 | __le64 dr_reserved2; |
845 | __le64 dr_reserved3[16]; | 846 | __le64 dr_free_blk; /* Pointer to head of free |
847 | * unindexed block list. */ | ||
848 | __le64 dr_reserved3[15]; | ||
846 | union { | 849 | union { |
847 | struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 | 850 | struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 |
848 | * bits for maximum space | 851 | * bits for maximum space |