diff options
Diffstat (limited to 'fs/ocfs2/dir.c')
-rw-r--r-- | fs/ocfs2/dir.c | 2806 |
1 files changed, 2677 insertions, 129 deletions
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f2c4098cf337..e71160cda110 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/quotaops.h> | 43 | #include <linux/quotaops.h> |
44 | #include <linux/sort.h> | ||
44 | 45 | ||
45 | #define MLOG_MASK_PREFIX ML_NAMEI | 46 | #define MLOG_MASK_PREFIX ML_NAMEI |
46 | #include <cluster/masklog.h> | 47 | #include <cluster/masklog.h> |
@@ -58,6 +59,7 @@ | |||
58 | #include "namei.h" | 59 | #include "namei.h" |
59 | #include "suballoc.h" | 60 | #include "suballoc.h" |
60 | #include "super.h" | 61 | #include "super.h" |
62 | #include "sysfile.h" | ||
61 | #include "uptodate.h" | 63 | #include "uptodate.h" |
62 | 64 | ||
63 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
@@ -71,11 +73,6 @@ static unsigned char ocfs2_filetype_table[] = { | |||
71 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 73 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
72 | }; | 74 | }; |
73 | 75 | ||
74 | static int ocfs2_extend_dir(struct ocfs2_super *osb, | ||
75 | struct inode *dir, | ||
76 | struct buffer_head *parent_fe_bh, | ||
77 | unsigned int blocks_wanted, | ||
78 | struct buffer_head **new_de_bh); | ||
79 | static int ocfs2_do_extend_dir(struct super_block *sb, | 76 | static int ocfs2_do_extend_dir(struct super_block *sb, |
80 | handle_t *handle, | 77 | handle_t *handle, |
81 | struct inode *dir, | 78 | struct inode *dir, |
@@ -83,22 +80,36 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
83 | struct ocfs2_alloc_context *data_ac, | 80 | struct ocfs2_alloc_context *data_ac, |
84 | struct ocfs2_alloc_context *meta_ac, | 81 | struct ocfs2_alloc_context *meta_ac, |
85 | struct buffer_head **new_bh); | 82 | struct buffer_head **new_bh); |
83 | static int ocfs2_dir_indexed(struct inode *inode); | ||
86 | 84 | ||
87 | /* | 85 | /* |
88 | * These are distinct checks because future versions of the file system will | 86 | * These are distinct checks because future versions of the file system will |
89 | * want to have a trailing dirent structure independent of indexing. | 87 | * want to have a trailing dirent structure independent of indexing. |
90 | */ | 88 | */ |
91 | static int ocfs2_dir_has_trailer(struct inode *dir) | 89 | static int ocfs2_supports_dir_trailer(struct inode *dir) |
92 | { | 90 | { |
91 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
92 | |||
93 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 93 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
94 | return 0; | 94 | return 0; |
95 | 95 | ||
96 | return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb)); | 96 | return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir); |
97 | } | 97 | } |
98 | 98 | ||
99 | static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb) | 99 | /* |
100 | * "new" here refers to the point at which we're creating a new | ||
101 | * directory via "mkdir()", but also when we're expanding an inline | ||
102 | * directory. In either case, we don't yet have the indexing bit set | ||
103 | * on the directory, so the standard checks will fail when metaecc | ||
104 | * is turned off. Only directory-initialization type functions should | ||
105 | * use this then. Everything else wants ocfs2_supports_dir_trailer(). | ||
106 | */ | ||
107 | static int ocfs2_new_dir_wants_trailer(struct inode *dir) | ||
100 | { | 108 | { |
101 | return ocfs2_meta_ecc(osb); | 109 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
110 | |||
111 | return ocfs2_meta_ecc(osb) || | ||
112 | ocfs2_supports_indexed_dirs(osb); | ||
102 | } | 113 | } |
103 | 114 | ||
104 | static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) | 115 | static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb) |
@@ -130,7 +141,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir, | |||
130 | { | 141 | { |
131 | unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); | 142 | unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer); |
132 | 143 | ||
133 | if (!ocfs2_dir_has_trailer(dir)) | 144 | if (!ocfs2_supports_dir_trailer(dir)) |
134 | return 0; | 145 | return 0; |
135 | 146 | ||
136 | if (offset != toff) | 147 | if (offset != toff) |
@@ -140,7 +151,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir, | |||
140 | } | 151 | } |
141 | 152 | ||
142 | static void ocfs2_init_dir_trailer(struct inode *inode, | 153 | static void ocfs2_init_dir_trailer(struct inode *inode, |
143 | struct buffer_head *bh) | 154 | struct buffer_head *bh, u16 rec_len) |
144 | { | 155 | { |
145 | struct ocfs2_dir_block_trailer *trailer; | 156 | struct ocfs2_dir_block_trailer *trailer; |
146 | 157 | ||
@@ -150,6 +161,153 @@ static void ocfs2_init_dir_trailer(struct inode *inode, | |||
150 | cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); | 161 | cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer)); |
151 | trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); | 162 | trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); |
152 | trailer->db_blkno = cpu_to_le64(bh->b_blocknr); | 163 | trailer->db_blkno = cpu_to_le64(bh->b_blocknr); |
164 | trailer->db_free_rec_len = cpu_to_le16(rec_len); | ||
165 | } | ||
166 | /* | ||
167 | * Link an unindexed block with a dir trailer structure into the index free | ||
168 | * list. This function will modify dirdata_bh, but assumes you've already | ||
169 | * passed it to the journal. | ||
170 | */ | ||
171 | static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle, | ||
172 | struct buffer_head *dx_root_bh, | ||
173 | struct buffer_head *dirdata_bh) | ||
174 | { | ||
175 | int ret; | ||
176 | struct ocfs2_dx_root_block *dx_root; | ||
177 | struct ocfs2_dir_block_trailer *trailer; | ||
178 | |||
179 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
180 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
181 | if (ret) { | ||
182 | mlog_errno(ret); | ||
183 | goto out; | ||
184 | } | ||
185 | trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); | ||
186 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
187 | |||
188 | trailer->db_free_next = dx_root->dr_free_blk; | ||
189 | dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr); | ||
190 | |||
191 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
192 | |||
193 | out: | ||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res) | ||
198 | { | ||
199 | return res->dl_prev_leaf_bh == NULL; | ||
200 | } | ||
201 | |||
202 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) | ||
203 | { | ||
204 | brelse(res->dl_dx_root_bh); | ||
205 | brelse(res->dl_leaf_bh); | ||
206 | brelse(res->dl_dx_leaf_bh); | ||
207 | brelse(res->dl_prev_leaf_bh); | ||
208 | } | ||
209 | |||
210 | static int ocfs2_dir_indexed(struct inode *inode) | ||
211 | { | ||
212 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL) | ||
213 | return 1; | ||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root) | ||
218 | { | ||
219 | return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Hashing code adapted from ext3 | ||
224 | */ | ||
225 | #define DELTA 0x9E3779B9 | ||
226 | |||
227 | static void TEA_transform(__u32 buf[4], __u32 const in[]) | ||
228 | { | ||
229 | __u32 sum = 0; | ||
230 | __u32 b0 = buf[0], b1 = buf[1]; | ||
231 | __u32 a = in[0], b = in[1], c = in[2], d = in[3]; | ||
232 | int n = 16; | ||
233 | |||
234 | do { | ||
235 | sum += DELTA; | ||
236 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | ||
237 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | ||
238 | } while (--n); | ||
239 | |||
240 | buf[0] += b0; | ||
241 | buf[1] += b1; | ||
242 | } | ||
243 | |||
244 | static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | ||
245 | { | ||
246 | __u32 pad, val; | ||
247 | int i; | ||
248 | |||
249 | pad = (__u32)len | ((__u32)len << 8); | ||
250 | pad |= pad << 16; | ||
251 | |||
252 | val = pad; | ||
253 | if (len > num*4) | ||
254 | len = num * 4; | ||
255 | for (i = 0; i < len; i++) { | ||
256 | if ((i % 4) == 0) | ||
257 | val = pad; | ||
258 | val = msg[i] + (val << 8); | ||
259 | if ((i % 4) == 3) { | ||
260 | *buf++ = val; | ||
261 | val = pad; | ||
262 | num--; | ||
263 | } | ||
264 | } | ||
265 | if (--num >= 0) | ||
266 | *buf++ = val; | ||
267 | while (--num >= 0) | ||
268 | *buf++ = pad; | ||
269 | } | ||
270 | |||
271 | static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len, | ||
272 | struct ocfs2_dx_hinfo *hinfo) | ||
273 | { | ||
274 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
275 | const char *p; | ||
276 | __u32 in[8], buf[4]; | ||
277 | |||
278 | /* | ||
279 | * XXX: Is this really necessary, if the index is never looked | ||
280 | * at by readdir? Is a hash value of '0' a bad idea? | ||
281 | */ | ||
282 | if ((len == 1 && !strncmp(".", name, 1)) || | ||
283 | (len == 2 && !strncmp("..", name, 2))) { | ||
284 | buf[0] = buf[1] = 0; | ||
285 | goto out; | ||
286 | } | ||
287 | |||
288 | #ifdef OCFS2_DEBUG_DX_DIRS | ||
289 | /* | ||
290 | * This makes it very easy to debug indexing problems. We | ||
291 | * should never allow this to be selected without hand editing | ||
292 | * this file though. | ||
293 | */ | ||
294 | buf[0] = buf[1] = len; | ||
295 | goto out; | ||
296 | #endif | ||
297 | |||
298 | memcpy(buf, osb->osb_dx_seed, sizeof(buf)); | ||
299 | |||
300 | p = name; | ||
301 | while (len > 0) { | ||
302 | str2hashbuf(p, len, in, 4); | ||
303 | TEA_transform(buf, in); | ||
304 | len -= 16; | ||
305 | p += 16; | ||
306 | } | ||
307 | |||
308 | out: | ||
309 | hinfo->major_hash = buf[0]; | ||
310 | hinfo->minor_hash = buf[1]; | ||
153 | } | 311 | } |
154 | 312 | ||
155 | /* | 313 | /* |
@@ -312,6 +470,52 @@ static int ocfs2_validate_dir_block(struct super_block *sb, | |||
312 | } | 470 | } |
313 | 471 | ||
314 | /* | 472 | /* |
473 | * Validate a directory trailer. | ||
474 | * | ||
475 | * We check the trailer here rather than in ocfs2_validate_dir_block() | ||
476 | * because that function doesn't have the inode to test. | ||
477 | */ | ||
478 | static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh) | ||
479 | { | ||
480 | int rc = 0; | ||
481 | struct ocfs2_dir_block_trailer *trailer; | ||
482 | |||
483 | trailer = ocfs2_trailer_from_bh(bh, dir->i_sb); | ||
484 | if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) { | ||
485 | rc = -EINVAL; | ||
486 | ocfs2_error(dir->i_sb, | ||
487 | "Invalid dirblock #%llu: " | ||
488 | "signature = %.*s\n", | ||
489 | (unsigned long long)bh->b_blocknr, 7, | ||
490 | trailer->db_signature); | ||
491 | goto out; | ||
492 | } | ||
493 | if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) { | ||
494 | rc = -EINVAL; | ||
495 | ocfs2_error(dir->i_sb, | ||
496 | "Directory block #%llu has an invalid " | ||
497 | "db_blkno of %llu", | ||
498 | (unsigned long long)bh->b_blocknr, | ||
499 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
500 | goto out; | ||
501 | } | ||
502 | if (le64_to_cpu(trailer->db_parent_dinode) != | ||
503 | OCFS2_I(dir)->ip_blkno) { | ||
504 | rc = -EINVAL; | ||
505 | ocfs2_error(dir->i_sb, | ||
506 | "Directory block #%llu on dinode " | ||
507 | "#%llu has an invalid parent_dinode " | ||
508 | "of %llu", | ||
509 | (unsigned long long)bh->b_blocknr, | ||
510 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
511 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
512 | goto out; | ||
513 | } | ||
514 | out: | ||
515 | return rc; | ||
516 | } | ||
517 | |||
518 | /* | ||
315 | * This function forces all errors to -EIO for consistency with its | 519 | * This function forces all errors to -EIO for consistency with its |
316 | * predecessor, ocfs2_bread(). We haven't audited what returning the | 520 | * predecessor, ocfs2_bread(). We haven't audited what returning the |
317 | * real error codes would do to callers. We log the real codes with | 521 | * real error codes would do to callers. We log the real codes with |
@@ -322,7 +526,6 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, | |||
322 | { | 526 | { |
323 | int rc = 0; | 527 | int rc = 0; |
324 | struct buffer_head *tmp = *bh; | 528 | struct buffer_head *tmp = *bh; |
325 | struct ocfs2_dir_block_trailer *trailer; | ||
326 | 529 | ||
327 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, | 530 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, |
328 | ocfs2_validate_dir_block); | 531 | ocfs2_validate_dir_block); |
@@ -331,42 +534,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, | |||
331 | goto out; | 534 | goto out; |
332 | } | 535 | } |
333 | 536 | ||
334 | /* | ||
335 | * We check the trailer here rather than in | ||
336 | * ocfs2_validate_dir_block() because that function doesn't have | ||
337 | * the inode to test. | ||
338 | */ | ||
339 | if (!(flags & OCFS2_BH_READAHEAD) && | 537 | if (!(flags & OCFS2_BH_READAHEAD) && |
340 | ocfs2_dir_has_trailer(inode)) { | 538 | ocfs2_supports_dir_trailer(inode)) { |
341 | trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb); | 539 | rc = ocfs2_check_dir_trailer(inode, tmp); |
342 | if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) { | 540 | if (rc) { |
343 | rc = -EINVAL; | 541 | if (!*bh) |
344 | ocfs2_error(inode->i_sb, | 542 | brelse(tmp); |
345 | "Invalid dirblock #%llu: " | 543 | mlog_errno(rc); |
346 | "signature = %.*s\n", | ||
347 | (unsigned long long)tmp->b_blocknr, 7, | ||
348 | trailer->db_signature); | ||
349 | goto out; | ||
350 | } | ||
351 | if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) { | ||
352 | rc = -EINVAL; | ||
353 | ocfs2_error(inode->i_sb, | ||
354 | "Directory block #%llu has an invalid " | ||
355 | "db_blkno of %llu", | ||
356 | (unsigned long long)tmp->b_blocknr, | ||
357 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
358 | goto out; | ||
359 | } | ||
360 | if (le64_to_cpu(trailer->db_parent_dinode) != | ||
361 | OCFS2_I(inode)->ip_blkno) { | ||
362 | rc = -EINVAL; | ||
363 | ocfs2_error(inode->i_sb, | ||
364 | "Directory block #%llu on dinode " | ||
365 | "#%llu has an invalid parent_dinode " | ||
366 | "of %llu", | ||
367 | (unsigned long long)tmp->b_blocknr, | ||
368 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
369 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
370 | goto out; | 544 | goto out; |
371 | } | 545 | } |
372 | } | 546 | } |
@@ -379,6 +553,141 @@ out: | |||
379 | return rc ? -EIO : 0; | 553 | return rc ? -EIO : 0; |
380 | } | 554 | } |
381 | 555 | ||
556 | /* | ||
557 | * Read the block at 'phys' which belongs to this directory | ||
558 | * inode. This function does no virtual->physical block translation - | ||
559 | * what's passed in is assumed to be a valid directory block. | ||
560 | */ | ||
561 | static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys, | ||
562 | struct buffer_head **bh) | ||
563 | { | ||
564 | int ret; | ||
565 | struct buffer_head *tmp = *bh; | ||
566 | |||
567 | ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block); | ||
568 | if (ret) { | ||
569 | mlog_errno(ret); | ||
570 | goto out; | ||
571 | } | ||
572 | |||
573 | if (ocfs2_supports_dir_trailer(dir)) { | ||
574 | ret = ocfs2_check_dir_trailer(dir, tmp); | ||
575 | if (ret) { | ||
576 | if (!*bh) | ||
577 | brelse(tmp); | ||
578 | mlog_errno(ret); | ||
579 | goto out; | ||
580 | } | ||
581 | } | ||
582 | |||
583 | if (!ret && !*bh) | ||
584 | *bh = tmp; | ||
585 | out: | ||
586 | return ret; | ||
587 | } | ||
588 | |||
589 | static int ocfs2_validate_dx_root(struct super_block *sb, | ||
590 | struct buffer_head *bh) | ||
591 | { | ||
592 | int ret; | ||
593 | struct ocfs2_dx_root_block *dx_root; | ||
594 | |||
595 | BUG_ON(!buffer_uptodate(bh)); | ||
596 | |||
597 | dx_root = (struct ocfs2_dx_root_block *) bh->b_data; | ||
598 | |||
599 | ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check); | ||
600 | if (ret) { | ||
601 | mlog(ML_ERROR, | ||
602 | "Checksum failed for dir index root block %llu\n", | ||
603 | (unsigned long long)bh->b_blocknr); | ||
604 | return ret; | ||
605 | } | ||
606 | |||
607 | if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) { | ||
608 | ocfs2_error(sb, | ||
609 | "Dir Index Root # %llu has bad signature %.*s", | ||
610 | (unsigned long long)le64_to_cpu(dx_root->dr_blkno), | ||
611 | 7, dx_root->dr_signature); | ||
612 | return -EINVAL; | ||
613 | } | ||
614 | |||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di, | ||
619 | struct buffer_head **dx_root_bh) | ||
620 | { | ||
621 | int ret; | ||
622 | u64 blkno = le64_to_cpu(di->i_dx_root); | ||
623 | struct buffer_head *tmp = *dx_root_bh; | ||
624 | |||
625 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root); | ||
626 | |||
627 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | ||
628 | if (!ret && !*dx_root_bh) | ||
629 | *dx_root_bh = tmp; | ||
630 | |||
631 | return ret; | ||
632 | } | ||
633 | |||
634 | static int ocfs2_validate_dx_leaf(struct super_block *sb, | ||
635 | struct buffer_head *bh) | ||
636 | { | ||
637 | int ret; | ||
638 | struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data; | ||
639 | |||
640 | BUG_ON(!buffer_uptodate(bh)); | ||
641 | |||
642 | ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check); | ||
643 | if (ret) { | ||
644 | mlog(ML_ERROR, | ||
645 | "Checksum failed for dir index leaf block %llu\n", | ||
646 | (unsigned long long)bh->b_blocknr); | ||
647 | return ret; | ||
648 | } | ||
649 | |||
650 | if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) { | ||
651 | ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s", | ||
652 | 7, dx_leaf->dl_signature); | ||
653 | return -EROFS; | ||
654 | } | ||
655 | |||
656 | return 0; | ||
657 | } | ||
658 | |||
659 | static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno, | ||
660 | struct buffer_head **dx_leaf_bh) | ||
661 | { | ||
662 | int ret; | ||
663 | struct buffer_head *tmp = *dx_leaf_bh; | ||
664 | |||
665 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf); | ||
666 | |||
667 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | ||
668 | if (!ret && !*dx_leaf_bh) | ||
669 | *dx_leaf_bh = tmp; | ||
670 | |||
671 | return ret; | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * Read a series of dx_leaf blocks. This expects all buffer_head | ||
676 | * pointers to be NULL on function entry. | ||
677 | */ | ||
678 | static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num, | ||
679 | struct buffer_head **dx_leaf_bhs) | ||
680 | { | ||
681 | int ret; | ||
682 | |||
683 | ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0, | ||
684 | ocfs2_validate_dx_leaf); | ||
685 | if (ret) | ||
686 | mlog_errno(ret); | ||
687 | |||
688 | return ret; | ||
689 | } | ||
690 | |||
382 | static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, | 691 | static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, |
383 | struct inode *dir, | 692 | struct inode *dir, |
384 | struct ocfs2_dir_entry **res_dir) | 693 | struct ocfs2_dir_entry **res_dir) |
@@ -480,39 +789,340 @@ cleanup_and_exit: | |||
480 | return ret; | 789 | return ret; |
481 | } | 790 | } |
482 | 791 | ||
792 | static int ocfs2_dx_dir_lookup_rec(struct inode *inode, | ||
793 | struct ocfs2_extent_list *el, | ||
794 | u32 major_hash, | ||
795 | u32 *ret_cpos, | ||
796 | u64 *ret_phys_blkno, | ||
797 | unsigned int *ret_clen) | ||
798 | { | ||
799 | int ret = 0, i, found; | ||
800 | struct buffer_head *eb_bh = NULL; | ||
801 | struct ocfs2_extent_block *eb; | ||
802 | struct ocfs2_extent_rec *rec = NULL; | ||
803 | |||
804 | if (el->l_tree_depth) { | ||
805 | ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh); | ||
806 | if (ret) { | ||
807 | mlog_errno(ret); | ||
808 | goto out; | ||
809 | } | ||
810 | |||
811 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
812 | el = &eb->h_list; | ||
813 | |||
814 | if (el->l_tree_depth) { | ||
815 | ocfs2_error(inode->i_sb, | ||
816 | "Inode %lu has non zero tree depth in " | ||
817 | "btree tree block %llu\n", inode->i_ino, | ||
818 | (unsigned long long)eb_bh->b_blocknr); | ||
819 | ret = -EROFS; | ||
820 | goto out; | ||
821 | } | ||
822 | } | ||
823 | |||
824 | found = 0; | ||
825 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
826 | rec = &el->l_recs[i]; | ||
827 | |||
828 | if (le32_to_cpu(rec->e_cpos) <= major_hash) { | ||
829 | found = 1; | ||
830 | break; | ||
831 | } | ||
832 | } | ||
833 | |||
834 | if (!found) { | ||
835 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
836 | "record (%u, %u, 0) in btree", inode->i_ino, | ||
837 | le32_to_cpu(rec->e_cpos), | ||
838 | ocfs2_rec_clusters(el, rec)); | ||
839 | ret = -EROFS; | ||
840 | goto out; | ||
841 | } | ||
842 | |||
843 | if (ret_phys_blkno) | ||
844 | *ret_phys_blkno = le64_to_cpu(rec->e_blkno); | ||
845 | if (ret_cpos) | ||
846 | *ret_cpos = le32_to_cpu(rec->e_cpos); | ||
847 | if (ret_clen) | ||
848 | *ret_clen = le16_to_cpu(rec->e_leaf_clusters); | ||
849 | |||
850 | out: | ||
851 | brelse(eb_bh); | ||
852 | return ret; | ||
853 | } | ||
854 | |||
855 | /* | ||
856 | * Returns the block index, from the start of the cluster which this | ||
857 | * hash belongs too. | ||
858 | */ | ||
859 | static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb, | ||
860 | u32 minor_hash) | ||
861 | { | ||
862 | return minor_hash & osb->osb_dx_mask; | ||
863 | } | ||
864 | |||
865 | static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb, | ||
866 | struct ocfs2_dx_hinfo *hinfo) | ||
867 | { | ||
868 | return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash); | ||
869 | } | ||
870 | |||
871 | static int ocfs2_dx_dir_lookup(struct inode *inode, | ||
872 | struct ocfs2_extent_list *el, | ||
873 | struct ocfs2_dx_hinfo *hinfo, | ||
874 | u32 *ret_cpos, | ||
875 | u64 *ret_phys_blkno) | ||
876 | { | ||
877 | int ret = 0; | ||
878 | unsigned int cend, uninitialized_var(clen); | ||
879 | u32 uninitialized_var(cpos); | ||
880 | u64 uninitialized_var(blkno); | ||
881 | u32 name_hash = hinfo->major_hash; | ||
882 | |||
883 | ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno, | ||
884 | &clen); | ||
885 | if (ret) { | ||
886 | mlog_errno(ret); | ||
887 | goto out; | ||
888 | } | ||
889 | |||
890 | cend = cpos + clen; | ||
891 | if (name_hash >= cend) { | ||
892 | /* We want the last cluster */ | ||
893 | blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1); | ||
894 | cpos += clen - 1; | ||
895 | } else { | ||
896 | blkno += ocfs2_clusters_to_blocks(inode->i_sb, | ||
897 | name_hash - cpos); | ||
898 | cpos = name_hash; | ||
899 | } | ||
900 | |||
901 | /* | ||
902 | * We now have the cluster which should hold our entry. To | ||
903 | * find the exact block from the start of the cluster to | ||
904 | * search, we take the lower bits of the hash. | ||
905 | */ | ||
906 | blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo); | ||
907 | |||
908 | if (ret_phys_blkno) | ||
909 | *ret_phys_blkno = blkno; | ||
910 | if (ret_cpos) | ||
911 | *ret_cpos = cpos; | ||
912 | |||
913 | out: | ||
914 | |||
915 | return ret; | ||
916 | } | ||
917 | |||
918 | static int ocfs2_dx_dir_search(const char *name, int namelen, | ||
919 | struct inode *dir, | ||
920 | struct ocfs2_dx_root_block *dx_root, | ||
921 | struct ocfs2_dir_lookup_result *res) | ||
922 | { | ||
923 | int ret, i, found; | ||
924 | u64 uninitialized_var(phys); | ||
925 | struct buffer_head *dx_leaf_bh = NULL; | ||
926 | struct ocfs2_dx_leaf *dx_leaf; | ||
927 | struct ocfs2_dx_entry *dx_entry = NULL; | ||
928 | struct buffer_head *dir_ent_bh = NULL; | ||
929 | struct ocfs2_dir_entry *dir_ent = NULL; | ||
930 | struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo; | ||
931 | struct ocfs2_extent_list *dr_el; | ||
932 | struct ocfs2_dx_entry_list *entry_list; | ||
933 | |||
934 | ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo); | ||
935 | |||
936 | if (ocfs2_dx_root_inline(dx_root)) { | ||
937 | entry_list = &dx_root->dr_entries; | ||
938 | goto search; | ||
939 | } | ||
940 | |||
941 | dr_el = &dx_root->dr_list; | ||
942 | |||
943 | ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys); | ||
944 | if (ret) { | ||
945 | mlog_errno(ret); | ||
946 | goto out; | ||
947 | } | ||
948 | |||
949 | mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x " | ||
950 | "returns: %llu\n", | ||
951 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
952 | namelen, name, hinfo->major_hash, hinfo->minor_hash, | ||
953 | (unsigned long long)phys); | ||
954 | |||
955 | ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh); | ||
956 | if (ret) { | ||
957 | mlog_errno(ret); | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data; | ||
962 | |||
963 | mlog(0, "leaf info: num_used: %d, count: %d\n", | ||
964 | le16_to_cpu(dx_leaf->dl_list.de_num_used), | ||
965 | le16_to_cpu(dx_leaf->dl_list.de_count)); | ||
966 | |||
967 | entry_list = &dx_leaf->dl_list; | ||
968 | |||
969 | search: | ||
970 | /* | ||
971 | * Empty leaf is legal, so no need to check for that. | ||
972 | */ | ||
973 | found = 0; | ||
974 | for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) { | ||
975 | dx_entry = &entry_list->de_entries[i]; | ||
976 | |||
977 | if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash) | ||
978 | || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash)) | ||
979 | continue; | ||
980 | |||
981 | /* | ||
982 | * Search unindexed leaf block now. We're not | ||
983 | * guaranteed to find anything. | ||
984 | */ | ||
985 | ret = ocfs2_read_dir_block_direct(dir, | ||
986 | le64_to_cpu(dx_entry->dx_dirent_blk), | ||
987 | &dir_ent_bh); | ||
988 | if (ret) { | ||
989 | mlog_errno(ret); | ||
990 | goto out; | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * XXX: We should check the unindexed block here, | ||
995 | * before using it. | ||
996 | */ | ||
997 | |||
998 | found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen, | ||
999 | 0, dir_ent_bh->b_data, | ||
1000 | dir->i_sb->s_blocksize, &dir_ent); | ||
1001 | if (found == 1) | ||
1002 | break; | ||
1003 | |||
1004 | if (found == -1) { | ||
1005 | /* This means we found a bad directory entry. */ | ||
1006 | ret = -EIO; | ||
1007 | mlog_errno(ret); | ||
1008 | goto out; | ||
1009 | } | ||
1010 | |||
1011 | brelse(dir_ent_bh); | ||
1012 | dir_ent_bh = NULL; | ||
1013 | } | ||
1014 | |||
1015 | if (found <= 0) { | ||
1016 | ret = -ENOENT; | ||
1017 | goto out; | ||
1018 | } | ||
1019 | |||
1020 | res->dl_leaf_bh = dir_ent_bh; | ||
1021 | res->dl_entry = dir_ent; | ||
1022 | res->dl_dx_leaf_bh = dx_leaf_bh; | ||
1023 | res->dl_dx_entry = dx_entry; | ||
1024 | |||
1025 | ret = 0; | ||
1026 | out: | ||
1027 | if (ret) { | ||
1028 | brelse(dx_leaf_bh); | ||
1029 | brelse(dir_ent_bh); | ||
1030 | } | ||
1031 | return ret; | ||
1032 | } | ||
1033 | |||
1034 | static int ocfs2_find_entry_dx(const char *name, int namelen, | ||
1035 | struct inode *dir, | ||
1036 | struct ocfs2_dir_lookup_result *lookup) | ||
1037 | { | ||
1038 | int ret; | ||
1039 | struct buffer_head *di_bh = NULL; | ||
1040 | struct ocfs2_dinode *di; | ||
1041 | struct buffer_head *dx_root_bh = NULL; | ||
1042 | struct ocfs2_dx_root_block *dx_root; | ||
1043 | |||
1044 | ret = ocfs2_read_inode_block(dir, &di_bh); | ||
1045 | if (ret) { | ||
1046 | mlog_errno(ret); | ||
1047 | goto out; | ||
1048 | } | ||
1049 | |||
1050 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1051 | |||
1052 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
1053 | if (ret) { | ||
1054 | mlog_errno(ret); | ||
1055 | goto out; | ||
1056 | } | ||
1057 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
1058 | |||
1059 | ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup); | ||
1060 | if (ret) { | ||
1061 | if (ret != -ENOENT) | ||
1062 | mlog_errno(ret); | ||
1063 | goto out; | ||
1064 | } | ||
1065 | |||
1066 | lookup->dl_dx_root_bh = dx_root_bh; | ||
1067 | dx_root_bh = NULL; | ||
1068 | out: | ||
1069 | brelse(di_bh); | ||
1070 | brelse(dx_root_bh); | ||
1071 | return ret; | ||
1072 | } | ||
1073 | |||
483 | /* | 1074 | /* |
484 | * Try to find an entry of the provided name within 'dir'. | 1075 | * Try to find an entry of the provided name within 'dir'. |
485 | * | 1076 | * |
486 | * If nothing was found, NULL is returned. Otherwise, a buffer_head | 1077 | * If nothing was found, -ENOENT is returned. Otherwise, zero is |
487 | * and pointer to the dir entry are passed back. | 1078 | * returned and the struct 'res' will contain information useful to |
1079 | * other directory manipulation functions. | ||
488 | * | 1080 | * |
489 | * Caller can NOT assume anything about the contents of the | 1081 | * Caller can NOT assume anything about the contents of the |
490 | * buffer_head - it is passed back only so that it can be passed into | 1082 | * buffer_heads - they are passed back only so that it can be passed |
491 | * any one of the manipulation functions (add entry, delete entry, | 1083 | * into any one of the manipulation functions (add entry, delete |
492 | * etc). As an example, bh in the extent directory case is a data | 1084 | * entry, etc). As an example, bh in the extent directory case is a |
493 | * block, in the inline-data case it actually points to an inode. | 1085 | * data block, in the inline-data case it actually points to an inode, |
1086 | * in the indexed directory case, multiple buffers are involved. | ||
494 | */ | 1087 | */ |
495 | struct buffer_head *ocfs2_find_entry(const char *name, int namelen, | 1088 | int ocfs2_find_entry(const char *name, int namelen, |
496 | struct inode *dir, | 1089 | struct inode *dir, struct ocfs2_dir_lookup_result *lookup) |
497 | struct ocfs2_dir_entry **res_dir) | ||
498 | { | 1090 | { |
499 | *res_dir = NULL; | 1091 | struct buffer_head *bh; |
1092 | struct ocfs2_dir_entry *res_dir = NULL; | ||
500 | 1093 | ||
1094 | if (ocfs2_dir_indexed(dir)) | ||
1095 | return ocfs2_find_entry_dx(name, namelen, dir, lookup); | ||
1096 | |||
1097 | /* | ||
1098 | * The unindexed dir code only uses part of the lookup | ||
1099 | * structure, so there's no reason to push it down further | ||
1100 | * than this. | ||
1101 | */ | ||
501 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1102 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
502 | return ocfs2_find_entry_id(name, namelen, dir, res_dir); | 1103 | bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir); |
1104 | else | ||
1105 | bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir); | ||
1106 | |||
1107 | if (bh == NULL) | ||
1108 | return -ENOENT; | ||
503 | 1109 | ||
504 | return ocfs2_find_entry_el(name, namelen, dir, res_dir); | 1110 | lookup->dl_leaf_bh = bh; |
1111 | lookup->dl_entry = res_dir; | ||
1112 | return 0; | ||
505 | } | 1113 | } |
506 | 1114 | ||
507 | /* | 1115 | /* |
508 | * Update inode number and type of a previously found directory entry. | 1116 | * Update inode number and type of a previously found directory entry. |
509 | */ | 1117 | */ |
510 | int ocfs2_update_entry(struct inode *dir, handle_t *handle, | 1118 | int ocfs2_update_entry(struct inode *dir, handle_t *handle, |
511 | struct buffer_head *de_bh, struct ocfs2_dir_entry *de, | 1119 | struct ocfs2_dir_lookup_result *res, |
512 | struct inode *new_entry_inode) | 1120 | struct inode *new_entry_inode) |
513 | { | 1121 | { |
514 | int ret; | 1122 | int ret; |
515 | ocfs2_journal_access_func access = ocfs2_journal_access_db; | 1123 | ocfs2_journal_access_func access = ocfs2_journal_access_db; |
1124 | struct ocfs2_dir_entry *de = res->dl_entry; | ||
1125 | struct buffer_head *de_bh = res->dl_leaf_bh; | ||
516 | 1126 | ||
517 | /* | 1127 | /* |
518 | * The same code works fine for both inline-data and extent | 1128 | * The same code works fine for both inline-data and extent |
@@ -538,6 +1148,10 @@ out: | |||
538 | return ret; | 1148 | return ret; |
539 | } | 1149 | } |
540 | 1150 | ||
1151 | /* | ||
1152 | * __ocfs2_delete_entry deletes a directory entry by merging it with the | ||
1153 | * previous entry | ||
1154 | */ | ||
541 | static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, | 1155 | static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, |
542 | struct ocfs2_dir_entry *de_del, | 1156 | struct ocfs2_dir_entry *de_del, |
543 | struct buffer_head *bh, char *first_de, | 1157 | struct buffer_head *bh, char *first_de, |
@@ -587,6 +1201,181 @@ bail: | |||
587 | return status; | 1201 | return status; |
588 | } | 1202 | } |
589 | 1203 | ||
1204 | static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de) | ||
1205 | { | ||
1206 | unsigned int hole; | ||
1207 | |||
1208 | if (le64_to_cpu(de->inode) == 0) | ||
1209 | hole = le16_to_cpu(de->rec_len); | ||
1210 | else | ||
1211 | hole = le16_to_cpu(de->rec_len) - | ||
1212 | OCFS2_DIR_REC_LEN(de->name_len); | ||
1213 | |||
1214 | return hole; | ||
1215 | } | ||
1216 | |||
1217 | static int ocfs2_find_max_rec_len(struct super_block *sb, | ||
1218 | struct buffer_head *dirblock_bh) | ||
1219 | { | ||
1220 | int size, this_hole, largest_hole = 0; | ||
1221 | char *trailer, *de_buf, *limit, *start = dirblock_bh->b_data; | ||
1222 | struct ocfs2_dir_entry *de; | ||
1223 | |||
1224 | trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb); | ||
1225 | size = ocfs2_dir_trailer_blk_off(sb); | ||
1226 | limit = start + size; | ||
1227 | de_buf = start; | ||
1228 | de = (struct ocfs2_dir_entry *)de_buf; | ||
1229 | do { | ||
1230 | if (de_buf != trailer) { | ||
1231 | this_hole = ocfs2_figure_dirent_hole(de); | ||
1232 | if (this_hole > largest_hole) | ||
1233 | largest_hole = this_hole; | ||
1234 | } | ||
1235 | |||
1236 | de_buf += le16_to_cpu(de->rec_len); | ||
1237 | de = (struct ocfs2_dir_entry *)de_buf; | ||
1238 | } while (de_buf < limit); | ||
1239 | |||
1240 | if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) | ||
1241 | return largest_hole; | ||
1242 | return 0; | ||
1243 | } | ||
1244 | |||
1245 | static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, | ||
1246 | int index) | ||
1247 | { | ||
1248 | int num_used = le16_to_cpu(entry_list->de_num_used); | ||
1249 | |||
1250 | if (num_used == 1 || index == (num_used - 1)) | ||
1251 | goto clear; | ||
1252 | |||
1253 | memmove(&entry_list->de_entries[index], | ||
1254 | &entry_list->de_entries[index + 1], | ||
1255 | (num_used - index - 1)*sizeof(struct ocfs2_dx_entry)); | ||
1256 | clear: | ||
1257 | num_used--; | ||
1258 | memset(&entry_list->de_entries[num_used], 0, | ||
1259 | sizeof(struct ocfs2_dx_entry)); | ||
1260 | entry_list->de_num_used = cpu_to_le16(num_used); | ||
1261 | } | ||
1262 | |||
1263 | static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | ||
1264 | struct ocfs2_dir_lookup_result *lookup) | ||
1265 | { | ||
1266 | int ret, index, max_rec_len, add_to_free_list = 0; | ||
1267 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | ||
1268 | struct buffer_head *leaf_bh = lookup->dl_leaf_bh; | ||
1269 | struct ocfs2_dx_leaf *dx_leaf; | ||
1270 | struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry; | ||
1271 | struct ocfs2_dir_block_trailer *trailer; | ||
1272 | struct ocfs2_dx_root_block *dx_root; | ||
1273 | struct ocfs2_dx_entry_list *entry_list; | ||
1274 | |||
1275 | /* | ||
1276 | * This function gets a bit messy because we might have to | ||
1277 | * modify the root block, regardless of whether the indexed | ||
1278 | * entries are stored inline. | ||
1279 | */ | ||
1280 | |||
1281 | /* | ||
1282 | * *Only* set 'entry_list' here, based on where we're looking | ||
1283 | * for the indexed entries. Later, we might still want to | ||
1284 | * journal both blocks, based on free list state. | ||
1285 | */ | ||
1286 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
1287 | if (ocfs2_dx_root_inline(dx_root)) { | ||
1288 | entry_list = &dx_root->dr_entries; | ||
1289 | } else { | ||
1290 | dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data; | ||
1291 | entry_list = &dx_leaf->dl_list; | ||
1292 | } | ||
1293 | |||
1294 | /* Neither of these are a disk corruption - that should have | ||
1295 | * been caught by lookup, before we got here. */ | ||
1296 | BUG_ON(le16_to_cpu(entry_list->de_count) <= 0); | ||
1297 | BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0); | ||
1298 | |||
1299 | index = (char *)dx_entry - (char *)entry_list->de_entries; | ||
1300 | index /= sizeof(*dx_entry); | ||
1301 | |||
1302 | if (index >= le16_to_cpu(entry_list->de_num_used)) { | ||
1303 | mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n", | ||
1304 | (unsigned long long)OCFS2_I(dir)->ip_blkno, index, | ||
1305 | entry_list, dx_entry); | ||
1306 | return -EIO; | ||
1307 | } | ||
1308 | |||
1309 | /* | ||
1310 | * We know that removal of this dirent will leave enough room | ||
1311 | * for a new one, so add this block to the free list if it | ||
1312 | * isn't already there. | ||
1313 | */ | ||
1314 | trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb); | ||
1315 | if (trailer->db_free_rec_len == 0) | ||
1316 | add_to_free_list = 1; | ||
1317 | |||
1318 | /* | ||
1319 | * Add the block holding our index into the journal before | ||
1320 | * removing the unindexed entry. If we get an error return | ||
1321 | * from __ocfs2_delete_entry(), then it hasn't removed the | ||
1322 | * entry yet. Likewise, successful return means we *must* | ||
1323 | * remove the indexed entry. | ||
1324 | * | ||
1325 | * We're also careful to journal the root tree block here as | ||
1326 | * the entry count needs to be updated. Also, we might be | ||
1327 | * adding to the start of the free list. | ||
1328 | */ | ||
1329 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
1330 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1331 | if (ret) { | ||
1332 | mlog_errno(ret); | ||
1333 | goto out; | ||
1334 | } | ||
1335 | |||
1336 | if (!ocfs2_dx_root_inline(dx_root)) { | ||
1337 | ret = ocfs2_journal_access_dl(handle, dir, | ||
1338 | lookup->dl_dx_leaf_bh, | ||
1339 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1340 | if (ret) { | ||
1341 | mlog_errno(ret); | ||
1342 | goto out; | ||
1343 | } | ||
1344 | } | ||
1345 | |||
1346 | mlog(0, "Dir %llu: delete entry at index: %d\n", | ||
1347 | (unsigned long long)OCFS2_I(dir)->ip_blkno, index); | ||
1348 | |||
1349 | ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry, | ||
1350 | leaf_bh, leaf_bh->b_data, leaf_bh->b_size); | ||
1351 | if (ret) { | ||
1352 | mlog_errno(ret); | ||
1353 | goto out; | ||
1354 | } | ||
1355 | |||
1356 | max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, leaf_bh); | ||
1357 | trailer->db_free_rec_len = cpu_to_le16(max_rec_len); | ||
1358 | if (add_to_free_list) { | ||
1359 | trailer->db_free_next = dx_root->dr_free_blk; | ||
1360 | dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr); | ||
1361 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
1362 | } | ||
1363 | |||
1364 | /* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */ | ||
1365 | ocfs2_journal_dirty(handle, leaf_bh); | ||
1366 | |||
1367 | le32_add_cpu(&dx_root->dr_num_entries, -1); | ||
1368 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
1369 | |||
1370 | ocfs2_dx_list_remove_entry(entry_list, index); | ||
1371 | |||
1372 | if (!ocfs2_dx_root_inline(dx_root)) | ||
1373 | ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh); | ||
1374 | |||
1375 | out: | ||
1376 | return ret; | ||
1377 | } | ||
1378 | |||
590 | static inline int ocfs2_delete_entry_id(handle_t *handle, | 1379 | static inline int ocfs2_delete_entry_id(handle_t *handle, |
591 | struct inode *dir, | 1380 | struct inode *dir, |
592 | struct ocfs2_dir_entry *de_del, | 1381 | struct ocfs2_dir_entry *de_del, |
@@ -624,18 +1413,22 @@ static inline int ocfs2_delete_entry_el(handle_t *handle, | |||
624 | } | 1413 | } |
625 | 1414 | ||
626 | /* | 1415 | /* |
627 | * ocfs2_delete_entry deletes a directory entry by merging it with the | 1416 | * Delete a directory entry. Hide the details of directory |
628 | * previous entry | 1417 | * implementation from the caller. |
629 | */ | 1418 | */ |
630 | int ocfs2_delete_entry(handle_t *handle, | 1419 | int ocfs2_delete_entry(handle_t *handle, |
631 | struct inode *dir, | 1420 | struct inode *dir, |
632 | struct ocfs2_dir_entry *de_del, | 1421 | struct ocfs2_dir_lookup_result *res) |
633 | struct buffer_head *bh) | ||
634 | { | 1422 | { |
1423 | if (ocfs2_dir_indexed(dir)) | ||
1424 | return ocfs2_delete_entry_dx(handle, dir, res); | ||
1425 | |||
635 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1426 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
636 | return ocfs2_delete_entry_id(handle, dir, de_del, bh); | 1427 | return ocfs2_delete_entry_id(handle, dir, res->dl_entry, |
1428 | res->dl_leaf_bh); | ||
637 | 1429 | ||
638 | return ocfs2_delete_entry_el(handle, dir, de_del, bh); | 1430 | return ocfs2_delete_entry_el(handle, dir, res->dl_entry, |
1431 | res->dl_leaf_bh); | ||
639 | } | 1432 | } |
640 | 1433 | ||
641 | /* | 1434 | /* |
@@ -663,18 +1456,166 @@ static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de, | |||
663 | return 0; | 1456 | return 0; |
664 | } | 1457 | } |
665 | 1458 | ||
1459 | static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf, | ||
1460 | struct ocfs2_dx_entry *dx_new_entry) | ||
1461 | { | ||
1462 | int i; | ||
1463 | |||
1464 | i = le16_to_cpu(dx_leaf->dl_list.de_num_used); | ||
1465 | dx_leaf->dl_list.de_entries[i] = *dx_new_entry; | ||
1466 | |||
1467 | le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1); | ||
1468 | } | ||
1469 | |||
1470 | static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list, | ||
1471 | struct ocfs2_dx_hinfo *hinfo, | ||
1472 | u64 dirent_blk) | ||
1473 | { | ||
1474 | int i; | ||
1475 | struct ocfs2_dx_entry *dx_entry; | ||
1476 | |||
1477 | i = le16_to_cpu(entry_list->de_num_used); | ||
1478 | dx_entry = &entry_list->de_entries[i]; | ||
1479 | |||
1480 | memset(dx_entry, 0, sizeof(*dx_entry)); | ||
1481 | dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash); | ||
1482 | dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash); | ||
1483 | dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk); | ||
1484 | |||
1485 | le16_add_cpu(&entry_list->de_num_used, 1); | ||
1486 | } | ||
1487 | |||
1488 | static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, | ||
1489 | struct ocfs2_dx_hinfo *hinfo, | ||
1490 | u64 dirent_blk, | ||
1491 | struct buffer_head *dx_leaf_bh) | ||
1492 | { | ||
1493 | int ret; | ||
1494 | struct ocfs2_dx_leaf *dx_leaf; | ||
1495 | |||
1496 | ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, | ||
1497 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1498 | if (ret) { | ||
1499 | mlog_errno(ret); | ||
1500 | goto out; | ||
1501 | } | ||
1502 | |||
1503 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
1504 | ocfs2_dx_entry_list_insert(&dx_leaf->dl_list, hinfo, dirent_blk); | ||
1505 | ocfs2_journal_dirty(handle, dx_leaf_bh); | ||
1506 | |||
1507 | out: | ||
1508 | return ret; | ||
1509 | } | ||
1510 | |||
1511 | static void ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle, | ||
1512 | struct ocfs2_dx_hinfo *hinfo, | ||
1513 | u64 dirent_blk, | ||
1514 | struct ocfs2_dx_root_block *dx_root) | ||
1515 | { | ||
1516 | ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk); | ||
1517 | } | ||
1518 | |||
1519 | static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle, | ||
1520 | struct ocfs2_dir_lookup_result *lookup) | ||
1521 | { | ||
1522 | int ret = 0; | ||
1523 | struct ocfs2_dx_root_block *dx_root; | ||
1524 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | ||
1525 | |||
1526 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
1527 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1528 | if (ret) { | ||
1529 | mlog_errno(ret); | ||
1530 | goto out; | ||
1531 | } | ||
1532 | |||
1533 | dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data; | ||
1534 | if (ocfs2_dx_root_inline(dx_root)) { | ||
1535 | ocfs2_dx_inline_root_insert(dir, handle, | ||
1536 | &lookup->dl_hinfo, | ||
1537 | lookup->dl_leaf_bh->b_blocknr, | ||
1538 | dx_root); | ||
1539 | } else { | ||
1540 | ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo, | ||
1541 | lookup->dl_leaf_bh->b_blocknr, | ||
1542 | lookup->dl_dx_leaf_bh); | ||
1543 | if (ret) | ||
1544 | goto out; | ||
1545 | } | ||
1546 | |||
1547 | le32_add_cpu(&dx_root->dr_num_entries, 1); | ||
1548 | ocfs2_journal_dirty(handle, dx_root_bh); | ||
1549 | |||
1550 | out: | ||
1551 | return ret; | ||
1552 | } | ||
1553 | |||
1554 | static void ocfs2_remove_block_from_free_list(struct inode *dir, | ||
1555 | handle_t *handle, | ||
1556 | struct ocfs2_dir_lookup_result *lookup) | ||
1557 | { | ||
1558 | struct ocfs2_dir_block_trailer *trailer, *prev; | ||
1559 | struct ocfs2_dx_root_block *dx_root; | ||
1560 | struct buffer_head *bh; | ||
1561 | |||
1562 | trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb); | ||
1563 | |||
1564 | if (ocfs2_free_list_at_root(lookup)) { | ||
1565 | bh = lookup->dl_dx_root_bh; | ||
1566 | dx_root = (struct ocfs2_dx_root_block *)bh->b_data; | ||
1567 | dx_root->dr_free_blk = trailer->db_free_next; | ||
1568 | } else { | ||
1569 | bh = lookup->dl_prev_leaf_bh; | ||
1570 | prev = ocfs2_trailer_from_bh(bh, dir->i_sb); | ||
1571 | prev->db_free_next = trailer->db_free_next; | ||
1572 | } | ||
1573 | |||
1574 | trailer->db_free_rec_len = cpu_to_le16(0); | ||
1575 | trailer->db_free_next = cpu_to_le64(0); | ||
1576 | |||
1577 | ocfs2_journal_dirty(handle, bh); | ||
1578 | ocfs2_journal_dirty(handle, lookup->dl_leaf_bh); | ||
1579 | } | ||
1580 | |||
1581 | /* | ||
1582 | * This expects that a journal write has been reserved on | ||
1583 | * lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh | ||
1584 | */ | ||
1585 | static void ocfs2_recalc_free_list(struct inode *dir, handle_t *handle, | ||
1586 | struct ocfs2_dir_lookup_result *lookup) | ||
1587 | { | ||
1588 | int max_rec_len; | ||
1589 | struct ocfs2_dir_block_trailer *trailer; | ||
1590 | |||
1591 | /* Walk dl_leaf_bh to figure out what the new free rec_len is. */ | ||
1592 | max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, lookup->dl_leaf_bh); | ||
1593 | if (max_rec_len) { | ||
1594 | /* | ||
1595 | * There's still room in this block, so no need to remove it | ||
1596 | * from the free list. In this case, we just want to update | ||
1597 | * the rec len accounting. | ||
1598 | */ | ||
1599 | trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb); | ||
1600 | trailer->db_free_rec_len = cpu_to_le16(max_rec_len); | ||
1601 | ocfs2_journal_dirty(handle, lookup->dl_leaf_bh); | ||
1602 | } else { | ||
1603 | ocfs2_remove_block_from_free_list(dir, handle, lookup); | ||
1604 | } | ||
1605 | } | ||
1606 | |||
666 | /* we don't always have a dentry for what we want to add, so people | 1607 | /* we don't always have a dentry for what we want to add, so people |
667 | * like orphan dir can call this instead. | 1608 | * like orphan dir can call this instead. |
668 | * | 1609 | * |
669 | * If you pass me insert_bh, I'll skip the search of the other dir | 1610 | * The lookup context must have been filled from |
670 | * blocks and put the record in there. | 1611 | * ocfs2_prepare_dir_for_insert. |
671 | */ | 1612 | */ |
672 | int __ocfs2_add_entry(handle_t *handle, | 1613 | int __ocfs2_add_entry(handle_t *handle, |
673 | struct inode *dir, | 1614 | struct inode *dir, |
674 | const char *name, int namelen, | 1615 | const char *name, int namelen, |
675 | struct inode *inode, u64 blkno, | 1616 | struct inode *inode, u64 blkno, |
676 | struct buffer_head *parent_fe_bh, | 1617 | struct buffer_head *parent_fe_bh, |
677 | struct buffer_head *insert_bh) | 1618 | struct ocfs2_dir_lookup_result *lookup) |
678 | { | 1619 | { |
679 | unsigned long offset; | 1620 | unsigned long offset; |
680 | unsigned short rec_len; | 1621 | unsigned short rec_len; |
@@ -683,6 +1624,7 @@ int __ocfs2_add_entry(handle_t *handle, | |||
683 | struct super_block *sb = dir->i_sb; | 1624 | struct super_block *sb = dir->i_sb; |
684 | int retval, status; | 1625 | int retval, status; |
685 | unsigned int size = sb->s_blocksize; | 1626 | unsigned int size = sb->s_blocksize; |
1627 | struct buffer_head *insert_bh = lookup->dl_leaf_bh; | ||
686 | char *data_start = insert_bh->b_data; | 1628 | char *data_start = insert_bh->b_data; |
687 | 1629 | ||
688 | mlog_entry_void(); | 1630 | mlog_entry_void(); |
@@ -690,7 +1632,31 @@ int __ocfs2_add_entry(handle_t *handle, | |||
690 | if (!namelen) | 1632 | if (!namelen) |
691 | return -EINVAL; | 1633 | return -EINVAL; |
692 | 1634 | ||
693 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 1635 | if (ocfs2_dir_indexed(dir)) { |
1636 | struct buffer_head *bh; | ||
1637 | |||
1638 | /* | ||
1639 | * An indexed dir may require that we update the free space | ||
1640 | * list. Reserve a write to the previous node in the list so | ||
1641 | * that we don't fail later. | ||
1642 | * | ||
1643 | * XXX: This can be either a dx_root_block, or an unindexed | ||
1644 | * directory tree leaf block. | ||
1645 | */ | ||
1646 | if (ocfs2_free_list_at_root(lookup)) { | ||
1647 | bh = lookup->dl_dx_root_bh; | ||
1648 | retval = ocfs2_journal_access_dr(handle, dir, bh, | ||
1649 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1650 | } else { | ||
1651 | bh = lookup->dl_prev_leaf_bh; | ||
1652 | retval = ocfs2_journal_access_db(handle, dir, bh, | ||
1653 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1654 | } | ||
1655 | if (retval) { | ||
1656 | mlog_errno(retval); | ||
1657 | return retval; | ||
1658 | } | ||
1659 | } else if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
694 | data_start = di->id2.i_data.id_data; | 1660 | data_start = di->id2.i_data.id_data; |
695 | size = i_size_read(dir); | 1661 | size = i_size_read(dir); |
696 | 1662 | ||
@@ -737,10 +1703,22 @@ int __ocfs2_add_entry(handle_t *handle, | |||
737 | status = ocfs2_journal_access_di(handle, dir, | 1703 | status = ocfs2_journal_access_di(handle, dir, |
738 | insert_bh, | 1704 | insert_bh, |
739 | OCFS2_JOURNAL_ACCESS_WRITE); | 1705 | OCFS2_JOURNAL_ACCESS_WRITE); |
740 | else | 1706 | else { |
741 | status = ocfs2_journal_access_db(handle, dir, | 1707 | status = ocfs2_journal_access_db(handle, dir, |
742 | insert_bh, | 1708 | insert_bh, |
743 | OCFS2_JOURNAL_ACCESS_WRITE); | 1709 | OCFS2_JOURNAL_ACCESS_WRITE); |
1710 | |||
1711 | if (ocfs2_dir_indexed(dir)) { | ||
1712 | status = ocfs2_dx_dir_insert(dir, | ||
1713 | handle, | ||
1714 | lookup); | ||
1715 | if (status) { | ||
1716 | mlog_errno(status); | ||
1717 | goto bail; | ||
1718 | } | ||
1719 | } | ||
1720 | } | ||
1721 | |||
744 | /* By now the buffer is marked for journaling */ | 1722 | /* By now the buffer is marked for journaling */ |
745 | offset += le16_to_cpu(de->rec_len); | 1723 | offset += le16_to_cpu(de->rec_len); |
746 | if (le64_to_cpu(de->inode)) { | 1724 | if (le64_to_cpu(de->inode)) { |
@@ -761,6 +1739,9 @@ int __ocfs2_add_entry(handle_t *handle, | |||
761 | de->name_len = namelen; | 1739 | de->name_len = namelen; |
762 | memcpy(de->name, name, namelen); | 1740 | memcpy(de->name, name, namelen); |
763 | 1741 | ||
1742 | if (ocfs2_dir_indexed(dir)) | ||
1743 | ocfs2_recalc_free_list(dir, handle, lookup); | ||
1744 | |||
764 | dir->i_version++; | 1745 | dir->i_version++; |
765 | status = ocfs2_journal_dirty(handle, insert_bh); | 1746 | status = ocfs2_journal_dirty(handle, insert_bh); |
766 | retval = 0; | 1747 | retval = 0; |
@@ -870,6 +1851,10 @@ out: | |||
870 | return 0; | 1851 | return 0; |
871 | } | 1852 | } |
872 | 1853 | ||
1854 | /* | ||
1855 | * NOTE: This function can be called against unindexed directories, | ||
1856 | * and indexed ones. | ||
1857 | */ | ||
873 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, | 1858 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, |
874 | u64 *f_version, | 1859 | u64 *f_version, |
875 | loff_t *f_pos, void *priv, | 1860 | loff_t *f_pos, void *priv, |
@@ -1071,31 +2056,22 @@ int ocfs2_find_files_on_disk(const char *name, | |||
1071 | int namelen, | 2056 | int namelen, |
1072 | u64 *blkno, | 2057 | u64 *blkno, |
1073 | struct inode *inode, | 2058 | struct inode *inode, |
1074 | struct buffer_head **dirent_bh, | 2059 | struct ocfs2_dir_lookup_result *lookup) |
1075 | struct ocfs2_dir_entry **dirent) | ||
1076 | { | 2060 | { |
1077 | int status = -ENOENT; | 2061 | int status = -ENOENT; |
1078 | 2062 | ||
1079 | mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", | 2063 | mlog(0, "name=%.*s, blkno=%p, inode=%llu\n", namelen, name, blkno, |
1080 | namelen, name, blkno, inode, dirent_bh, dirent); | 2064 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
1081 | 2065 | ||
1082 | *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); | 2066 | status = ocfs2_find_entry(name, namelen, inode, lookup); |
1083 | if (!*dirent_bh || !*dirent) { | 2067 | if (status) |
1084 | status = -ENOENT; | ||
1085 | goto leave; | 2068 | goto leave; |
1086 | } | ||
1087 | 2069 | ||
1088 | *blkno = le64_to_cpu((*dirent)->inode); | 2070 | *blkno = le64_to_cpu(lookup->dl_entry->inode); |
1089 | 2071 | ||
1090 | status = 0; | 2072 | status = 0; |
1091 | leave: | 2073 | leave: |
1092 | if (status < 0) { | ||
1093 | *dirent = NULL; | ||
1094 | brelse(*dirent_bh); | ||
1095 | *dirent_bh = NULL; | ||
1096 | } | ||
1097 | 2074 | ||
1098 | mlog_exit(status); | ||
1099 | return status; | 2075 | return status; |
1100 | } | 2076 | } |
1101 | 2077 | ||
@@ -1107,11 +2083,10 @@ int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, | |||
1107 | int namelen, u64 *blkno) | 2083 | int namelen, u64 *blkno) |
1108 | { | 2084 | { |
1109 | int ret; | 2085 | int ret; |
1110 | struct buffer_head *bh = NULL; | 2086 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
1111 | struct ocfs2_dir_entry *dirent = NULL; | ||
1112 | 2087 | ||
1113 | ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &bh, &dirent); | 2088 | ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &lookup); |
1114 | brelse(bh); | 2089 | ocfs2_free_dir_lookup_result(&lookup); |
1115 | 2090 | ||
1116 | return ret; | 2091 | return ret; |
1117 | } | 2092 | } |
@@ -1128,20 +2103,18 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
1128 | int namelen) | 2103 | int namelen) |
1129 | { | 2104 | { |
1130 | int ret; | 2105 | int ret; |
1131 | struct buffer_head *dirent_bh = NULL; | 2106 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
1132 | struct ocfs2_dir_entry *dirent = NULL; | ||
1133 | 2107 | ||
1134 | mlog_entry("dir %llu, name '%.*s'\n", | 2108 | mlog_entry("dir %llu, name '%.*s'\n", |
1135 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); | 2109 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); |
1136 | 2110 | ||
1137 | ret = -EEXIST; | 2111 | ret = -EEXIST; |
1138 | dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); | 2112 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) |
1139 | if (dirent_bh) | ||
1140 | goto bail; | 2113 | goto bail; |
1141 | 2114 | ||
1142 | ret = 0; | 2115 | ret = 0; |
1143 | bail: | 2116 | bail: |
1144 | brelse(dirent_bh); | 2117 | ocfs2_free_dir_lookup_result(&lookup); |
1145 | 2118 | ||
1146 | mlog_exit(ret); | 2119 | mlog_exit(ret); |
1147 | return ret; | 2120 | return ret; |
@@ -1151,6 +2124,7 @@ struct ocfs2_empty_dir_priv { | |||
1151 | unsigned seen_dot; | 2124 | unsigned seen_dot; |
1152 | unsigned seen_dot_dot; | 2125 | unsigned seen_dot_dot; |
1153 | unsigned seen_other; | 2126 | unsigned seen_other; |
2127 | unsigned dx_dir; | ||
1154 | }; | 2128 | }; |
1155 | static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, | 2129 | static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, |
1156 | loff_t pos, u64 ino, unsigned type) | 2130 | loff_t pos, u64 ino, unsigned type) |
@@ -1160,6 +2134,13 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, | |||
1160 | /* | 2134 | /* |
1161 | * Check the positions of "." and ".." records to be sure | 2135 | * Check the positions of "." and ".." records to be sure |
1162 | * they're in the correct place. | 2136 | * they're in the correct place. |
2137 | * | ||
2138 | * Indexed directories don't need to proceed past the first | ||
2139 | * two entries, so we end the scan after seeing '..'. Despite | ||
2140 | * that, we allow the scan to proceed in the event that we | ||
2141 | * have a corrupted indexed directory (no dot or dot dot | ||
2142 | * entries). This allows us to double check for existing | ||
2143 | * entries which might not have been found in the index. | ||
1163 | */ | 2144 | */ |
1164 | if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) { | 2145 | if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) { |
1165 | p->seen_dot = 1; | 2146 | p->seen_dot = 1; |
@@ -1169,16 +2150,57 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, | |||
1169 | if (name_len == 2 && !strncmp("..", name, 2) && | 2150 | if (name_len == 2 && !strncmp("..", name, 2) && |
1170 | pos == OCFS2_DIR_REC_LEN(1)) { | 2151 | pos == OCFS2_DIR_REC_LEN(1)) { |
1171 | p->seen_dot_dot = 1; | 2152 | p->seen_dot_dot = 1; |
2153 | |||
2154 | if (p->dx_dir && p->seen_dot) | ||
2155 | return 1; | ||
2156 | |||
1172 | return 0; | 2157 | return 0; |
1173 | } | 2158 | } |
1174 | 2159 | ||
1175 | p->seen_other = 1; | 2160 | p->seen_other = 1; |
1176 | return 1; | 2161 | return 1; |
1177 | } | 2162 | } |
2163 | |||
2164 | static int ocfs2_empty_dir_dx(struct inode *inode, | ||
2165 | struct ocfs2_empty_dir_priv *priv) | ||
2166 | { | ||
2167 | int ret; | ||
2168 | struct buffer_head *di_bh = NULL; | ||
2169 | struct buffer_head *dx_root_bh = NULL; | ||
2170 | struct ocfs2_dinode *di; | ||
2171 | struct ocfs2_dx_root_block *dx_root; | ||
2172 | |||
2173 | priv->dx_dir = 1; | ||
2174 | |||
2175 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
2176 | if (ret) { | ||
2177 | mlog_errno(ret); | ||
2178 | goto out; | ||
2179 | } | ||
2180 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2181 | |||
2182 | ret = ocfs2_read_dx_root(inode, di, &dx_root_bh); | ||
2183 | if (ret) { | ||
2184 | mlog_errno(ret); | ||
2185 | goto out; | ||
2186 | } | ||
2187 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2188 | |||
2189 | if (le32_to_cpu(dx_root->dr_num_entries) != 2) | ||
2190 | priv->seen_other = 1; | ||
2191 | |||
2192 | out: | ||
2193 | brelse(di_bh); | ||
2194 | brelse(dx_root_bh); | ||
2195 | return ret; | ||
2196 | } | ||
2197 | |||
1178 | /* | 2198 | /* |
1179 | * routine to check that the specified directory is empty (for rmdir) | 2199 | * routine to check that the specified directory is empty (for rmdir) |
1180 | * | 2200 | * |
1181 | * Returns 1 if dir is empty, zero otherwise. | 2201 | * Returns 1 if dir is empty, zero otherwise. |
2202 | * | ||
2203 | * XXX: This is a performance problem for unindexed directories. | ||
1182 | */ | 2204 | */ |
1183 | int ocfs2_empty_dir(struct inode *inode) | 2205 | int ocfs2_empty_dir(struct inode *inode) |
1184 | { | 2206 | { |
@@ -1188,6 +2210,16 @@ int ocfs2_empty_dir(struct inode *inode) | |||
1188 | 2210 | ||
1189 | memset(&priv, 0, sizeof(priv)); | 2211 | memset(&priv, 0, sizeof(priv)); |
1190 | 2212 | ||
2213 | if (ocfs2_dir_indexed(inode)) { | ||
2214 | ret = ocfs2_empty_dir_dx(inode, &priv); | ||
2215 | if (ret) | ||
2216 | mlog_errno(ret); | ||
2217 | /* | ||
2218 | * We still run ocfs2_dir_foreach to get the checks | ||
2219 | * for "." and "..". | ||
2220 | */ | ||
2221 | } | ||
2222 | |||
1191 | ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir); | 2223 | ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir); |
1192 | if (ret) | 2224 | if (ret) |
1193 | mlog_errno(ret); | 2225 | mlog_errno(ret); |
@@ -1280,7 +2312,8 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1280 | struct inode *parent, | 2312 | struct inode *parent, |
1281 | struct inode *inode, | 2313 | struct inode *inode, |
1282 | struct buffer_head *fe_bh, | 2314 | struct buffer_head *fe_bh, |
1283 | struct ocfs2_alloc_context *data_ac) | 2315 | struct ocfs2_alloc_context *data_ac, |
2316 | struct buffer_head **ret_new_bh) | ||
1284 | { | 2317 | { |
1285 | int status; | 2318 | int status; |
1286 | unsigned int size = osb->sb->s_blocksize; | 2319 | unsigned int size = osb->sb->s_blocksize; |
@@ -1289,7 +2322,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1289 | 2322 | ||
1290 | mlog_entry_void(); | 2323 | mlog_entry_void(); |
1291 | 2324 | ||
1292 | if (ocfs2_supports_dir_trailer(osb)) | 2325 | if (ocfs2_new_dir_wants_trailer(inode)) |
1293 | size = ocfs2_dir_trailer_blk_off(parent->i_sb); | 2326 | size = ocfs2_dir_trailer_blk_off(parent->i_sb); |
1294 | 2327 | ||
1295 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, | 2328 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, |
@@ -1310,8 +2343,19 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1310 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); | 2343 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); |
1311 | 2344 | ||
1312 | de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); | 2345 | de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size); |
1313 | if (ocfs2_supports_dir_trailer(osb)) | 2346 | if (ocfs2_new_dir_wants_trailer(inode)) { |
1314 | ocfs2_init_dir_trailer(inode, new_bh); | 2347 | int size = le16_to_cpu(de->rec_len); |
2348 | |||
2349 | /* | ||
2350 | * Figure out the size of the hole left over after | ||
2351 | * insertion of '.' and '..'. The trailer wants this | ||
2352 | * information. | ||
2353 | */ | ||
2354 | size -= OCFS2_DIR_REC_LEN(2); | ||
2355 | size -= sizeof(struct ocfs2_dir_block_trailer); | ||
2356 | |||
2357 | ocfs2_init_dir_trailer(inode, new_bh, size); | ||
2358 | } | ||
1315 | 2359 | ||
1316 | status = ocfs2_journal_dirty(handle, new_bh); | 2360 | status = ocfs2_journal_dirty(handle, new_bh); |
1317 | if (status < 0) { | 2361 | if (status < 0) { |
@@ -1329,6 +2373,10 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1329 | } | 2373 | } |
1330 | 2374 | ||
1331 | status = 0; | 2375 | status = 0; |
2376 | if (ret_new_bh) { | ||
2377 | *ret_new_bh = new_bh; | ||
2378 | new_bh = NULL; | ||
2379 | } | ||
1332 | bail: | 2380 | bail: |
1333 | brelse(new_bh); | 2381 | brelse(new_bh); |
1334 | 2382 | ||
@@ -1336,20 +2384,427 @@ bail: | |||
1336 | return status; | 2384 | return status; |
1337 | } | 2385 | } |
1338 | 2386 | ||
2387 | static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | ||
2388 | handle_t *handle, struct inode *dir, | ||
2389 | struct buffer_head *di_bh, | ||
2390 | struct buffer_head *dirdata_bh, | ||
2391 | struct ocfs2_alloc_context *meta_ac, | ||
2392 | int dx_inline, u32 num_entries, | ||
2393 | struct buffer_head **ret_dx_root_bh) | ||
2394 | { | ||
2395 | int ret; | ||
2396 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
2397 | u16 dr_suballoc_bit; | ||
2398 | u64 dr_blkno; | ||
2399 | unsigned int num_bits; | ||
2400 | struct buffer_head *dx_root_bh = NULL; | ||
2401 | struct ocfs2_dx_root_block *dx_root; | ||
2402 | struct ocfs2_dir_block_trailer *trailer = | ||
2403 | ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); | ||
2404 | |||
2405 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, | ||
2406 | &num_bits, &dr_blkno); | ||
2407 | if (ret) { | ||
2408 | mlog_errno(ret); | ||
2409 | goto out; | ||
2410 | } | ||
2411 | |||
2412 | mlog(0, "Dir %llu, attach new index block: %llu\n", | ||
2413 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
2414 | (unsigned long long)dr_blkno); | ||
2415 | |||
2416 | dx_root_bh = sb_getblk(osb->sb, dr_blkno); | ||
2417 | if (dx_root_bh == NULL) { | ||
2418 | ret = -EIO; | ||
2419 | goto out; | ||
2420 | } | ||
2421 | ocfs2_set_new_buffer_uptodate(dir, dx_root_bh); | ||
2422 | |||
2423 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
2424 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2425 | if (ret < 0) { | ||
2426 | mlog_errno(ret); | ||
2427 | goto out; | ||
2428 | } | ||
2429 | |||
2430 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2431 | memset(dx_root, 0, osb->sb->s_blocksize); | ||
2432 | strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); | ||
2433 | dx_root->dr_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
2434 | dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); | ||
2435 | dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2436 | dx_root->dr_blkno = cpu_to_le64(dr_blkno); | ||
2437 | dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno); | ||
2438 | dx_root->dr_num_entries = cpu_to_le32(num_entries); | ||
2439 | if (le16_to_cpu(trailer->db_free_rec_len)) | ||
2440 | dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr); | ||
2441 | else | ||
2442 | dx_root->dr_free_blk = cpu_to_le64(0); | ||
2443 | |||
2444 | if (dx_inline) { | ||
2445 | dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE; | ||
2446 | dx_root->dr_entries.de_count = | ||
2447 | cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb)); | ||
2448 | } else { | ||
2449 | dx_root->dr_list.l_count = | ||
2450 | cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); | ||
2451 | } | ||
2452 | |||
2453 | ret = ocfs2_journal_dirty(handle, dx_root_bh); | ||
2454 | if (ret) | ||
2455 | mlog_errno(ret); | ||
2456 | |||
2457 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | ||
2458 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2459 | if (ret) { | ||
2460 | mlog_errno(ret); | ||
2461 | goto out; | ||
2462 | } | ||
2463 | |||
2464 | di->i_dx_root = cpu_to_le64(dr_blkno); | ||
2465 | |||
2466 | OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; | ||
2467 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | ||
2468 | |||
2469 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
2470 | if (ret) | ||
2471 | mlog_errno(ret); | ||
2472 | |||
2473 | *ret_dx_root_bh = dx_root_bh; | ||
2474 | dx_root_bh = NULL; | ||
2475 | |||
2476 | out: | ||
2477 | brelse(dx_root_bh); | ||
2478 | return ret; | ||
2479 | } | ||
2480 | |||
2481 | static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb, | ||
2482 | handle_t *handle, struct inode *dir, | ||
2483 | struct buffer_head **dx_leaves, | ||
2484 | int num_dx_leaves, u64 start_blk) | ||
2485 | { | ||
2486 | int ret, i; | ||
2487 | struct ocfs2_dx_leaf *dx_leaf; | ||
2488 | struct buffer_head *bh; | ||
2489 | |||
2490 | for (i = 0; i < num_dx_leaves; i++) { | ||
2491 | bh = sb_getblk(osb->sb, start_blk + i); | ||
2492 | if (bh == NULL) { | ||
2493 | ret = -EIO; | ||
2494 | goto out; | ||
2495 | } | ||
2496 | dx_leaves[i] = bh; | ||
2497 | |||
2498 | ocfs2_set_new_buffer_uptodate(dir, bh); | ||
2499 | |||
2500 | ret = ocfs2_journal_access_dl(handle, dir, bh, | ||
2501 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2502 | if (ret < 0) { | ||
2503 | mlog_errno(ret); | ||
2504 | goto out; | ||
2505 | } | ||
2506 | |||
2507 | dx_leaf = (struct ocfs2_dx_leaf *) bh->b_data; | ||
2508 | |||
2509 | memset(dx_leaf, 0, osb->sb->s_blocksize); | ||
2510 | strcpy(dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE); | ||
2511 | dx_leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2512 | dx_leaf->dl_blkno = cpu_to_le64(bh->b_blocknr); | ||
2513 | dx_leaf->dl_list.de_count = | ||
2514 | cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb)); | ||
2515 | |||
2516 | mlog(0, | ||
2517 | "Dir %llu, format dx_leaf: %llu, entry count: %u\n", | ||
2518 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
2519 | (unsigned long long)bh->b_blocknr, | ||
2520 | le16_to_cpu(dx_leaf->dl_list.de_count)); | ||
2521 | |||
2522 | ocfs2_journal_dirty(handle, bh); | ||
2523 | } | ||
2524 | |||
2525 | ret = 0; | ||
2526 | out: | ||
2527 | return ret; | ||
2528 | } | ||
2529 | |||
2530 | /* | ||
2531 | * Allocates and formats a new cluster for use in an indexed dir | ||
2532 | * leaf. This version will not do the extent insert, so that it can be | ||
2533 | * used by operations which need careful ordering. | ||
2534 | */ | ||
2535 | static int __ocfs2_dx_dir_new_cluster(struct inode *dir, | ||
2536 | u32 cpos, handle_t *handle, | ||
2537 | struct ocfs2_alloc_context *data_ac, | ||
2538 | struct buffer_head **dx_leaves, | ||
2539 | int num_dx_leaves, u64 *ret_phys_blkno) | ||
2540 | { | ||
2541 | int ret; | ||
2542 | u32 phys, num; | ||
2543 | u64 phys_blkno; | ||
2544 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2545 | |||
2546 | /* | ||
2547 | * XXX: For create, this should claim cluster for the index | ||
2548 | * *before* the unindexed insert so that we have a better | ||
2549 | * chance of contiguousness as the directory grows in number | ||
2550 | * of entries. | ||
2551 | */ | ||
2552 | ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num); | ||
2553 | if (ret) { | ||
2554 | mlog_errno(ret); | ||
2555 | goto out; | ||
2556 | } | ||
2557 | |||
2558 | /* | ||
2559 | * Format the new cluster first. That way, we're inserting | ||
2560 | * valid data. | ||
2561 | */ | ||
2562 | phys_blkno = ocfs2_clusters_to_blocks(osb->sb, phys); | ||
2563 | ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves, | ||
2564 | num_dx_leaves, phys_blkno); | ||
2565 | if (ret) { | ||
2566 | mlog_errno(ret); | ||
2567 | goto out; | ||
2568 | } | ||
2569 | |||
2570 | *ret_phys_blkno = phys_blkno; | ||
2571 | out: | ||
2572 | return ret; | ||
2573 | } | ||
2574 | |||
2575 | static int ocfs2_dx_dir_new_cluster(struct inode *dir, | ||
2576 | struct ocfs2_extent_tree *et, | ||
2577 | u32 cpos, handle_t *handle, | ||
2578 | struct ocfs2_alloc_context *data_ac, | ||
2579 | struct ocfs2_alloc_context *meta_ac, | ||
2580 | struct buffer_head **dx_leaves, | ||
2581 | int num_dx_leaves) | ||
2582 | { | ||
2583 | int ret; | ||
2584 | u64 phys_blkno; | ||
2585 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2586 | |||
2587 | ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves, | ||
2588 | num_dx_leaves, &phys_blkno); | ||
2589 | if (ret) { | ||
2590 | mlog_errno(ret); | ||
2591 | goto out; | ||
2592 | } | ||
2593 | |||
2594 | ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0, | ||
2595 | meta_ac); | ||
2596 | if (ret) | ||
2597 | mlog_errno(ret); | ||
2598 | out: | ||
2599 | return ret; | ||
2600 | } | ||
2601 | |||
2602 | static struct buffer_head **ocfs2_dx_dir_kmalloc_leaves(struct super_block *sb, | ||
2603 | int *ret_num_leaves) | ||
2604 | { | ||
2605 | int num_dx_leaves = ocfs2_clusters_to_blocks(sb, 1); | ||
2606 | struct buffer_head **dx_leaves; | ||
2607 | |||
2608 | dx_leaves = kcalloc(num_dx_leaves, sizeof(struct buffer_head *), | ||
2609 | GFP_NOFS); | ||
2610 | if (dx_leaves && ret_num_leaves) | ||
2611 | *ret_num_leaves = num_dx_leaves; | ||
2612 | |||
2613 | return dx_leaves; | ||
2614 | } | ||
2615 | |||
2616 | static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, | ||
2617 | handle_t *handle, | ||
2618 | struct inode *parent, | ||
2619 | struct inode *inode, | ||
2620 | struct buffer_head *di_bh, | ||
2621 | struct ocfs2_alloc_context *data_ac, | ||
2622 | struct ocfs2_alloc_context *meta_ac) | ||
2623 | { | ||
2624 | int ret; | ||
2625 | struct buffer_head *leaf_bh = NULL; | ||
2626 | struct buffer_head *dx_root_bh = NULL; | ||
2627 | struct ocfs2_dx_hinfo hinfo; | ||
2628 | struct ocfs2_dx_root_block *dx_root; | ||
2629 | struct ocfs2_dx_entry_list *entry_list; | ||
2630 | |||
2631 | /* | ||
2632 | * Our strategy is to create the directory as though it were | ||
2633 | * unindexed, then add the index block. This works with very | ||
2634 | * little complication since the state of a new directory is a | ||
2635 | * very well known quantity. | ||
2636 | * | ||
2637 | * Essentially, we have two dirents ("." and ".."), in the 1st | ||
2638 | * block which need indexing. These are easily inserted into | ||
2639 | * the index block. | ||
2640 | */ | ||
2641 | |||
2642 | ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh, | ||
2643 | data_ac, &leaf_bh); | ||
2644 | if (ret) { | ||
2645 | mlog_errno(ret); | ||
2646 | goto out; | ||
2647 | } | ||
2648 | |||
2649 | ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, leaf_bh, | ||
2650 | meta_ac, 1, 2, &dx_root_bh); | ||
2651 | if (ret) { | ||
2652 | mlog_errno(ret); | ||
2653 | goto out; | ||
2654 | } | ||
2655 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2656 | entry_list = &dx_root->dr_entries; | ||
2657 | |||
2658 | /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */ | ||
2659 | ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo); | ||
2660 | ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr); | ||
2661 | |||
2662 | ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo); | ||
2663 | ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr); | ||
2664 | |||
2665 | out: | ||
2666 | brelse(dx_root_bh); | ||
2667 | brelse(leaf_bh); | ||
2668 | return ret; | ||
2669 | } | ||
2670 | |||
1339 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, | 2671 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, |
1340 | handle_t *handle, | 2672 | handle_t *handle, |
1341 | struct inode *parent, | 2673 | struct inode *parent, |
1342 | struct inode *inode, | 2674 | struct inode *inode, |
1343 | struct buffer_head *fe_bh, | 2675 | struct buffer_head *fe_bh, |
1344 | struct ocfs2_alloc_context *data_ac) | 2676 | struct ocfs2_alloc_context *data_ac, |
2677 | struct ocfs2_alloc_context *meta_ac) | ||
2678 | |||
1345 | { | 2679 | { |
1346 | BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL); | 2680 | BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL); |
1347 | 2681 | ||
1348 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 2682 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
1349 | return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh); | 2683 | return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh); |
1350 | 2684 | ||
2685 | if (ocfs2_supports_indexed_dirs(osb)) | ||
2686 | return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, fe_bh, | ||
2687 | data_ac, meta_ac); | ||
2688 | |||
1351 | return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh, | 2689 | return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh, |
1352 | data_ac); | 2690 | data_ac, NULL); |
2691 | } | ||
2692 | |||
2693 | static int ocfs2_dx_dir_index_block(struct inode *dir, | ||
2694 | handle_t *handle, | ||
2695 | struct buffer_head **dx_leaves, | ||
2696 | int num_dx_leaves, | ||
2697 | u32 *num_dx_entries, | ||
2698 | struct buffer_head *dirent_bh) | ||
2699 | { | ||
2700 | int ret, namelen, i; | ||
2701 | char *de_buf, *limit; | ||
2702 | struct ocfs2_dir_entry *de; | ||
2703 | struct buffer_head *dx_leaf_bh; | ||
2704 | struct ocfs2_dx_hinfo hinfo; | ||
2705 | u64 dirent_blk = dirent_bh->b_blocknr; | ||
2706 | |||
2707 | de_buf = dirent_bh->b_data; | ||
2708 | limit = de_buf + dir->i_sb->s_blocksize; | ||
2709 | |||
2710 | while (de_buf < limit) { | ||
2711 | de = (struct ocfs2_dir_entry *)de_buf; | ||
2712 | |||
2713 | namelen = de->name_len; | ||
2714 | if (!namelen || !de->inode) | ||
2715 | goto inc; | ||
2716 | |||
2717 | ocfs2_dx_dir_name_hash(dir, de->name, namelen, &hinfo); | ||
2718 | |||
2719 | i = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), &hinfo); | ||
2720 | dx_leaf_bh = dx_leaves[i]; | ||
2721 | |||
2722 | ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &hinfo, | ||
2723 | dirent_blk, dx_leaf_bh); | ||
2724 | if (ret) { | ||
2725 | mlog_errno(ret); | ||
2726 | goto out; | ||
2727 | } | ||
2728 | |||
2729 | *num_dx_entries = *num_dx_entries + 1; | ||
2730 | |||
2731 | inc: | ||
2732 | de_buf += le16_to_cpu(de->rec_len); | ||
2733 | } | ||
2734 | |||
2735 | out: | ||
2736 | return ret; | ||
2737 | } | ||
2738 | |||
2739 | /* | ||
2740 | * XXX: This expects dx_root_bh to already be part of the transaction. | ||
2741 | */ | ||
2742 | static void ocfs2_dx_dir_index_root_block(struct inode *dir, | ||
2743 | struct buffer_head *dx_root_bh, | ||
2744 | struct buffer_head *dirent_bh) | ||
2745 | { | ||
2746 | char *de_buf, *limit; | ||
2747 | struct ocfs2_dx_root_block *dx_root; | ||
2748 | struct ocfs2_dir_entry *de; | ||
2749 | struct ocfs2_dx_hinfo hinfo; | ||
2750 | u64 dirent_blk = dirent_bh->b_blocknr; | ||
2751 | |||
2752 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2753 | |||
2754 | de_buf = dirent_bh->b_data; | ||
2755 | limit = de_buf + dir->i_sb->s_blocksize; | ||
2756 | |||
2757 | while (de_buf < limit) { | ||
2758 | de = (struct ocfs2_dir_entry *)de_buf; | ||
2759 | |||
2760 | if (!de->name_len || !de->inode) | ||
2761 | goto inc; | ||
2762 | |||
2763 | ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo); | ||
2764 | |||
2765 | mlog(0, | ||
2766 | "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n", | ||
2767 | (unsigned long long)dir->i_ino, hinfo.major_hash, | ||
2768 | hinfo.minor_hash, | ||
2769 | le16_to_cpu(dx_root->dr_entries.de_num_used), | ||
2770 | de->name_len, de->name); | ||
2771 | |||
2772 | ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo, | ||
2773 | dirent_blk); | ||
2774 | |||
2775 | le32_add_cpu(&dx_root->dr_num_entries, 1); | ||
2776 | inc: | ||
2777 | de_buf += le16_to_cpu(de->rec_len); | ||
2778 | } | ||
2779 | } | ||
2780 | |||
2781 | /* | ||
2782 | * Count the number of inline directory entries in di_bh and compare | ||
2783 | * them against the number of entries we can hold in an inline dx root | ||
2784 | * block. | ||
2785 | */ | ||
2786 | static int ocfs2_new_dx_should_be_inline(struct inode *dir, | ||
2787 | struct buffer_head *di_bh) | ||
2788 | { | ||
2789 | int dirent_count = 0; | ||
2790 | char *de_buf, *limit; | ||
2791 | struct ocfs2_dir_entry *de; | ||
2792 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2793 | |||
2794 | de_buf = di->id2.i_data.id_data; | ||
2795 | limit = de_buf + i_size_read(dir); | ||
2796 | |||
2797 | while (de_buf < limit) { | ||
2798 | de = (struct ocfs2_dir_entry *)de_buf; | ||
2799 | |||
2800 | if (de->name_len && de->inode) | ||
2801 | dirent_count++; | ||
2802 | |||
2803 | de_buf += le16_to_cpu(de->rec_len); | ||
2804 | } | ||
2805 | |||
2806 | /* We are careful to leave room for one extra record. */ | ||
2807 | return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb); | ||
1353 | } | 2808 | } |
1354 | 2809 | ||
1355 | /* | 2810 | /* |
@@ -1358,18 +2813,26 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb, | |||
1358 | * expansion from an inline directory to one with extents. The first dir block | 2813 | * expansion from an inline directory to one with extents. The first dir block |
1359 | * in that case is taken from the inline data portion of the inode block. | 2814 | * in that case is taken from the inline data portion of the inode block. |
1360 | * | 2815 | * |
2816 | * This will also return the largest amount of contiguous space for a dirent | ||
2817 | * in the block. That value is *not* necessarily the last dirent, even after | ||
2818 | * expansion. The directory indexing code wants this value for free space | ||
2819 | * accounting. We do this here since we're already walking the entire dir | ||
2820 | * block. | ||
2821 | * | ||
1361 | * We add the dir trailer if this filesystem wants it. | 2822 | * We add the dir trailer if this filesystem wants it. |
1362 | */ | 2823 | */ |
1363 | static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | 2824 | static unsigned int ocfs2_expand_last_dirent(char *start, unsigned int old_size, |
1364 | struct super_block *sb) | 2825 | struct inode *dir) |
1365 | { | 2826 | { |
2827 | struct super_block *sb = dir->i_sb; | ||
1366 | struct ocfs2_dir_entry *de; | 2828 | struct ocfs2_dir_entry *de; |
1367 | struct ocfs2_dir_entry *prev_de; | 2829 | struct ocfs2_dir_entry *prev_de; |
1368 | char *de_buf, *limit; | 2830 | char *de_buf, *limit; |
1369 | unsigned int new_size = sb->s_blocksize; | 2831 | unsigned int new_size = sb->s_blocksize; |
1370 | unsigned int bytes; | 2832 | unsigned int bytes, this_hole; |
2833 | unsigned int largest_hole = 0; | ||
1371 | 2834 | ||
1372 | if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) | 2835 | if (ocfs2_new_dir_wants_trailer(dir)) |
1373 | new_size = ocfs2_dir_trailer_blk_off(sb); | 2836 | new_size = ocfs2_dir_trailer_blk_off(sb); |
1374 | 2837 | ||
1375 | bytes = new_size - old_size; | 2838 | bytes = new_size - old_size; |
@@ -1378,12 +2841,26 @@ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | |||
1378 | de_buf = start; | 2841 | de_buf = start; |
1379 | de = (struct ocfs2_dir_entry *)de_buf; | 2842 | de = (struct ocfs2_dir_entry *)de_buf; |
1380 | do { | 2843 | do { |
2844 | this_hole = ocfs2_figure_dirent_hole(de); | ||
2845 | if (this_hole > largest_hole) | ||
2846 | largest_hole = this_hole; | ||
2847 | |||
1381 | prev_de = de; | 2848 | prev_de = de; |
1382 | de_buf += le16_to_cpu(de->rec_len); | 2849 | de_buf += le16_to_cpu(de->rec_len); |
1383 | de = (struct ocfs2_dir_entry *)de_buf; | 2850 | de = (struct ocfs2_dir_entry *)de_buf; |
1384 | } while (de_buf < limit); | 2851 | } while (de_buf < limit); |
1385 | 2852 | ||
1386 | le16_add_cpu(&prev_de->rec_len, bytes); | 2853 | le16_add_cpu(&prev_de->rec_len, bytes); |
2854 | |||
2855 | /* We need to double check this after modification of the final | ||
2856 | * dirent. */ | ||
2857 | this_hole = ocfs2_figure_dirent_hole(prev_de); | ||
2858 | if (this_hole > largest_hole) | ||
2859 | largest_hole = this_hole; | ||
2860 | |||
2861 | if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) | ||
2862 | return largest_hole; | ||
2863 | return 0; | ||
1387 | } | 2864 | } |
1388 | 2865 | ||
1389 | /* | 2866 | /* |
@@ -1396,29 +2873,61 @@ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | |||
1396 | */ | 2873 | */ |
1397 | static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | 2874 | static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, |
1398 | unsigned int blocks_wanted, | 2875 | unsigned int blocks_wanted, |
2876 | struct ocfs2_dir_lookup_result *lookup, | ||
1399 | struct buffer_head **first_block_bh) | 2877 | struct buffer_head **first_block_bh) |
1400 | { | 2878 | { |
1401 | u32 alloc, bit_off, len; | 2879 | u32 alloc, dx_alloc, bit_off, len, num_dx_entries = 0; |
1402 | struct super_block *sb = dir->i_sb; | 2880 | struct super_block *sb = dir->i_sb; |
1403 | int ret, credits = ocfs2_inline_to_extents_credits(sb); | 2881 | int ret, i, num_dx_leaves = 0, dx_inline = 0, |
1404 | u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; | 2882 | credits = ocfs2_inline_to_extents_credits(sb); |
2883 | u64 dx_insert_blkno, blkno, | ||
2884 | bytes = blocks_wanted << sb->s_blocksize_bits; | ||
1405 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 2885 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
1406 | struct ocfs2_inode_info *oi = OCFS2_I(dir); | 2886 | struct ocfs2_inode_info *oi = OCFS2_I(dir); |
1407 | struct ocfs2_alloc_context *data_ac; | 2887 | struct ocfs2_alloc_context *data_ac; |
2888 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1408 | struct buffer_head *dirdata_bh = NULL; | 2889 | struct buffer_head *dirdata_bh = NULL; |
2890 | struct buffer_head *dx_root_bh = NULL; | ||
2891 | struct buffer_head **dx_leaves = NULL; | ||
1409 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 2892 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
1410 | handle_t *handle; | 2893 | handle_t *handle; |
1411 | struct ocfs2_extent_tree et; | 2894 | struct ocfs2_extent_tree et; |
1412 | int did_quota = 0; | 2895 | struct ocfs2_extent_tree dx_et; |
2896 | int did_quota = 0, bytes_allocated = 0; | ||
1413 | 2897 | ||
1414 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); | 2898 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); |
1415 | 2899 | ||
1416 | alloc = ocfs2_clusters_for_bytes(sb, bytes); | 2900 | alloc = ocfs2_clusters_for_bytes(sb, bytes); |
2901 | dx_alloc = 0; | ||
2902 | |||
2903 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
2904 | credits += ocfs2_add_dir_index_credits(sb); | ||
2905 | |||
2906 | dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh); | ||
2907 | if (!dx_inline) { | ||
2908 | /* Add one more cluster for an index leaf */ | ||
2909 | dx_alloc++; | ||
2910 | dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb, | ||
2911 | &num_dx_leaves); | ||
2912 | if (!dx_leaves) { | ||
2913 | ret = -ENOMEM; | ||
2914 | mlog_errno(ret); | ||
2915 | goto out; | ||
2916 | } | ||
2917 | } | ||
2918 | |||
2919 | /* This gets us the dx_root */ | ||
2920 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
2921 | if (ret) { | ||
2922 | mlog_errno(ret); | ||
2923 | goto out; | ||
2924 | } | ||
2925 | } | ||
1417 | 2926 | ||
1418 | /* | 2927 | /* |
1419 | * We should never need more than 2 clusters for this - | 2928 | * We should never need more than 2 clusters for the unindexed |
1420 | * maximum dirent size is far less than one block. In fact, | 2929 | * tree - maximum dirent size is far less than one block. In |
1421 | * the only time we'd need more than one cluster is if | 2930 | * fact, the only time we'd need more than one cluster is if |
1422 | * blocksize == clustersize and the dirent won't fit in the | 2931 | * blocksize == clustersize and the dirent won't fit in the |
1423 | * extra space that the expansion to a single block gives. As | 2932 | * extra space that the expansion to a single block gives. As |
1424 | * of today, that only happens on 4k/4k file systems. | 2933 | * of today, that only happens on 4k/4k file systems. |
@@ -1435,7 +2944,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1435 | 2944 | ||
1436 | /* | 2945 | /* |
1437 | * Prepare for worst case allocation scenario of two separate | 2946 | * Prepare for worst case allocation scenario of two separate |
1438 | * extents. | 2947 | * extents in the unindexed tree. |
1439 | */ | 2948 | */ |
1440 | if (alloc == 2) | 2949 | if (alloc == 2) |
1441 | credits += OCFS2_SUBALLOC_ALLOC; | 2950 | credits += OCFS2_SUBALLOC_ALLOC; |
@@ -1448,11 +2957,29 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1448 | } | 2957 | } |
1449 | 2958 | ||
1450 | if (vfs_dq_alloc_space_nodirty(dir, | 2959 | if (vfs_dq_alloc_space_nodirty(dir, |
1451 | ocfs2_clusters_to_bytes(osb->sb, alloc))) { | 2960 | ocfs2_clusters_to_bytes(osb->sb, |
2961 | alloc + dx_alloc))) { | ||
1452 | ret = -EDQUOT; | 2962 | ret = -EDQUOT; |
1453 | goto out_commit; | 2963 | goto out_commit; |
1454 | } | 2964 | } |
1455 | did_quota = 1; | 2965 | did_quota = 1; |
2966 | |||
2967 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { | ||
2968 | /* | ||
2969 | * Allocate our index cluster first, to maximize the | ||
2970 | * possibility that unindexed leaves grow | ||
2971 | * contiguously. | ||
2972 | */ | ||
2973 | ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, | ||
2974 | dx_leaves, num_dx_leaves, | ||
2975 | &dx_insert_blkno); | ||
2976 | if (ret) { | ||
2977 | mlog_errno(ret); | ||
2978 | goto out_commit; | ||
2979 | } | ||
2980 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
2981 | } | ||
2982 | |||
1456 | /* | 2983 | /* |
1457 | * Try to claim as many clusters as the bitmap can give though | 2984 | * Try to claim as many clusters as the bitmap can give though |
1458 | * if we only get one now, that's enough to continue. The rest | 2985 | * if we only get one now, that's enough to continue. The rest |
@@ -1463,6 +2990,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1463 | mlog_errno(ret); | 2990 | mlog_errno(ret); |
1464 | goto out_commit; | 2991 | goto out_commit; |
1465 | } | 2992 | } |
2993 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
1466 | 2994 | ||
1467 | /* | 2995 | /* |
1468 | * Operations are carefully ordered so that we set up the new | 2996 | * Operations are carefully ordered so that we set up the new |
@@ -1489,9 +3017,16 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1489 | memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); | 3017 | memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); |
1490 | memset(dirdata_bh->b_data + i_size_read(dir), 0, | 3018 | memset(dirdata_bh->b_data + i_size_read(dir), 0, |
1491 | sb->s_blocksize - i_size_read(dir)); | 3019 | sb->s_blocksize - i_size_read(dir)); |
1492 | ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb); | 3020 | i = ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), dir); |
1493 | if (ocfs2_supports_dir_trailer(osb)) | 3021 | if (ocfs2_new_dir_wants_trailer(dir)) { |
1494 | ocfs2_init_dir_trailer(dir, dirdata_bh); | 3022 | /* |
3023 | * Prepare the dir trailer up front. It will otherwise look | ||
3024 | * like a valid dirent. Even if inserting the index fails | ||
3025 | * (unlikely), then all we'll have done is given first dir | ||
3026 | * block a small amount of fragmentation. | ||
3027 | */ | ||
3028 | ocfs2_init_dir_trailer(dir, dirdata_bh, i); | ||
3029 | } | ||
1495 | 3030 | ||
1496 | ret = ocfs2_journal_dirty(handle, dirdata_bh); | 3031 | ret = ocfs2_journal_dirty(handle, dirdata_bh); |
1497 | if (ret) { | 3032 | if (ret) { |
@@ -1499,6 +3034,24 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1499 | goto out_commit; | 3034 | goto out_commit; |
1500 | } | 3035 | } |
1501 | 3036 | ||
3037 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { | ||
3038 | /* | ||
3039 | * Dx dirs with an external cluster need to do this up | ||
3040 | * front. Inline dx roots get handled later, after | |
3041 | * we've allocated our root block. We get passed back | ||
3042 | * a total number of items so that dr_num_entries can | ||
3043 | * be correctly set once the dx_root has been | ||
3044 | * allocated. | ||
3045 | */ | ||
3046 | ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves, | ||
3047 | num_dx_leaves, &num_dx_entries, | ||
3048 | dirdata_bh); | ||
3049 | if (ret) { | ||
3050 | mlog_errno(ret); | ||
3051 | goto out_commit; | ||
3052 | } | ||
3053 | } | ||
3054 | |||
1502 | /* | 3055 | /* |
1503 | * Set extent, i_size, etc on the directory. After this, the | 3056 | * Set extent, i_size, etc on the directory. After this, the |
1504 | * inode should contain the same exact dirents as before and | 3057 | * inode should contain the same exact dirents as before and |
@@ -1551,6 +3104,27 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1551 | goto out_commit; | 3104 | goto out_commit; |
1552 | } | 3105 | } |
1553 | 3106 | ||
3107 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
3108 | ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, | ||
3109 | dirdata_bh, meta_ac, dx_inline, | ||
3110 | num_dx_entries, &dx_root_bh); | ||
3111 | if (ret) { | ||
3112 | mlog_errno(ret); | ||
3113 | goto out_commit; | ||
3114 | } | ||
3115 | |||
3116 | if (dx_inline) { | ||
3117 | ocfs2_dx_dir_index_root_block(dir, dx_root_bh, | ||
3118 | dirdata_bh); | ||
3119 | } else { | ||
3120 | ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh); | ||
3121 | ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0, | ||
3122 | dx_insert_blkno, 1, 0, NULL); | ||
3123 | if (ret) | ||
3124 | mlog_errno(ret); | ||
3125 | } | ||
3126 | } | ||
3127 | |||
1554 | /* | 3128 | /* |
1555 | * We asked for two clusters, but only got one in the 1st | 3129 | * We asked for two clusters, but only got one in the 1st |
1556 | * pass. Claim the 2nd cluster as a separate extent. | 3130 | * pass. Claim the 2nd cluster as a separate extent. |
@@ -1570,15 +3144,32 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1570 | mlog_errno(ret); | 3144 | mlog_errno(ret); |
1571 | goto out_commit; | 3145 | goto out_commit; |
1572 | } | 3146 | } |
3147 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
1573 | } | 3148 | } |
1574 | 3149 | ||
1575 | *first_block_bh = dirdata_bh; | 3150 | *first_block_bh = dirdata_bh; |
1576 | dirdata_bh = NULL; | 3151 | dirdata_bh = NULL; |
3152 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
3153 | unsigned int off; | ||
3154 | |||
3155 | if (!dx_inline) { | ||
3156 | /* | ||
3157 | * We need to return the correct block within the | ||
3158 | * cluster which should hold our entry. | ||
3159 | */ | ||
3160 | off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), | ||
3161 | &lookup->dl_hinfo); | ||
3162 | get_bh(dx_leaves[off]); | ||
3163 | lookup->dl_dx_leaf_bh = dx_leaves[off]; | ||
3164 | } | ||
3165 | lookup->dl_dx_root_bh = dx_root_bh; | ||
3166 | dx_root_bh = NULL; | ||
3167 | } | ||
1577 | 3168 | ||
1578 | out_commit: | 3169 | out_commit: |
1579 | if (ret < 0 && did_quota) | 3170 | if (ret < 0 && did_quota) |
1580 | vfs_dq_free_space_nodirty(dir, | 3171 | vfs_dq_free_space_nodirty(dir, bytes_allocated); |
1581 | ocfs2_clusters_to_bytes(osb->sb, 2)); | 3172 | |
1582 | ocfs2_commit_trans(osb, handle); | 3173 | ocfs2_commit_trans(osb, handle); |
1583 | 3174 | ||
1584 | out_sem: | 3175 | out_sem: |
@@ -1587,8 +3178,17 @@ out_sem: | |||
1587 | out: | 3178 | out: |
1588 | if (data_ac) | 3179 | if (data_ac) |
1589 | ocfs2_free_alloc_context(data_ac); | 3180 | ocfs2_free_alloc_context(data_ac); |
3181 | if (meta_ac) | ||
3182 | ocfs2_free_alloc_context(meta_ac); | ||
3183 | |||
3184 | if (dx_leaves) { | ||
3185 | for (i = 0; i < num_dx_leaves; i++) | ||
3186 | brelse(dx_leaves[i]); | ||
3187 | kfree(dx_leaves); | ||
3188 | } | ||
1590 | 3189 | ||
1591 | brelse(dirdata_bh); | 3190 | brelse(dirdata_bh); |
3191 | brelse(dx_root_bh); | ||
1592 | 3192 | ||
1593 | return ret; | 3193 | return ret; |
1594 | } | 3194 | } |
@@ -1658,11 +3258,14 @@ bail: | |||
1658 | * is to be turned into an extent based one. The size of the dirent to | 3258 | * is to be turned into an extent based one. The size of the dirent to |
1659 | * insert might be larger than the space gained by growing to just one | 3259 | * insert might be larger than the space gained by growing to just one |
1660 | * block, so we may have to grow the inode by two blocks in that case. | 3260 | * block, so we may have to grow the inode by two blocks in that case. |
3261 | * | ||
3262 | * If the directory is already indexed, dx_root_bh must be provided. | ||
1661 | */ | 3263 | */ |
1662 | static int ocfs2_extend_dir(struct ocfs2_super *osb, | 3264 | static int ocfs2_extend_dir(struct ocfs2_super *osb, |
1663 | struct inode *dir, | 3265 | struct inode *dir, |
1664 | struct buffer_head *parent_fe_bh, | 3266 | struct buffer_head *parent_fe_bh, |
1665 | unsigned int blocks_wanted, | 3267 | unsigned int blocks_wanted, |
3268 | struct ocfs2_dir_lookup_result *lookup, | ||
1666 | struct buffer_head **new_de_bh) | 3269 | struct buffer_head **new_de_bh) |
1667 | { | 3270 | { |
1668 | int status = 0; | 3271 | int status = 0; |
@@ -1677,17 +3280,29 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1677 | struct ocfs2_dir_entry * de; | 3280 | struct ocfs2_dir_entry * de; |
1678 | struct super_block *sb = osb->sb; | 3281 | struct super_block *sb = osb->sb; |
1679 | struct ocfs2_extent_tree et; | 3282 | struct ocfs2_extent_tree et; |
3283 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | ||
1680 | 3284 | ||
1681 | mlog_entry_void(); | 3285 | mlog_entry_void(); |
1682 | 3286 | ||
1683 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 3287 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
3288 | /* | ||
3289 | * This would be a code error as an inline directory should | ||
3290 | * never have an index root. | ||
3291 | */ | ||
3292 | BUG_ON(dx_root_bh); | ||
3293 | |||
1684 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, | 3294 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, |
1685 | blocks_wanted, &new_bh); | 3295 | blocks_wanted, lookup, |
3296 | &new_bh); | ||
1686 | if (status) { | 3297 | if (status) { |
1687 | mlog_errno(status); | 3298 | mlog_errno(status); |
1688 | goto bail; | 3299 | goto bail; |
1689 | } | 3300 | } |
1690 | 3301 | ||
3302 | /* Expansion from inline to an indexed directory will | ||
3303 | * have given us this. */ | ||
3304 | dx_root_bh = lookup->dl_dx_root_bh; | ||
3305 | |||
1691 | if (blocks_wanted == 1) { | 3306 | if (blocks_wanted == 1) { |
1692 | /* | 3307 | /* |
1693 | * If the new dirent will fit inside the space | 3308 | * If the new dirent will fit inside the space |
@@ -1751,6 +3366,10 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1751 | } | 3366 | } |
1752 | 3367 | ||
1753 | do_extend: | 3368 | do_extend: |
3369 | if (ocfs2_dir_indexed(dir)) | ||
3370 | credits++; /* For attaching the new dirent block to the | ||
3371 | * dx_root */ | ||
3372 | |||
1754 | down_write(&OCFS2_I(dir)->ip_alloc_sem); | 3373 | down_write(&OCFS2_I(dir)->ip_alloc_sem); |
1755 | drop_alloc_sem = 1; | 3374 | drop_alloc_sem = 1; |
1756 | 3375 | ||
@@ -1781,9 +3400,19 @@ do_extend: | |||
1781 | 3400 | ||
1782 | de = (struct ocfs2_dir_entry *) new_bh->b_data; | 3401 | de = (struct ocfs2_dir_entry *) new_bh->b_data; |
1783 | de->inode = 0; | 3402 | de->inode = 0; |
1784 | if (ocfs2_dir_has_trailer(dir)) { | 3403 | if (ocfs2_supports_dir_trailer(dir)) { |
1785 | de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); | 3404 | de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb)); |
1786 | ocfs2_init_dir_trailer(dir, new_bh); | 3405 | |
3406 | ocfs2_init_dir_trailer(dir, new_bh, le16_to_cpu(de->rec_len)); | ||
3407 | |||
3408 | if (ocfs2_dir_indexed(dir)) { | ||
3409 | status = ocfs2_dx_dir_link_trailer(dir, handle, | ||
3410 | dx_root_bh, new_bh); | ||
3411 | if (status) { | ||
3412 | mlog_errno(status); | ||
3413 | goto bail; | ||
3414 | } | ||
3415 | } | ||
1787 | } else { | 3416 | } else { |
1788 | de->rec_len = cpu_to_le16(sb->s_blocksize); | 3417 | de->rec_len = cpu_to_le16(sb->s_blocksize); |
1789 | } | 3418 | } |
@@ -1839,7 +3468,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, | |||
1839 | * This calculates how many free bytes we'd have in block zero, should | 3468 | * This calculates how many free bytes we'd have in block zero, should |
1840 | * this function force expansion to an extent tree. | 3469 | * this function force expansion to an extent tree. |
1841 | */ | 3470 | */ |
1842 | if (ocfs2_supports_dir_trailer(OCFS2_SB(sb))) | 3471 | if (ocfs2_new_dir_wants_trailer(dir)) |
1843 | free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); | 3472 | free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir); |
1844 | else | 3473 | else |
1845 | free_space = dir->i_sb->s_blocksize - i_size_read(dir); | 3474 | free_space = dir->i_sb->s_blocksize - i_size_read(dir); |
@@ -1970,12 +3599,766 @@ bail: | |||
1970 | return status; | 3599 | return status; |
1971 | } | 3600 | } |
1972 | 3601 | ||
3602 | static int dx_leaf_sort_cmp(const void *a, const void *b) | ||
3603 | { | ||
3604 | const struct ocfs2_dx_entry *entry1 = a; | ||
3605 | const struct ocfs2_dx_entry *entry2 = b; | ||
3606 | u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash); | ||
3607 | u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash); | ||
3608 | u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash); | ||
3609 | u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash); | ||
3610 | |||
3611 | if (major_hash1 > major_hash2) | ||
3612 | return 1; | ||
3613 | if (major_hash1 < major_hash2) | ||
3614 | return -1; | ||
3615 | |||
3616 | /* | ||
3617 | * It is not strictly necessary to sort by minor | ||
3618 | */ | ||
3619 | if (minor_hash1 > minor_hash2) | ||
3620 | return 1; | ||
3621 | if (minor_hash1 < minor_hash2) | ||
3622 | return -1; | ||
3623 | return 0; | ||
3624 | } | ||
3625 | |||
3626 | static void dx_leaf_sort_swap(void *a, void *b, int size) | ||
3627 | { | ||
3628 | struct ocfs2_dx_entry *entry1 = a; | ||
3629 | struct ocfs2_dx_entry *entry2 = b; | ||
3630 | struct ocfs2_dx_entry tmp; | ||
3631 | |||
3632 | BUG_ON(size != sizeof(*entry1)); | ||
3633 | |||
3634 | tmp = *entry1; | ||
3635 | *entry1 = *entry2; | ||
3636 | *entry2 = tmp; | ||
3637 | } | ||
3638 | |||
3639 | static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf) | ||
3640 | { | ||
3641 | struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; | ||
3642 | int i, num = le16_to_cpu(dl_list->de_num_used); | ||
3643 | |||
3644 | for (i = 0; i < (num - 1); i++) { | ||
3645 | if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) != | ||
3646 | le32_to_cpu(dl_list->de_entries[i + 1].dx_major_hash)) | ||
3647 | return 0; | ||
3648 | } | ||
3649 | |||
3650 | return 1; | ||
3651 | } | ||
3652 | |||
3653 | /* | ||
3654 | * Find the optimal value to split this leaf on. This expects the leaf | ||
3655 | * entries to be in sorted order. | ||
3656 | * | ||
3657 | * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is | ||
3658 | * the hash we want to insert. | ||
3659 | * | ||
3660 | * This function is only concerned with the major hash - that which | ||
3661 | * determines which cluster an item belongs to. | ||
3662 | */ | ||
3663 | static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf, | ||
3664 | u32 leaf_cpos, u32 insert_hash, | ||
3665 | u32 *split_hash) | ||
3666 | { | ||
3667 | struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; | ||
3668 | int i, num_used = le16_to_cpu(dl_list->de_num_used); | ||
3669 | int allsame; | ||
3670 | |||
3671 | /* | ||
3672 | * There's a couple rare, but nasty corner cases we have to | ||
3673 | * check for here. All of them involve a leaf where all value | ||
3674 | * have the same hash, which is what we look for first. | ||
3675 | * | ||
3676 | * Most of the time, all of the above is false, and we simply | ||
3677 | * pick the median value for a split. | ||
3678 | */ | ||
3679 | allsame = ocfs2_dx_leaf_same_major(dx_leaf); | ||
3680 | if (allsame) { | ||
3681 | u32 val = le32_to_cpu(dl_list->de_entries[0].dx_major_hash); | ||
3682 | |||
3683 | if (val == insert_hash) { | ||
3684 | /* | ||
3685 | * No matter where we would choose to split, | ||
3686 | * the new entry would want to occupy the same | ||
3687 | * block as these. Since there's no space left | ||
3688 | * in their existing block, we know there | ||
3689 | * won't be space after the split. | ||
3690 | */ | ||
3691 | return -ENOSPC; | ||
3692 | } | ||
3693 | |||
3694 | if (val == leaf_cpos) { | ||
3695 | /* | ||
3696 | * Because val is the same as leaf_cpos (which | ||
3697 | * is the smallest value this leaf can have), | ||
3698 | * yet is not equal to insert_hash, then we | ||
3699 | * know that insert_hash *must* be larger than | ||
3700 | * val (and leaf_cpos). At least cpos+1 in value. | ||
3701 | * | ||
3702 | * We also know then, that there cannot be an | ||
3703 | * adjacent extent (otherwise we'd be looking | ||
3704 | * at it). Choosing this value gives us a | ||
3705 | * chance to get some contiguousness. | ||
3706 | */ | ||
3707 | *split_hash = leaf_cpos + 1; | ||
3708 | return 0; | ||
3709 | } | ||
3710 | |||
3711 | if (val > insert_hash) { | ||
3712 | /* | ||
3713 | * val can not be the same as insert hash, and | ||
3714 | * also must be larger than leaf_cpos. Also, | ||
3715 | * we know that there can't be a leaf between | ||
3716 | * cpos and val, otherwise the entries with | ||
3717 | * hash 'val' would be there. | ||
3718 | */ | ||
3719 | *split_hash = val; | ||
3720 | return 0; | ||
3721 | } | ||
3722 | |||
3723 | *split_hash = insert_hash; | ||
3724 | return 0; | ||
3725 | } | ||
3726 | |||
3727 | /* | ||
3728 | * Since the records are sorted and the checks above | ||
3729 | * guaranteed that not all records in this block are the same, | ||
3730 | * we simple travel forward, from the median, and pick the 1st | ||
3731 | * record whose value is larger than leaf_cpos. | ||
3732 | */ | ||
3733 | for (i = (num_used / 2); i < num_used; i++) | ||
3734 | if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) > | ||
3735 | leaf_cpos) | ||
3736 | break; | ||
3737 | |||
3738 | BUG_ON(i == num_used); /* Should be impossible */ | ||
3739 | *split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash); | ||
3740 | return 0; | ||
3741 | } | ||
3742 | |||
3743 | /* | ||
3744 | * Transfer all entries in orig_dx_leaves whose major hash is equal to or | ||
3745 | * larger than split_hash into new_dx_leaves. We use a temporary | ||
3746 | * buffer (tmp_dx_leaf) to make the changes to the original leaf blocks. | ||
3747 | * | ||
3748 | * Since the block offset inside a leaf (cluster) is a constant mask | ||
3749 | * of minor_hash, we can optimize - an item at block offset X within | ||
3750 | * the original cluster, will be at offset X within the new cluster. | ||
3751 | */ | ||
3752 | static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash, | ||
3753 | handle_t *handle, | ||
3754 | struct ocfs2_dx_leaf *tmp_dx_leaf, | ||
3755 | struct buffer_head **orig_dx_leaves, | ||
3756 | struct buffer_head **new_dx_leaves, | ||
3757 | int num_dx_leaves) | ||
3758 | { | ||
3759 | int i, j, num_used; | ||
3760 | u32 major_hash; | ||
3761 | struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf; | ||
3762 | struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list; | ||
3763 | struct ocfs2_dx_entry *dx_entry; | ||
3764 | |||
3765 | tmp_list = &tmp_dx_leaf->dl_list; | ||
3766 | |||
3767 | for (i = 0; i < num_dx_leaves; i++) { | ||
3768 | orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data; | ||
3769 | orig_list = &orig_dx_leaf->dl_list; | ||
3770 | new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data; | ||
3771 | new_list = &new_dx_leaf->dl_list; | ||
3772 | |||
3773 | num_used = le16_to_cpu(orig_list->de_num_used); | ||
3774 | |||
3775 | memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize); | ||
3776 | tmp_list->de_num_used = cpu_to_le16(0); | ||
3777 | memset(&tmp_list->de_entries, 0, sizeof(*dx_entry)*num_used); | ||
3778 | |||
3779 | for (j = 0; j < num_used; j++) { | ||
3780 | dx_entry = &orig_list->de_entries[j]; | ||
3781 | major_hash = le32_to_cpu(dx_entry->dx_major_hash); | ||
3782 | if (major_hash >= split_hash) | ||
3783 | ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf, | ||
3784 | dx_entry); | ||
3785 | else | ||
3786 | ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf, | ||
3787 | dx_entry); | ||
3788 | } | ||
3789 | memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize); | ||
3790 | |||
3791 | ocfs2_journal_dirty(handle, orig_dx_leaves[i]); | ||
3792 | ocfs2_journal_dirty(handle, new_dx_leaves[i]); | ||
3793 | } | ||
3794 | } | ||
3795 | |||
3796 | static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb, | ||
3797 | struct ocfs2_dx_root_block *dx_root) | ||
3798 | { | ||
3799 | int credits = ocfs2_clusters_to_blocks(osb->sb, 2); | ||
3800 | |||
3801 | credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1); | ||
3802 | credits += ocfs2_quota_trans_credits(osb->sb); | ||
3803 | return credits; | ||
3804 | } | ||
3805 | |||
/*
 * Find the median value in dx_leaf_bh and allocate a new leaf to move
 * half our entries into.
 *
 * dx_root_bh is the index root of dir, dx_leaf_bh is the full leaf
 * block that triggered the rebalance, hinfo carries the hash of the
 * name about to be inserted, leaf_cpos is the cpos of the leaf's
 * cluster and leaf_blkno is the disk block of dx_leaf_bh.
 *
 * This function starts and commits its own transaction. Inside it, the
 * entries of dx_leaf_bh are sorted in place, a split hash is chosen,
 * one new cluster is allocated at that hash and every block of the
 * original cluster has its upper entries moved to the new cluster.
 *
 * Returns 0 on success or a negative errno. Errors set directly here
 * are -ENOSPC (dr_clusters already at UINT_MAX), -EIO (leaf is not
 * full), -ENOMEM and -EDQUOT; anything else is passed up from the
 * helpers called.
 */
static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
				  struct buffer_head *dx_root_bh,
				  struct buffer_head *dx_leaf_bh,
				  struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos,
				  u64 leaf_blkno)
{
	struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
	int credits, ret, i, num_used, did_quota = 0;
	u32 cpos, split_hash, insert_hash = hinfo->major_hash;
	u64 orig_leaves_start;
	int num_dx_leaves;
	struct buffer_head **orig_dx_leaves = NULL;
	struct buffer_head **new_dx_leaves = NULL;
	struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL;
	struct ocfs2_extent_tree et;
	handle_t *handle = NULL;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;

	mlog(0, "DX Dir: %llu, rebalance leaf leaf_blkno: %llu insert: %u\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno,
	     (unsigned long long)leaf_blkno, insert_hash);

	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	/*
	 * XXX: This is a rather large limit. We should use a more
	 * realistic value.
	 *
	 * Nothing has been allocated yet at this point, so returning
	 * directly (instead of going through the cleanup labels) is safe.
	 */
	if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX)
		return -ENOSPC;

	/*
	 * Only a completely full leaf may be rebalanced. Note that the
	 * condition tests for "fewer entries used than the leaf can hold",
	 * even though the message below words it as "empty".
	 */
	num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used);
	if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) {
		mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: "
		     "%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno,
		     (unsigned long long)leaf_blkno, num_used);
		ret = -EIO;
		goto out;
	}

	/*
	 * Buffer head arrays for the original and the new cluster. Only
	 * the first call reports the array length; the same
	 * num_dx_leaves is used for both arrays below.
	 */
	orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
	if (!orig_dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, NULL);
	if (!new_dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* -ENOSPC is an expected outcome here, so it is not logged. */
	ret = ocfs2_lock_allocators(dir, &et, 1, 0, &data_ac, &meta_ac);
	if (ret) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}

	credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Charge quota for the one cluster we are about to add. The
	 * charge is undone at out_commit if anything later fails.
	 */
	if (vfs_dq_alloc_space_nodirty(dir,
				       ocfs2_clusters_to_bytes(dir->i_sb, 1))) {
		ret = -EDQUOT;
		goto out_commit;
	}
	did_quota = 1;

	ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * This block is changing anyway, so we can sort it in place.
	 */
	sort(dx_leaf->dl_list.de_entries, num_used,
	     sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
	     dx_leaf_sort_swap);

	ret = ocfs2_journal_dirty(handle, dx_leaf_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* The split search relies on the sort done just above. */
	ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
					   &split_hash);
	if (ret) {
		mlog_errno(ret);
		goto  out_commit;
	}

	mlog(0, "Split leaf (%u) at %u, insert major hash is %u\n",
	     leaf_cpos, split_hash, insert_hash);

	/*
	 * We have to carefully order operations here. There are items
	 * which want to be in the new cluster before insert, but in
	 * order to put those items in the new cluster, we alter the
	 * old cluster. A failure to insert gets nasty.
	 *
	 * So, start by reserving writes to the old
	 * cluster. ocfs2_dx_dir_new_cluster will reserve writes on
	 * the new cluster for us, before inserting it. The insert
	 * won't happen if there's an error before that. Once the
	 * insert is done then, we can transfer from one leaf into the
	 * other without fear of hitting any error.
	 */

	/*
	 * The leaf transfer wants some scratch space so that we don't
	 * wind up doing a bunch of expensive memmove().
	 */
	tmp_dx_leaf = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
	if (!tmp_dx_leaf) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out_commit;
	}

	/* Read every block of the cluster that contains leaf_blkno. */
	orig_leaves_start = ocfs2_block_to_cluster_start(dir->i_sb, leaf_blkno);
	ret = ocfs2_read_dx_leaves(dir, orig_leaves_start, num_dx_leaves,
				   orig_dx_leaves);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	for (i = 0; i < num_dx_leaves; i++) {
		ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i],
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	/* The new cluster is inserted at the split hash. */
	cpos = split_hash;
	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
				       data_ac, meta_ac, new_dx_leaves,
				       num_dx_leaves);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
				   orig_dx_leaves, new_dx_leaves, num_dx_leaves);

out_commit:
	/* Give the quota charge back if we failed after taking it. */
	if (ret < 0 && did_quota)
		vfs_dq_free_space_nodirty(dir,
				ocfs2_clusters_to_bytes(dir->i_sb, 1));

	ocfs2_commit_trans(osb, handle);

out:
	/*
	 * num_dx_leaves is only read here when at least one array was
	 * allocated, which implies the first kmalloc_leaves call (the
	 * one that sets it) succeeded.
	 */
	if (orig_dx_leaves || new_dx_leaves) {
		for (i = 0; i < num_dx_leaves; i++) {
			if (orig_dx_leaves)
				brelse(orig_dx_leaves[i]);
			if (new_dx_leaves)
				brelse(new_dx_leaves[i]);
		}
		kfree(orig_dx_leaves);
		kfree(new_dx_leaves);
	}

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

	kfree(tmp_dx_leaf);
	return ret;
}
4000 | |||
4001 | static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, | ||
4002 | struct buffer_head *di_bh, | ||
4003 | struct buffer_head *dx_root_bh, | ||
4004 | const char *name, int namelen, | ||
4005 | struct ocfs2_dir_lookup_result *lookup) | ||
4006 | { | ||
4007 | int ret, rebalanced = 0; | ||
4008 | struct ocfs2_dx_root_block *dx_root; | ||
4009 | struct buffer_head *dx_leaf_bh = NULL; | ||
4010 | struct ocfs2_dx_leaf *dx_leaf; | ||
4011 | u64 blkno; | ||
4012 | u32 leaf_cpos; | ||
4013 | |||
4014 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4015 | |||
4016 | restart_search: | ||
4017 | ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo, | ||
4018 | &leaf_cpos, &blkno); | ||
4019 | if (ret) { | ||
4020 | mlog_errno(ret); | ||
4021 | goto out; | ||
4022 | } | ||
4023 | |||
4024 | ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh); | ||
4025 | if (ret) { | ||
4026 | mlog_errno(ret); | ||
4027 | goto out; | ||
4028 | } | ||
4029 | |||
4030 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
4031 | |||
4032 | if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >= | ||
4033 | le16_to_cpu(dx_leaf->dl_list.de_count)) { | ||
4034 | if (rebalanced) { | ||
4035 | /* | ||
4036 | * Rebalancing should have provided us with | ||
4037 | * space in an appropriate leaf. | ||
4038 | * | ||
4039 | * XXX: Is this an abnormal condition then? | ||
4040 | * Should we print a message here? | ||
4041 | */ | ||
4042 | ret = -ENOSPC; | ||
4043 | goto out; | ||
4044 | } | ||
4045 | |||
4046 | ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh, | ||
4047 | &lookup->dl_hinfo, leaf_cpos, | ||
4048 | blkno); | ||
4049 | if (ret) { | ||
4050 | if (ret != -ENOSPC) | ||
4051 | mlog_errno(ret); | ||
4052 | goto out; | ||
4053 | } | ||
4054 | |||
4055 | /* | ||
4056 | * Restart the lookup. The rebalance might have | ||
4057 | * changed which block our item fits into. Mark our | ||
4058 | * progress, so we only execute this once. | ||
4059 | */ | ||
4060 | brelse(dx_leaf_bh); | ||
4061 | dx_leaf_bh = NULL; | ||
4062 | rebalanced = 1; | ||
4063 | goto restart_search; | ||
4064 | } | ||
4065 | |||
4066 | lookup->dl_dx_leaf_bh = dx_leaf_bh; | ||
4067 | dx_leaf_bh = NULL; | ||
4068 | |||
4069 | out: | ||
4070 | brelse(dx_leaf_bh); | ||
4071 | return ret; | ||
4072 | } | ||
4073 | |||
4074 | static int ocfs2_search_dx_free_list(struct inode *dir, | ||
4075 | struct buffer_head *dx_root_bh, | ||
4076 | int namelen, | ||
4077 | struct ocfs2_dir_lookup_result *lookup) | ||
4078 | { | ||
4079 | int ret = -ENOSPC; | ||
4080 | struct buffer_head *leaf_bh = NULL, *prev_leaf_bh = NULL; | ||
4081 | struct ocfs2_dir_block_trailer *db; | ||
4082 | u64 next_block; | ||
4083 | int rec_len = OCFS2_DIR_REC_LEN(namelen); | ||
4084 | struct ocfs2_dx_root_block *dx_root; | ||
4085 | |||
4086 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4087 | next_block = le64_to_cpu(dx_root->dr_free_blk); | ||
4088 | |||
4089 | while (next_block) { | ||
4090 | brelse(prev_leaf_bh); | ||
4091 | prev_leaf_bh = leaf_bh; | ||
4092 | leaf_bh = NULL; | ||
4093 | |||
4094 | ret = ocfs2_read_dir_block_direct(dir, next_block, &leaf_bh); | ||
4095 | if (ret) { | ||
4096 | mlog_errno(ret); | ||
4097 | goto out; | ||
4098 | } | ||
4099 | |||
4100 | db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb); | ||
4101 | if (rec_len <= le16_to_cpu(db->db_free_rec_len)) { | ||
4102 | lookup->dl_leaf_bh = leaf_bh; | ||
4103 | lookup->dl_prev_leaf_bh = prev_leaf_bh; | ||
4104 | leaf_bh = NULL; | ||
4105 | prev_leaf_bh = NULL; | ||
4106 | break; | ||
4107 | } | ||
4108 | |||
4109 | next_block = le64_to_cpu(db->db_free_next); | ||
4110 | } | ||
4111 | |||
4112 | if (!next_block) | ||
4113 | ret = -ENOSPC; | ||
4114 | |||
4115 | out: | ||
4116 | |||
4117 | brelse(leaf_bh); | ||
4118 | brelse(prev_leaf_bh); | ||
4119 | return ret; | ||
4120 | } | ||
4121 | |||
/*
 * Convert an inline dx_root (index entries stored directly in
 * dr_entries) into one that points at an extent of leaf blocks.
 *
 * One cluster is reserved and allocated, each entry currently held in
 * the root is appended to the leaf block selected by its minor hash,
 * the inline flag is cleared, the area from dr_list to the end of the
 * block is re-initialised as an empty extent list and the new cluster
 * is inserted at cpos 0.
 *
 * This function starts and commits its own transaction. Returns 0 on
 * success or a negative errno (-ENOMEM and -EDQUOT are set directly
 * here; other values come from the helpers called).
 */
static int ocfs2_expand_inline_dx_root(struct inode *dir,
				       struct buffer_head *dx_root_bh)
{
	int ret, num_dx_leaves, i, j, did_quota = 0;
	struct buffer_head **dx_leaves = NULL;
	struct ocfs2_extent_tree et;
	u64 insert_blkno;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	handle_t *handle = NULL;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;
	struct ocfs2_dx_entry *dx_entry;
	struct ocfs2_dx_leaf *target_leaf;

	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
	if (!dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* On failure we jump past out_commit, so handle is never committed. */
	handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Charge quota for the single cluster allocated below. */
	if (vfs_dq_alloc_space_nodirty(dir,
				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
		ret = -EDQUOT;
		goto out_commit;
	}
	did_quota = 1;

	/*
	 * We do this up front, before the allocation, so that a
	 * failure to add the dx_root_bh to the journal won't result
	 * in us losing clusters.
	 */
	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
					 num_dx_leaves, &insert_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * Transfer the entries from our dx_root into the appropriate
	 * block
	 */
	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
	entry_list = &dx_root->dr_entries;

	for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
		dx_entry = &entry_list->de_entries[i];

		/* The minor hash picks the block within the new cluster. */
		j = __ocfs2_dx_dir_hash_idx(osb,
					    le32_to_cpu(dx_entry->dx_minor_hash));
		target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;

		ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);

		/* Each leaf has been passed to the journal already
		 * via __ocfs2_dx_dir_new_cluster() */
	}

	/*
	 * The entries have been copied out above; now clear the inline
	 * flag and zero everything from dr_list to the end of the block
	 * before setting up the extent list's record count.
	 */
	dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
	memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
	       offsetof(struct ocfs2_dx_root_block, dr_list));
	dx_root->dr_list.l_count =
		cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));

	/* This should never fail considering we start with an empty
	 * dx_root. */
	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0,
				  insert_blkno, 1, 0, NULL);
	if (ret)
		mlog_errno(ret);
	/*
	 * NOTE(review): did_quota is cleared whether or not the insert
	 * succeeded, so the quota charge is kept even when
	 * ocfs2_insert_extent() fails. Presumably intentional because the
	 * cluster has already been allocated by then - confirm.
	 */
	did_quota = 0;

	ocfs2_journal_dirty(handle, dx_root_bh);

out_commit:
	/* Undo the quota charge for failures before the extent insert. */
	if (ret < 0 && did_quota)
		vfs_dq_free_space_nodirty(dir,
					  ocfs2_clusters_to_bytes(dir->i_sb, 1));

	ocfs2_commit_trans(osb, handle);

out:
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

	if (dx_leaves) {
		for (i = 0; i < num_dx_leaves; i++)
			brelse(dx_leaves[i]);
		kfree(dx_leaves);
	}
	return ret;
}
4238 | |||
4239 | static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh) | ||
4240 | { | ||
4241 | struct ocfs2_dx_root_block *dx_root; | ||
4242 | struct ocfs2_dx_entry_list *entry_list; | ||
4243 | |||
4244 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
4245 | entry_list = &dx_root->dr_entries; | ||
4246 | |||
4247 | if (le16_to_cpu(entry_list->de_num_used) >= | ||
4248 | le16_to_cpu(entry_list->de_count)) | ||
4249 | return -ENOSPC; | ||
4250 | |||
4251 | return 0; | ||
4252 | } | ||
4253 | |||
4254 | static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir, | ||
4255 | struct buffer_head *di_bh, | ||
4256 | const char *name, | ||
4257 | int namelen, | ||
4258 | struct ocfs2_dir_lookup_result *lookup) | ||
4259 | { | ||
4260 | int ret, free_dx_root = 1; | ||
4261 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
4262 | struct buffer_head *dx_root_bh = NULL; | ||
4263 | struct buffer_head *leaf_bh = NULL; | ||
4264 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4265 | struct ocfs2_dx_root_block *dx_root; | ||
4266 | |||
4267 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
4268 | if (ret) { | ||
4269 | mlog_errno(ret); | ||
4270 | goto out; | ||
4271 | } | ||
4272 | |||
4273 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4274 | if (le32_to_cpu(dx_root->dr_num_entries) == OCFS2_DX_ENTRIES_MAX) { | ||
4275 | ret = -ENOSPC; | ||
4276 | mlog_errno(ret); | ||
4277 | goto out; | ||
4278 | } | ||
4279 | |||
4280 | if (ocfs2_dx_root_inline(dx_root)) { | ||
4281 | ret = ocfs2_inline_dx_has_space(dx_root_bh); | ||
4282 | |||
4283 | if (ret == 0) | ||
4284 | goto search_el; | ||
4285 | |||
4286 | /* | ||
4287 | * We ran out of room in the root block. Expand it to | ||
4288 | * an extent, then allow ocfs2_find_dir_space_dx to do | ||
4289 | * the rest. | ||
4290 | */ | ||
4291 | ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh); | ||
4292 | if (ret) { | ||
4293 | mlog_errno(ret); | ||
4294 | goto out; | ||
4295 | } | ||
4296 | } | ||
4297 | |||
4298 | /* | ||
4299 | * Insert preparation for an indexed directory is split into two | ||
4300 | * steps. The call to find_dir_space_dx reserves room in the index for | ||
4301 | * an additional item. If we run out of space there, it's a real error | ||
4302 | * we can't continue on. | ||
4303 | */ | ||
4304 | ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name, | ||
4305 | namelen, lookup); | ||
4306 | if (ret) { | ||
4307 | mlog_errno(ret); | ||
4308 | goto out; | ||
4309 | } | ||
4310 | |||
4311 | search_el: | ||
4312 | /* | ||
4313 | * Next, we need to find space in the unindexed tree. This call | ||
4314 | * searches using the free space linked list. If the unindexed tree | ||
4315 | * lacks sufficient space, we'll expand it below. The expansion code | ||
4316 | * is smart enough to add any new blocks to the free space list. | ||
4317 | */ | ||
4318 | ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup); | ||
4319 | if (ret && ret != -ENOSPC) { | ||
4320 | mlog_errno(ret); | ||
4321 | goto out; | ||
4322 | } | ||
4323 | |||
4324 | /* Do this up here - ocfs2_extend_dir might need the dx_root */ | ||
4325 | lookup->dl_dx_root_bh = dx_root_bh; | ||
4326 | free_dx_root = 0; | ||
4327 | |||
4328 | if (ret == -ENOSPC) { | ||
4329 | ret = ocfs2_extend_dir(osb, dir, di_bh, 1, lookup, &leaf_bh); | ||
4330 | |||
4331 | if (ret) { | ||
4332 | mlog_errno(ret); | ||
4333 | goto out; | ||
4334 | } | ||
4335 | |||
4336 | /* | ||
4337 | * We make the assumption here that new leaf blocks are added | ||
4338 | * to the front of our free list. | ||
4339 | */ | ||
4340 | lookup->dl_prev_leaf_bh = NULL; | ||
4341 | lookup->dl_leaf_bh = leaf_bh; | ||
4342 | } | ||
4343 | |||
4344 | out: | ||
4345 | if (free_dx_root) | ||
4346 | brelse(dx_root_bh); | ||
4347 | return ret; | ||
4348 | } | ||
4349 | |||
4350 | /* | ||
4351 | * Get a directory ready for insert. Any directory allocation required | ||
4352 | * happens here. Success returns zero, and enough context in the dir | ||
4353 | * lookup result that ocfs2_add_entry() will be able to complete the task | ||
4354 | * with minimal performance impact. | ||
4355 | */ | ||
1973 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | 4356 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, |
1974 | struct inode *dir, | 4357 | struct inode *dir, |
1975 | struct buffer_head *parent_fe_bh, | 4358 | struct buffer_head *parent_fe_bh, |
1976 | const char *name, | 4359 | const char *name, |
1977 | int namelen, | 4360 | int namelen, |
1978 | struct buffer_head **ret_de_bh) | 4361 | struct ocfs2_dir_lookup_result *lookup) |
1979 | { | 4362 | { |
1980 | int ret; | 4363 | int ret; |
1981 | unsigned int blocks_wanted = 1; | 4364 | unsigned int blocks_wanted = 1; |
@@ -1984,14 +4367,34 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
1984 | mlog(0, "getting ready to insert namelen %d into dir %llu\n", | 4367 | mlog(0, "getting ready to insert namelen %d into dir %llu\n", |
1985 | namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); | 4368 | namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); |
1986 | 4369 | ||
1987 | *ret_de_bh = NULL; | ||
1988 | |||
1989 | if (!namelen) { | 4370 | if (!namelen) { |
1990 | ret = -EINVAL; | 4371 | ret = -EINVAL; |
1991 | mlog_errno(ret); | 4372 | mlog_errno(ret); |
1992 | goto out; | 4373 | goto out; |
1993 | } | 4374 | } |
1994 | 4375 | ||
4376 | /* | ||
4377 | * Do this up front to reduce confusion. | ||
4378 | * | ||
4379 | * The directory might start inline, then be turned into an | ||
4380 | * indexed one, in which case we'd need to hash deep inside | ||
4381 | * ocfs2_find_dir_space_id(). Since | ||
4382 | * ocfs2_prepare_dx_dir_for_insert() also needs this hash | ||
4383 | * done, there seems no point in spreading out the calls. We | ||
4384 | * can optimize away the case where the file system doesn't | ||
4385 | * support indexing. | ||
4386 | */ | ||
4387 | if (ocfs2_supports_indexed_dirs(osb)) | ||
4388 | ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo); | ||
4389 | |||
4390 | if (ocfs2_dir_indexed(dir)) { | ||
4391 | ret = ocfs2_prepare_dx_dir_for_insert(dir, parent_fe_bh, | ||
4392 | name, namelen, lookup); | ||
4393 | if (ret) | ||
4394 | mlog_errno(ret); | ||
4395 | goto out; | ||
4396 | } | ||
4397 | |||
1995 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 4398 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
1996 | ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name, | 4399 | ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name, |
1997 | namelen, &bh, &blocks_wanted); | 4400 | namelen, &bh, &blocks_wanted); |
@@ -2010,7 +4413,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
2010 | BUG_ON(bh); | 4413 | BUG_ON(bh); |
2011 | 4414 | ||
2012 | ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted, | 4415 | ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted, |
2013 | &bh); | 4416 | lookup, &bh); |
2014 | if (ret) { | 4417 | if (ret) { |
2015 | if (ret != -ENOSPC) | 4418 | if (ret != -ENOSPC) |
2016 | mlog_errno(ret); | 4419 | mlog_errno(ret); |
@@ -2020,9 +4423,154 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
2020 | BUG_ON(!bh); | 4423 | BUG_ON(!bh); |
2021 | } | 4424 | } |
2022 | 4425 | ||
2023 | *ret_de_bh = bh; | 4426 | lookup->dl_leaf_bh = bh; |
2024 | bh = NULL; | 4427 | bh = NULL; |
2025 | out: | 4428 | out: |
2026 | brelse(bh); | 4429 | brelse(bh); |
2027 | return ret; | 4430 | return ret; |
2028 | } | 4431 | } |
4432 | |||
/*
 * Detach the name index from a directory and give the dx root block's
 * bit back to the suballocator it is recorded as coming from.
 *
 * dir:        directory inode; OCFS2_INDEXED_DIR_FL is cleared from its
 *             in-memory ip_dyn_features.
 * di_bh:      buffer holding the directory's on-disk inode; its
 *             i_dyn_features and i_dx_root fields are rewritten under
 *             the journal.
 * dx_root_bh: buffer holding the dx root block. It is only read here
 *             (dr_suballoc_slot, dr_suballoc_bit, dr_blkno).
 *
 * Returns -ENOMEM if the allocator system inode cannot be obtained,
 * otherwise the error of the first failing step, or the result of
 * ocfs2_free_suballoc_bits().
 */
4433 | static int ocfs2_dx_dir_remove_index(struct inode *dir, | ||
4434 | struct buffer_head *di_bh, | ||
4435 | struct buffer_head *dx_root_bh) | ||
4436 | { | ||
4437 | int ret; | ||
4438 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
4439 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4440 | struct ocfs2_dx_root_block *dx_root; | ||
4441 | struct inode *dx_alloc_inode = NULL; | ||
4442 | struct buffer_head *dx_alloc_bh = NULL; | ||
4443 | handle_t *handle; | ||
4444 | u64 blk; | ||
4445 | u16 bit; | ||
4446 | u64 bg_blkno; | ||
4447 | |||
4448 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
4449 | |||
/*
 * Look up the EXTENT_ALLOC system inode for the slot recorded in the
 * dx root; the root's bit is freed back into that allocator below.
 */
4450 | dx_alloc_inode = ocfs2_get_system_file_inode(osb, | ||
4451 | EXTENT_ALLOC_SYSTEM_INODE, | ||
4452 | le16_to_cpu(dx_root->dr_suballoc_slot)); | ||
4453 | if (!dx_alloc_inode) { | ||
4454 | ret = -ENOMEM; | ||
4455 | mlog_errno(ret); | ||
4456 | goto out; | ||
4457 | } | ||
/*
 * Acquisition order: i_mutex, then the inode lock, then the
 * transaction. The labels at the bottom unwind in the reverse order.
 * NOTE(review): the final argument 1 to ocfs2_inode_lock() presumably
 * requests an exclusive lock - confirm against its definition.
 */
4458 | mutex_lock(&dx_alloc_inode->i_mutex); | ||
4459 | |||
4460 | ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1); | ||
4461 | if (ret) { | ||
4462 | mlog_errno(ret); | ||
4463 | goto out_mutex; | ||
4464 | } | ||
4465 | |||
4466 | handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS); | ||
4467 | if (IS_ERR(handle)) { | ||
4468 | ret = PTR_ERR(handle); | ||
4469 | mlog_errno(ret); | ||
4470 | goto out_unlock; | ||
4471 | } | ||
4472 | |||
4473 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | ||
4474 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4475 | if (ret) { | ||
4476 | mlog_errno(ret); | ||
4477 | goto out_commit; | ||
4478 | } | ||
4479 | |||
/*
 * Clear the indexed flag in the in-memory inode first, then mirror the
 * resulting feature bits to the on-disk inode and zero its dx root
 * pointer.
 */
4480 | OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; | ||
4481 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | ||
4482 | di->i_dx_root = cpu_to_le64(0ULL); | ||
4483 | |||
4484 | ocfs2_journal_dirty(handle, di_bh); | ||
4485 | |||
/*
 * Free the single bit backing the dx root block. The inode update
 * above has already been dirtied at this point; a failure here is
 * logged and returned, but nothing in this function undoes the flag
 * change.
 */
4486 | blk = le64_to_cpu(dx_root->dr_blkno); | ||
4487 | bit = le16_to_cpu(dx_root->dr_suballoc_bit); | ||
4488 | bg_blkno = ocfs2_which_suballoc_group(blk, bit); | ||
4489 | ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh, | ||
4490 | bit, bg_blkno, 1); | ||
4491 | if (ret) | ||
4492 | mlog_errno(ret); | ||
4493 | |||
/* The success path falls through every label below as well. */
4494 | out_commit: | ||
4495 | ocfs2_commit_trans(osb, handle); | ||
4496 | |||
4497 | out_unlock: | ||
4498 | ocfs2_inode_unlock(dx_alloc_inode, 1); | ||
4499 | |||
4500 | out_mutex: | ||
4501 | mutex_unlock(&dx_alloc_inode->i_mutex); | ||
4502 | brelse(dx_alloc_bh); | ||
/*
 * Reached with dx_alloc_inode == NULL when the system inode lookup
 * failed; this relies on iput() tolerating NULL (TODO confirm).
 */
4503 | out: | ||
4504 | iput(dx_alloc_inode); | ||
4505 | return ret; | ||
4506 | } | ||
4507 | |||
/*
 * Tear down the index of an indexed directory: remove every extent
 * range recorded in the dx root's extent list (unless the root is
 * inline), then drop the index itself via ocfs2_dx_dir_remove_index()
 * and evict the dx root buffer from the inode's cache.
 *
 * dir:   directory inode. If it is not indexed, returns 0 without
 *        doing any work.
 * di_bh: buffer holding the directory's on-disk inode.
 *
 * Returns 0 if the directory is not indexed; otherwise the error of
 * the first failing step, or the result of
 * ocfs2_dx_dir_remove_index().
 */
4508 | int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) | ||
4509 | { | ||
4510 | int ret; | ||
/*
 * clen, cpos and blkno are outputs of ocfs2_dx_dir_lookup_rec() in the
 * loop below; uninitialized_var() presumably only silences compiler
 * warnings about them - confirm against the macro definition.
 * major_hash starts at UINT_MAX so the first lookup asks for the top
 * of the hash space.
 */
4511 | unsigned int uninitialized_var(clen); | ||
4512 | u32 major_hash = UINT_MAX, p_cpos, uninitialized_var(cpos); | ||
4513 | u64 uninitialized_var(blkno); | ||
4514 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
4515 | struct buffer_head *dx_root_bh = NULL; | ||
4516 | struct ocfs2_dx_root_block *dx_root; | ||
4517 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4518 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
4519 | struct ocfs2_extent_tree et; | ||
4520 | |||
4521 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
4522 | |||
/*
 * This early return bypasses the "out" label, so neither the truncate
 * log flush nor ocfs2_run_deallocs() runs on this path; the dealloc
 * context has only been initialised at this point.
 */
4523 | if (!ocfs2_dir_indexed(dir)) | ||
4524 | return 0; | ||
4525 | |||
4526 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
4527 | if (ret) { | ||
4528 | mlog_errno(ret); | ||
4529 | goto out; | ||
4530 | } | ||
4531 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
4532 | |||
/*
 * An inline dx root skips the extent walk entirely and goes straight
 * to removing the index (presumably because it owns no index
 * clusters - confirm against ocfs2_dx_root_inline()).
 */
4533 | if (ocfs2_dx_root_inline(dx_root)) | ||
4534 | goto remove_index; | ||
4535 | |||
4536 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | ||
4537 | |||
/*
 * Walk the extent list from the highest hash range downward: look up
 * the record for major_hash (presumably the one covering that hash),
 * remove its whole range, then continue just below the cpos that was
 * returned. The loop ends when dr_clusters reads zero or the record
 * starting at cpos 0 has been removed.
 */
4538 | /* XXX: What if dr_clusters is too large? */ | ||
4539 | while (le32_to_cpu(dx_root->dr_clusters)) { | ||
4540 | ret = ocfs2_dx_dir_lookup_rec(dir, &dx_root->dr_list, | ||
4541 | major_hash, &cpos, &blkno, &clen); | ||
4542 | if (ret) { | ||
4543 | mlog_errno(ret); | ||
4544 | goto out; | ||
4545 | } | ||
4546 | |||
/* Convert the returned block number to a cluster offset for removal. */
4547 | p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); | ||
4548 | |||
4549 | ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, | ||
4550 | &dealloc); | ||
4551 | if (ret) { | ||
4552 | mlog_errno(ret); | ||
4553 | goto out; | ||
4554 | } | ||
4555 | |||
/*
 * cpos is a u32, so "cpos - 1" at zero would wrap back to UINT_MAX
 * (the starting value); stop here instead.
 */
4556 | if (cpos == 0) | ||
4557 | break; | ||
4558 | |||
4559 | major_hash = cpos - 1; | ||
4560 | } | ||
4561 | |||
4562 | remove_index: | ||
4563 | ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh); | ||
4564 | if (ret) { | ||
4565 | mlog_errno(ret); | ||
4566 | goto out; | ||
4567 | } | ||
4568 | |||
/* Only evict the dx root buffer from the cache once removal succeeded. */
4569 | ocfs2_remove_from_cache(dir, dx_root_bh); | ||
/*
 * Shared exit for success and failure: schedule a truncate log flush
 * and run whatever was queued on the dealloc context. The return value
 * of ocfs2_run_deallocs() is not checked. dx_root_bh is still NULL if
 * the dx root read failed, which relies on brelse() tolerating NULL
 * (TODO confirm).
 */
4570 | out: | ||
4571 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
4572 | ocfs2_run_deallocs(osb, &dealloc); | ||
4573 | |||
4574 | brelse(dx_root_bh); | ||
4575 | return ret; | ||
4576 | } | ||