diff options
Diffstat (limited to 'fs/ocfs2/dir.c')
-rw-r--r-- | fs/ocfs2/dir.c | 1908 |
1 files changed, 1845 insertions, 63 deletions
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 76ffb5c10b3e..0b8c88b47a4e 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/quotaops.h> | 43 | #include <linux/quotaops.h> |
44 | #include <linux/sort.h> | ||
44 | 45 | ||
45 | #define MLOG_MASK_PREFIX ML_NAMEI | 46 | #define MLOG_MASK_PREFIX ML_NAMEI |
46 | #include <cluster/masklog.h> | 47 | #include <cluster/masklog.h> |
@@ -58,6 +59,7 @@ | |||
58 | #include "namei.h" | 59 | #include "namei.h" |
59 | #include "suballoc.h" | 60 | #include "suballoc.h" |
60 | #include "super.h" | 61 | #include "super.h" |
62 | #include "sysfile.h" | ||
61 | #include "uptodate.h" | 63 | #include "uptodate.h" |
62 | 64 | ||
63 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
@@ -71,11 +73,6 @@ static unsigned char ocfs2_filetype_table[] = { | |||
71 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 73 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
72 | }; | 74 | }; |
73 | 75 | ||
74 | static int ocfs2_extend_dir(struct ocfs2_super *osb, | ||
75 | struct inode *dir, | ||
76 | struct buffer_head *parent_fe_bh, | ||
77 | unsigned int blocks_wanted, | ||
78 | struct buffer_head **new_de_bh); | ||
79 | static int ocfs2_do_extend_dir(struct super_block *sb, | 76 | static int ocfs2_do_extend_dir(struct super_block *sb, |
80 | handle_t *handle, | 77 | handle_t *handle, |
81 | struct inode *dir, | 78 | struct inode *dir, |
@@ -155,6 +152,105 @@ static void ocfs2_init_dir_trailer(struct inode *inode, | |||
155 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) | 152 | void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res) |
156 | { | 153 | { |
157 | brelse(res->dl_leaf_bh); | 154 | brelse(res->dl_leaf_bh); |
155 | brelse(res->dl_dx_leaf_bh); | ||
156 | } | ||
157 | |||
158 | static int ocfs2_dir_indexed(struct inode *inode) | ||
159 | { | ||
160 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL) | ||
161 | return 1; | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Hashing code adapted from ext3 | ||
167 | */ | ||
168 | #define DELTA 0x9E3779B9 | ||
169 | |||
170 | static void TEA_transform(__u32 buf[4], __u32 const in[]) | ||
171 | { | ||
172 | __u32 sum = 0; | ||
173 | __u32 b0 = buf[0], b1 = buf[1]; | ||
174 | __u32 a = in[0], b = in[1], c = in[2], d = in[3]; | ||
175 | int n = 16; | ||
176 | |||
177 | do { | ||
178 | sum += DELTA; | ||
179 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | ||
180 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | ||
181 | } while (--n); | ||
182 | |||
183 | buf[0] += b0; | ||
184 | buf[1] += b1; | ||
185 | } | ||
186 | |||
187 | static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | ||
188 | { | ||
189 | __u32 pad, val; | ||
190 | int i; | ||
191 | |||
192 | pad = (__u32)len | ((__u32)len << 8); | ||
193 | pad |= pad << 16; | ||
194 | |||
195 | val = pad; | ||
196 | if (len > num*4) | ||
197 | len = num * 4; | ||
198 | for (i = 0; i < len; i++) { | ||
199 | if ((i % 4) == 0) | ||
200 | val = pad; | ||
201 | val = msg[i] + (val << 8); | ||
202 | if ((i % 4) == 3) { | ||
203 | *buf++ = val; | ||
204 | val = pad; | ||
205 | num--; | ||
206 | } | ||
207 | } | ||
208 | if (--num >= 0) | ||
209 | *buf++ = val; | ||
210 | while (--num >= 0) | ||
211 | *buf++ = pad; | ||
212 | } | ||
213 | |||
214 | static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len, | ||
215 | struct ocfs2_dx_hinfo *hinfo) | ||
216 | { | ||
217 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
218 | const char *p; | ||
219 | __u32 in[8], buf[4]; | ||
220 | |||
221 | /* | ||
222 | * XXX: Is this really necessary, if the index is never looked | ||
223 | * at by readdir? Is a hash value of '0' a bad idea? | ||
224 | */ | ||
225 | if ((len == 1 && !strncmp(".", name, 1)) || | ||
226 | (len == 2 && !strncmp("..", name, 2))) { | ||
227 | buf[0] = buf[1] = 0; | ||
228 | goto out; | ||
229 | } | ||
230 | |||
231 | #ifdef OCFS2_DEBUG_DX_DIRS | ||
232 | /* | ||
233 | * This makes it very easy to debug indexing problems. We | ||
234 | * should never allow this to be selected without hand editing | ||
235 | * this file though. | ||
236 | */ | ||
237 | buf[0] = buf[1] = len; | ||
238 | goto out; | ||
239 | #endif | ||
240 | |||
241 | memcpy(buf, osb->osb_dx_seed, sizeof(buf)); | ||
242 | |||
243 | p = name; | ||
244 | while (len > 0) { | ||
245 | str2hashbuf(p, len, in, 4); | ||
246 | TEA_transform(buf, in); | ||
247 | len -= 16; | ||
248 | p += 16; | ||
249 | } | ||
250 | |||
251 | out: | ||
252 | hinfo->major_hash = buf[0]; | ||
253 | hinfo->minor_hash = buf[1]; | ||
158 | } | 254 | } |
159 | 255 | ||
160 | /* | 256 | /* |
@@ -317,6 +413,52 @@ static int ocfs2_validate_dir_block(struct super_block *sb, | |||
317 | } | 413 | } |
318 | 414 | ||
319 | /* | 415 | /* |
416 | * Validate a directory trailer. | ||
417 | * | ||
418 | * We check the trailer here rather than in ocfs2_validate_dir_block() | ||
419 | * because that function doesn't have the inode to test. | ||
420 | */ | ||
421 | static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh) | ||
422 | { | ||
423 | int rc = 0; | ||
424 | struct ocfs2_dir_block_trailer *trailer; | ||
425 | |||
426 | trailer = ocfs2_trailer_from_bh(bh, dir->i_sb); | ||
427 | if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) { | ||
428 | rc = -EINVAL; | ||
429 | ocfs2_error(dir->i_sb, | ||
430 | "Invalid dirblock #%llu: " | ||
431 | "signature = %.*s\n", | ||
432 | (unsigned long long)bh->b_blocknr, 7, | ||
433 | trailer->db_signature); | ||
434 | goto out; | ||
435 | } | ||
436 | if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) { | ||
437 | rc = -EINVAL; | ||
438 | ocfs2_error(dir->i_sb, | ||
439 | "Directory block #%llu has an invalid " | ||
440 | "db_blkno of %llu", | ||
441 | (unsigned long long)bh->b_blocknr, | ||
442 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
443 | goto out; | ||
444 | } | ||
445 | if (le64_to_cpu(trailer->db_parent_dinode) != | ||
446 | OCFS2_I(dir)->ip_blkno) { | ||
447 | rc = -EINVAL; | ||
448 | ocfs2_error(dir->i_sb, | ||
449 | "Directory block #%llu on dinode " | ||
450 | "#%llu has an invalid parent_dinode " | ||
451 | "of %llu", | ||
452 | (unsigned long long)bh->b_blocknr, | ||
453 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
454 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
455 | goto out; | ||
456 | } | ||
457 | out: | ||
458 | return rc; | ||
459 | } | ||
460 | |||
461 | /* | ||
320 | * This function forces all errors to -EIO for consistency with its | 462 | * This function forces all errors to -EIO for consistency with its |
321 | * predecessor, ocfs2_bread(). We haven't audited what returning the | 463 | * predecessor, ocfs2_bread(). We haven't audited what returning the |
322 | * real error codes would do to callers. We log the real codes with | 464 | * real error codes would do to callers. We log the real codes with |
@@ -327,7 +469,6 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, | |||
327 | { | 469 | { |
328 | int rc = 0; | 470 | int rc = 0; |
329 | struct buffer_head *tmp = *bh; | 471 | struct buffer_head *tmp = *bh; |
330 | struct ocfs2_dir_block_trailer *trailer; | ||
331 | 472 | ||
332 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, | 473 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, |
333 | ocfs2_validate_dir_block); | 474 | ocfs2_validate_dir_block); |
@@ -336,42 +477,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, | |||
336 | goto out; | 477 | goto out; |
337 | } | 478 | } |
338 | 479 | ||
339 | /* | ||
340 | * We check the trailer here rather than in | ||
341 | * ocfs2_validate_dir_block() because that function doesn't have | ||
342 | * the inode to test. | ||
343 | */ | ||
344 | if (!(flags & OCFS2_BH_READAHEAD) && | 480 | if (!(flags & OCFS2_BH_READAHEAD) && |
345 | ocfs2_dir_has_trailer(inode)) { | 481 | ocfs2_dir_has_trailer(inode)) { |
346 | trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb); | 482 | rc = ocfs2_check_dir_trailer(inode, tmp); |
347 | if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) { | 483 | if (rc) { |
348 | rc = -EINVAL; | 484 | if (!*bh) |
349 | ocfs2_error(inode->i_sb, | 485 | brelse(tmp); |
350 | "Invalid dirblock #%llu: " | 486 | mlog_errno(rc); |
351 | "signature = %.*s\n", | ||
352 | (unsigned long long)tmp->b_blocknr, 7, | ||
353 | trailer->db_signature); | ||
354 | goto out; | ||
355 | } | ||
356 | if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) { | ||
357 | rc = -EINVAL; | ||
358 | ocfs2_error(inode->i_sb, | ||
359 | "Directory block #%llu has an invalid " | ||
360 | "db_blkno of %llu", | ||
361 | (unsigned long long)tmp->b_blocknr, | ||
362 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
363 | goto out; | ||
364 | } | ||
365 | if (le64_to_cpu(trailer->db_parent_dinode) != | ||
366 | OCFS2_I(inode)->ip_blkno) { | ||
367 | rc = -EINVAL; | ||
368 | ocfs2_error(inode->i_sb, | ||
369 | "Directory block #%llu on dinode " | ||
370 | "#%llu has an invalid parent_dinode " | ||
371 | "of %llu", | ||
372 | (unsigned long long)tmp->b_blocknr, | ||
373 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
374 | (unsigned long long)le64_to_cpu(trailer->db_blkno)); | ||
375 | goto out; | 487 | goto out; |
376 | } | 488 | } |
377 | } | 489 | } |
@@ -384,6 +496,141 @@ out: | |||
384 | return rc ? -EIO : 0; | 496 | return rc ? -EIO : 0; |
385 | } | 497 | } |
386 | 498 | ||
499 | /* | ||
500 | * Read the block at 'phys' which belongs to this directory | ||
501 | * inode. This function does no virtual->physical block translation - | ||
502 | * what's passed in is assumed to be a valid directory block. | ||
503 | */ | ||
504 | static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys, | ||
505 | struct buffer_head **bh) | ||
506 | { | ||
507 | int ret; | ||
508 | struct buffer_head *tmp = *bh; | ||
509 | |||
510 | ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block); | ||
511 | if (ret) { | ||
512 | mlog_errno(ret); | ||
513 | goto out; | ||
514 | } | ||
515 | |||
516 | if (ocfs2_supports_dir_trailer(dir)) { | ||
517 | ret = ocfs2_check_dir_trailer(dir, tmp); | ||
518 | if (ret) { | ||
519 | if (!*bh) | ||
520 | brelse(tmp); | ||
521 | mlog_errno(ret); | ||
522 | goto out; | ||
523 | } | ||
524 | } | ||
525 | |||
526 | if (!ret && !*bh) | ||
527 | *bh = tmp; | ||
528 | out: | ||
529 | return ret; | ||
530 | } | ||
531 | |||
532 | static int ocfs2_validate_dx_root(struct super_block *sb, | ||
533 | struct buffer_head *bh) | ||
534 | { | ||
535 | int ret; | ||
536 | struct ocfs2_dx_root_block *dx_root; | ||
537 | |||
538 | BUG_ON(!buffer_uptodate(bh)); | ||
539 | |||
540 | dx_root = (struct ocfs2_dx_root_block *) bh->b_data; | ||
541 | |||
542 | ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check); | ||
543 | if (ret) { | ||
544 | mlog(ML_ERROR, | ||
545 | "Checksum failed for dir index root block %llu\n", | ||
546 | (unsigned long long)bh->b_blocknr); | ||
547 | return ret; | ||
548 | } | ||
549 | |||
550 | if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) { | ||
551 | ocfs2_error(sb, | ||
552 | "Dir Index Root # %llu has bad signature %.*s", | ||
553 | (unsigned long long)le64_to_cpu(dx_root->dr_blkno), | ||
554 | 7, dx_root->dr_signature); | ||
555 | return -EINVAL; | ||
556 | } | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di, | ||
562 | struct buffer_head **dx_root_bh) | ||
563 | { | ||
564 | int ret; | ||
565 | u64 blkno = le64_to_cpu(di->i_dx_root); | ||
566 | struct buffer_head *tmp = *dx_root_bh; | ||
567 | |||
568 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root); | ||
569 | |||
570 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | ||
571 | if (!ret && !*dx_root_bh) | ||
572 | *dx_root_bh = tmp; | ||
573 | |||
574 | return ret; | ||
575 | } | ||
576 | |||
577 | static int ocfs2_validate_dx_leaf(struct super_block *sb, | ||
578 | struct buffer_head *bh) | ||
579 | { | ||
580 | int ret; | ||
581 | struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data; | ||
582 | |||
583 | BUG_ON(!buffer_uptodate(bh)); | ||
584 | |||
585 | ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check); | ||
586 | if (ret) { | ||
587 | mlog(ML_ERROR, | ||
588 | "Checksum failed for dir index leaf block %llu\n", | ||
589 | (unsigned long long)bh->b_blocknr); | ||
590 | return ret; | ||
591 | } | ||
592 | |||
593 | if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) { | ||
594 | ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s", | ||
595 | 7, dx_leaf->dl_signature); | ||
596 | return -EROFS; | ||
597 | } | ||
598 | |||
599 | return 0; | ||
600 | } | ||
601 | |||
602 | static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno, | ||
603 | struct buffer_head **dx_leaf_bh) | ||
604 | { | ||
605 | int ret; | ||
606 | struct buffer_head *tmp = *dx_leaf_bh; | ||
607 | |||
608 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf); | ||
609 | |||
610 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | ||
611 | if (!ret && !*dx_leaf_bh) | ||
612 | *dx_leaf_bh = tmp; | ||
613 | |||
614 | return ret; | ||
615 | } | ||
616 | |||
617 | /* | ||
618 | * Read a series of dx_leaf blocks. This expects all buffer_head | ||
619 | * pointers to be NULL on function entry. | ||
620 | */ | ||
621 | static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num, | ||
622 | struct buffer_head **dx_leaf_bhs) | ||
623 | { | ||
624 | int ret; | ||
625 | |||
626 | ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0, | ||
627 | ocfs2_validate_dx_leaf); | ||
628 | if (ret) | ||
629 | mlog_errno(ret); | ||
630 | |||
631 | return ret; | ||
632 | } | ||
633 | |||
387 | static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, | 634 | static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, |
388 | struct inode *dir, | 635 | struct inode *dir, |
389 | struct ocfs2_dir_entry **res_dir) | 636 | struct ocfs2_dir_entry **res_dir) |
@@ -485,6 +732,270 @@ cleanup_and_exit: | |||
485 | return ret; | 732 | return ret; |
486 | } | 733 | } |
487 | 734 | ||
735 | static int ocfs2_dx_dir_lookup_rec(struct inode *inode, | ||
736 | struct ocfs2_extent_list *el, | ||
737 | u32 major_hash, | ||
738 | u32 *ret_cpos, | ||
739 | u64 *ret_phys_blkno, | ||
740 | unsigned int *ret_clen) | ||
741 | { | ||
742 | int ret = 0, i, found; | ||
743 | struct buffer_head *eb_bh = NULL; | ||
744 | struct ocfs2_extent_block *eb; | ||
745 | struct ocfs2_extent_rec *rec = NULL; | ||
746 | |||
747 | if (el->l_tree_depth) { | ||
748 | ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh); | ||
749 | if (ret) { | ||
750 | mlog_errno(ret); | ||
751 | goto out; | ||
752 | } | ||
753 | |||
754 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
755 | el = &eb->h_list; | ||
756 | |||
757 | if (el->l_tree_depth) { | ||
758 | ocfs2_error(inode->i_sb, | ||
759 | "Inode %lu has non zero tree depth in " | ||
760 | "btree tree block %llu\n", inode->i_ino, | ||
761 | (unsigned long long)eb_bh->b_blocknr); | ||
762 | ret = -EROFS; | ||
763 | goto out; | ||
764 | } | ||
765 | } | ||
766 | |||
767 | found = 0; | ||
768 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
769 | rec = &el->l_recs[i]; | ||
770 | |||
771 | if (le32_to_cpu(rec->e_cpos) <= major_hash) { | ||
772 | found = 1; | ||
773 | break; | ||
774 | } | ||
775 | } | ||
776 | |||
777 | if (!found) { | ||
778 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
779 | "record (%u, %u, 0) in btree", inode->i_ino, | ||
780 | le32_to_cpu(rec->e_cpos), | ||
781 | ocfs2_rec_clusters(el, rec)); | ||
782 | ret = -EROFS; | ||
783 | goto out; | ||
784 | } | ||
785 | |||
786 | if (ret_phys_blkno) | ||
787 | *ret_phys_blkno = le64_to_cpu(rec->e_blkno); | ||
788 | if (ret_cpos) | ||
789 | *ret_cpos = le32_to_cpu(rec->e_cpos); | ||
790 | if (ret_clen) | ||
791 | *ret_clen = le16_to_cpu(rec->e_leaf_clusters); | ||
792 | |||
793 | out: | ||
794 | brelse(eb_bh); | ||
795 | return ret; | ||
796 | } | ||
797 | |||
798 | /* | ||
799 | * Returns the block index, from the start of the cluster which this | ||
800 | * hash belongs too. | ||
801 | */ | ||
802 | static unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb, | ||
803 | struct ocfs2_dx_hinfo *hinfo) | ||
804 | { | ||
805 | u32 minor_hash = hinfo->minor_hash; | ||
806 | return minor_hash & osb->osb_dx_mask; | ||
807 | } | ||
808 | |||
809 | static int ocfs2_dx_dir_lookup(struct inode *inode, | ||
810 | struct ocfs2_extent_list *el, | ||
811 | struct ocfs2_dx_hinfo *hinfo, | ||
812 | u32 *ret_cpos, | ||
813 | u64 *ret_phys_blkno) | ||
814 | { | ||
815 | int ret = 0; | ||
816 | unsigned int cend, uninitialized_var(clen); | ||
817 | u32 uninitialized_var(cpos); | ||
818 | u64 uninitialized_var(blkno); | ||
819 | u32 name_hash = hinfo->major_hash; | ||
820 | |||
821 | ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno, | ||
822 | &clen); | ||
823 | if (ret) { | ||
824 | mlog_errno(ret); | ||
825 | goto out; | ||
826 | } | ||
827 | |||
828 | cend = cpos + clen; | ||
829 | if (name_hash >= cend) { | ||
830 | /* We want the last cluster */ | ||
831 | blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1); | ||
832 | cpos += clen - 1; | ||
833 | } else { | ||
834 | blkno += ocfs2_clusters_to_blocks(inode->i_sb, | ||
835 | name_hash - cpos); | ||
836 | cpos = name_hash; | ||
837 | } | ||
838 | |||
839 | /* | ||
840 | * We now have the cluster which should hold our entry. To | ||
841 | * find the exact block from the start of the cluster to | ||
842 | * search, we take the lower bits of the hash. | ||
843 | */ | ||
844 | blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo); | ||
845 | |||
846 | if (ret_phys_blkno) | ||
847 | *ret_phys_blkno = blkno; | ||
848 | if (ret_cpos) | ||
849 | *ret_cpos = cpos; | ||
850 | |||
851 | out: | ||
852 | |||
853 | return ret; | ||
854 | } | ||
855 | |||
856 | static int ocfs2_dx_dir_search(const char *name, int namelen, | ||
857 | struct inode *dir, | ||
858 | struct ocfs2_extent_list *dr_el, | ||
859 | struct ocfs2_dir_lookup_result *res) | ||
860 | { | ||
861 | int ret, i, found; | ||
862 | u64 uninitialized_var(phys); | ||
863 | struct buffer_head *dx_leaf_bh = NULL; | ||
864 | struct ocfs2_dx_leaf *dx_leaf; | ||
865 | struct ocfs2_dx_entry *dx_entry = NULL; | ||
866 | struct buffer_head *dir_ent_bh = NULL; | ||
867 | struct ocfs2_dir_entry *dir_ent = NULL; | ||
868 | struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo; | ||
869 | |||
870 | ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo); | ||
871 | |||
872 | ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys); | ||
873 | if (ret) { | ||
874 | mlog_errno(ret); | ||
875 | goto out; | ||
876 | } | ||
877 | |||
878 | mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x " | ||
879 | "returns: %llu\n", | ||
880 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
881 | namelen, name, hinfo->major_hash, hinfo->minor_hash, | ||
882 | (unsigned long long)phys); | ||
883 | |||
884 | ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh); | ||
885 | if (ret) { | ||
886 | mlog_errno(ret); | ||
887 | goto out; | ||
888 | } | ||
889 | |||
890 | dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data; | ||
891 | |||
892 | mlog(0, "leaf info: num_used: %d, count: %d\n", | ||
893 | le16_to_cpu(dx_leaf->dl_list.de_num_used), | ||
894 | le16_to_cpu(dx_leaf->dl_list.de_count)); | ||
895 | |||
896 | /* | ||
897 | * Empty leaf is legal, so no need to check for that. | ||
898 | */ | ||
899 | found = 0; | ||
900 | for (i = 0; i < le16_to_cpu(dx_leaf->dl_list.de_num_used); i++) { | ||
901 | dx_entry = &dx_leaf->dl_list.de_entries[i]; | ||
902 | |||
903 | if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash) | ||
904 | || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash)) | ||
905 | continue; | ||
906 | |||
907 | /* | ||
908 | * Search unindexed leaf block now. We're not | ||
909 | * guaranteed to find anything. | ||
910 | */ | ||
911 | ret = ocfs2_read_dir_block_direct(dir, | ||
912 | le64_to_cpu(dx_entry->dx_dirent_blk), | ||
913 | &dir_ent_bh); | ||
914 | if (ret) { | ||
915 | mlog_errno(ret); | ||
916 | goto out; | ||
917 | } | ||
918 | |||
919 | /* | ||
920 | * XXX: We should check the unindexed block here, | ||
921 | * before using it. | ||
922 | */ | ||
923 | |||
924 | found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen, | ||
925 | 0, dir_ent_bh->b_data, | ||
926 | dir->i_sb->s_blocksize, &dir_ent); | ||
927 | if (found == 1) | ||
928 | break; | ||
929 | |||
930 | if (found == -1) { | ||
931 | /* This means we found a bad directory entry. */ | ||
932 | ret = -EIO; | ||
933 | mlog_errno(ret); | ||
934 | goto out; | ||
935 | } | ||
936 | |||
937 | brelse(dir_ent_bh); | ||
938 | dir_ent_bh = NULL; | ||
939 | } | ||
940 | |||
941 | if (found <= 0) { | ||
942 | ret = -ENOENT; | ||
943 | goto out; | ||
944 | } | ||
945 | |||
946 | res->dl_leaf_bh = dir_ent_bh; | ||
947 | res->dl_entry = dir_ent; | ||
948 | res->dl_dx_leaf_bh = dx_leaf_bh; | ||
949 | res->dl_dx_entry = dx_entry; | ||
950 | |||
951 | ret = 0; | ||
952 | out: | ||
953 | if (ret) { | ||
954 | brelse(dx_leaf_bh); | ||
955 | brelse(dir_ent_bh); | ||
956 | } | ||
957 | return ret; | ||
958 | } | ||
959 | |||
960 | static int ocfs2_find_entry_dx(const char *name, int namelen, | ||
961 | struct inode *dir, | ||
962 | struct ocfs2_dir_lookup_result *lookup) | ||
963 | { | ||
964 | int ret; | ||
965 | struct buffer_head *di_bh = NULL; | ||
966 | struct ocfs2_dinode *di; | ||
967 | struct buffer_head *dx_root_bh = NULL; | ||
968 | struct ocfs2_dx_root_block *dx_root; | ||
969 | |||
970 | ret = ocfs2_read_inode_block(dir, &di_bh); | ||
971 | if (ret) { | ||
972 | mlog_errno(ret); | ||
973 | goto out; | ||
974 | } | ||
975 | |||
976 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
977 | |||
978 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
979 | if (ret) { | ||
980 | mlog_errno(ret); | ||
981 | goto out; | ||
982 | } | ||
983 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
984 | |||
985 | ret = ocfs2_dx_dir_search(name, namelen, dir, &dx_root->dr_list, | ||
986 | lookup); | ||
987 | if (ret) { | ||
988 | if (ret != -ENOENT) | ||
989 | mlog_errno(ret); | ||
990 | goto out; | ||
991 | } | ||
992 | |||
993 | out: | ||
994 | brelse(di_bh); | ||
995 | brelse(dx_root_bh); | ||
996 | return ret; | ||
997 | } | ||
998 | |||
488 | /* | 999 | /* |
489 | * Try to find an entry of the provided name within 'dir'. | 1000 | * Try to find an entry of the provided name within 'dir'. |
490 | * | 1001 | * |
@@ -493,10 +1004,11 @@ cleanup_and_exit: | |||
493 | * other directory manipulation functions. | 1004 | * other directory manipulation functions. |
494 | * | 1005 | * |
495 | * Caller can NOT assume anything about the contents of the | 1006 | * Caller can NOT assume anything about the contents of the |
496 | * buffer_heads - they are passed back only so that it can be passed into | 1007 | * buffer_heads - they are passed back only so that it can be passed |
497 | * any one of the manipulation functions (add entry, delete entry, | 1008 | * into any one of the manipulation functions (add entry, delete |
498 | * etc). As an example, bh in the extent directory case is a data | 1009 | * entry, etc). As an example, bh in the extent directory case is a |
499 | * block, in the inline-data case it actually points to an inode. | 1010 | * data block, in the inline-data case it actually points to an inode, |
1011 | * in the indexed directory case, multiple buffers are involved. | ||
500 | */ | 1012 | */ |
501 | int ocfs2_find_entry(const char *name, int namelen, | 1013 | int ocfs2_find_entry(const char *name, int namelen, |
502 | struct inode *dir, struct ocfs2_dir_lookup_result *lookup) | 1014 | struct inode *dir, struct ocfs2_dir_lookup_result *lookup) |
@@ -504,6 +1016,14 @@ int ocfs2_find_entry(const char *name, int namelen, | |||
504 | struct buffer_head *bh; | 1016 | struct buffer_head *bh; |
505 | struct ocfs2_dir_entry *res_dir = NULL; | 1017 | struct ocfs2_dir_entry *res_dir = NULL; |
506 | 1018 | ||
1019 | if (ocfs2_dir_indexed(dir)) | ||
1020 | return ocfs2_find_entry_dx(name, namelen, dir, lookup); | ||
1021 | |||
1022 | /* | ||
1023 | * The unindexed dir code only uses part of the lookup | ||
1024 | * structure, so there's no reason to push it down further | ||
1025 | * than this. | ||
1026 | */ | ||
507 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1027 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
508 | bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir); | 1028 | bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir); |
509 | else | 1029 | else |
@@ -553,6 +1073,10 @@ out: | |||
553 | return ret; | 1073 | return ret; |
554 | } | 1074 | } |
555 | 1075 | ||
1076 | /* | ||
1077 | * __ocfs2_delete_entry deletes a directory entry by merging it with the | ||
1078 | * previous entry | ||
1079 | */ | ||
556 | static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, | 1080 | static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, |
557 | struct ocfs2_dir_entry *de_del, | 1081 | struct ocfs2_dir_entry *de_del, |
558 | struct buffer_head *bh, char *first_de, | 1082 | struct buffer_head *bh, char *first_de, |
@@ -602,6 +1126,79 @@ bail: | |||
602 | return status; | 1126 | return status; |
603 | } | 1127 | } |
604 | 1128 | ||
1129 | static void ocfs2_dx_leaf_remove_entry(struct ocfs2_dx_leaf *dx_leaf, int index) | ||
1130 | { | ||
1131 | struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; | ||
1132 | int num_used = le16_to_cpu(dl_list->de_num_used); | ||
1133 | |||
1134 | if (num_used == 1 || index == (num_used - 1)) | ||
1135 | goto clear; | ||
1136 | |||
1137 | memmove(&dl_list->de_entries[index], &dl_list->de_entries[index + 1], | ||
1138 | (num_used - index - 1)*sizeof(struct ocfs2_dx_entry)); | ||
1139 | clear: | ||
1140 | num_used--; | ||
1141 | memset(&dl_list->de_entries[num_used], 0, | ||
1142 | sizeof(struct ocfs2_dx_entry)); | ||
1143 | dl_list->de_num_used = cpu_to_le16(num_used); | ||
1144 | } | ||
1145 | |||
1146 | static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | ||
1147 | struct ocfs2_dir_lookup_result *lookup) | ||
1148 | { | ||
1149 | int ret, index; | ||
1150 | struct buffer_head *leaf_bh = lookup->dl_leaf_bh; | ||
1151 | struct ocfs2_dx_leaf *dx_leaf; | ||
1152 | struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry; | ||
1153 | |||
1154 | dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data; | ||
1155 | /* Neither of these are a disk corruption - that should have | ||
1156 | * been caught by lookup, before we got here. */ | ||
1157 | BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_count) <= 0); | ||
1158 | BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_num_used) <= 0); | ||
1159 | |||
1160 | index = (char *)dx_entry - (char *)dx_leaf->dl_list.de_entries; | ||
1161 | index /= sizeof(*dx_entry); | ||
1162 | |||
1163 | if (index >= le16_to_cpu(dx_leaf->dl_list.de_num_used)) { | ||
1164 | mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n", | ||
1165 | (unsigned long long)OCFS2_I(dir)->ip_blkno, index, dx_leaf, | ||
1166 | dx_entry); | ||
1167 | return -EIO; | ||
1168 | } | ||
1169 | |||
1170 | mlog(0, "Dir %llu: delete entry at index: %d\n", | ||
1171 | (unsigned long long)OCFS2_I(dir)->ip_blkno, index); | ||
1172 | |||
1173 | /* | ||
1174 | * Add the index leaf into the journal before removing the | ||
1175 | * unindexed entry. If we get an error return from | ||
1176 | * __ocfs2_delete_entry(), then it hasn't removed the entry | ||
1177 | * yet. Likewise, successful return means we *must* remove the | ||
1178 | * indexed entry. | ||
1179 | */ | ||
1180 | ret = ocfs2_journal_access_dl(handle, dir, lookup->dl_dx_leaf_bh, | ||
1181 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1182 | if (ret) { | ||
1183 | mlog_errno(ret); | ||
1184 | goto out; | ||
1185 | } | ||
1186 | |||
1187 | ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry, | ||
1188 | leaf_bh, leaf_bh->b_data, leaf_bh->b_size); | ||
1189 | if (ret) { | ||
1190 | mlog_errno(ret); | ||
1191 | goto out; | ||
1192 | } | ||
1193 | |||
1194 | ocfs2_dx_leaf_remove_entry(dx_leaf, index); | ||
1195 | |||
1196 | ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh); | ||
1197 | |||
1198 | out: | ||
1199 | return ret; | ||
1200 | } | ||
1201 | |||
605 | static inline int ocfs2_delete_entry_id(handle_t *handle, | 1202 | static inline int ocfs2_delete_entry_id(handle_t *handle, |
606 | struct inode *dir, | 1203 | struct inode *dir, |
607 | struct ocfs2_dir_entry *de_del, | 1204 | struct ocfs2_dir_entry *de_del, |
@@ -639,13 +1236,16 @@ static inline int ocfs2_delete_entry_el(handle_t *handle, | |||
639 | } | 1236 | } |
640 | 1237 | ||
641 | /* | 1238 | /* |
642 | * ocfs2_delete_entry deletes a directory entry by merging it with the | 1239 | * Delete a directory entry. Hide the details of directory |
643 | * previous entry | 1240 | * implementation from the caller. |
644 | */ | 1241 | */ |
645 | int ocfs2_delete_entry(handle_t *handle, | 1242 | int ocfs2_delete_entry(handle_t *handle, |
646 | struct inode *dir, | 1243 | struct inode *dir, |
647 | struct ocfs2_dir_lookup_result *res) | 1244 | struct ocfs2_dir_lookup_result *res) |
648 | { | 1245 | { |
1246 | if (ocfs2_dir_indexed(dir)) | ||
1247 | return ocfs2_delete_entry_dx(handle, dir, res); | ||
1248 | |||
649 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1249 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
650 | return ocfs2_delete_entry_id(handle, dir, res->dl_entry, | 1250 | return ocfs2_delete_entry_id(handle, dir, res->dl_entry, |
651 | res->dl_leaf_bh); | 1251 | res->dl_leaf_bh); |
@@ -679,6 +1279,58 @@ static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de, | |||
679 | return 0; | 1279 | return 0; |
680 | } | 1280 | } |
681 | 1281 | ||
1282 | static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf, | ||
1283 | struct ocfs2_dx_entry *dx_new_entry) | ||
1284 | { | ||
1285 | int i; | ||
1286 | |||
1287 | i = le16_to_cpu(dx_leaf->dl_list.de_num_used); | ||
1288 | dx_leaf->dl_list.de_entries[i] = *dx_new_entry; | ||
1289 | |||
1290 | le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1); | ||
1291 | } | ||
1292 | |||
1293 | static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, | ||
1294 | struct ocfs2_dx_hinfo *hinfo, | ||
1295 | u64 dirent_blk, | ||
1296 | struct buffer_head *dx_leaf_bh) | ||
1297 | { | ||
1298 | int ret, i; | ||
1299 | struct ocfs2_dx_entry *dx_entry; | ||
1300 | struct ocfs2_dx_leaf *dx_leaf; | ||
1301 | |||
1302 | ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, | ||
1303 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1304 | if (ret) { | ||
1305 | mlog_errno(ret); | ||
1306 | goto out; | ||
1307 | } | ||
1308 | |||
1309 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
1310 | i = le16_to_cpu(dx_leaf->dl_list.de_num_used); | ||
1311 | dx_entry = &dx_leaf->dl_list.de_entries[i]; | ||
1312 | |||
1313 | memset(dx_entry, 0, sizeof(*dx_entry)); | ||
1314 | dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash); | ||
1315 | dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash); | ||
1316 | dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk); | ||
1317 | |||
1318 | le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1); | ||
1319 | |||
1320 | ocfs2_journal_dirty(handle, dx_leaf_bh); | ||
1321 | |||
1322 | out: | ||
1323 | return ret; | ||
1324 | } | ||
1325 | |||
1326 | static int ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, | ||
1327 | struct ocfs2_dir_lookup_result *lookup) | ||
1328 | { | ||
1329 | return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo, | ||
1330 | lookup->dl_leaf_bh->b_blocknr, | ||
1331 | lookup->dl_dx_leaf_bh); | ||
1332 | } | ||
1333 | |||
682 | /* we don't always have a dentry for what we want to add, so people | 1334 | /* we don't always have a dentry for what we want to add, so people |
683 | * like orphan dir can call this instead. | 1335 | * like orphan dir can call this instead. |
684 | * | 1336 | * |
@@ -754,10 +1406,21 @@ int __ocfs2_add_entry(handle_t *handle, | |||
754 | status = ocfs2_journal_access_di(handle, dir, | 1406 | status = ocfs2_journal_access_di(handle, dir, |
755 | insert_bh, | 1407 | insert_bh, |
756 | OCFS2_JOURNAL_ACCESS_WRITE); | 1408 | OCFS2_JOURNAL_ACCESS_WRITE); |
757 | else | 1409 | else { |
758 | status = ocfs2_journal_access_db(handle, dir, | 1410 | status = ocfs2_journal_access_db(handle, dir, |
759 | insert_bh, | 1411 | insert_bh, |
760 | OCFS2_JOURNAL_ACCESS_WRITE); | 1412 | OCFS2_JOURNAL_ACCESS_WRITE); |
1413 | if (ocfs2_dir_indexed(dir)) { | ||
1414 | status = ocfs2_dx_dir_leaf_insert(dir, | ||
1415 | handle, | ||
1416 | lookup); | ||
1417 | if (status) { | ||
1418 | mlog_errno(status); | ||
1419 | goto bail; | ||
1420 | } | ||
1421 | } | ||
1422 | } | ||
1423 | |||
761 | /* By now the buffer is marked for journaling */ | 1424 | /* By now the buffer is marked for journaling */ |
762 | offset += le16_to_cpu(de->rec_len); | 1425 | offset += le16_to_cpu(de->rec_len); |
763 | if (le64_to_cpu(de->inode)) { | 1426 | if (le64_to_cpu(de->inode)) { |
@@ -887,6 +1550,10 @@ out: | |||
887 | return 0; | 1550 | return 0; |
888 | } | 1551 | } |
889 | 1552 | ||
1553 | /* | ||
1554 | * NOTE: This function can be called against unindexed directories, | ||
1555 | * and indexed ones. | ||
1556 | */ | ||
890 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, | 1557 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, |
891 | u64 *f_version, | 1558 | u64 *f_version, |
892 | loff_t *f_pos, void *priv, | 1559 | loff_t *f_pos, void *priv, |
@@ -1184,6 +1851,8 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, | |||
1184 | * routine to check that the specified directory is empty (for rmdir) | 1851 | * routine to check that the specified directory is empty (for rmdir) |
1185 | * | 1852 | * |
1186 | * Returns 1 if dir is empty, zero otherwise. | 1853 | * Returns 1 if dir is empty, zero otherwise. |
1854 | * | ||
1855 | * XXX: This is a performance problem | ||
1187 | */ | 1856 | */ |
1188 | int ocfs2_empty_dir(struct inode *inode) | 1857 | int ocfs2_empty_dir(struct inode *inode) |
1189 | { | 1858 | { |
@@ -1285,7 +1954,8 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1285 | struct inode *parent, | 1954 | struct inode *parent, |
1286 | struct inode *inode, | 1955 | struct inode *inode, |
1287 | struct buffer_head *fe_bh, | 1956 | struct buffer_head *fe_bh, |
1288 | struct ocfs2_alloc_context *data_ac) | 1957 | struct ocfs2_alloc_context *data_ac, |
1958 | struct buffer_head **ret_new_bh) | ||
1289 | { | 1959 | { |
1290 | int status; | 1960 | int status; |
1291 | unsigned int size = osb->sb->s_blocksize; | 1961 | unsigned int size = osb->sb->s_blocksize; |
@@ -1334,6 +2004,10 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1334 | } | 2004 | } |
1335 | 2005 | ||
1336 | status = 0; | 2006 | status = 0; |
2007 | if (ret_new_bh) { | ||
2008 | *ret_new_bh = new_bh; | ||
2009 | new_bh = NULL; | ||
2010 | } | ||
1337 | bail: | 2011 | bail: |
1338 | brelse(new_bh); | 2012 | brelse(new_bh); |
1339 | 2013 | ||
@@ -1341,20 +2015,382 @@ bail: | |||
1341 | return status; | 2015 | return status; |
1342 | } | 2016 | } |
1343 | 2017 | ||
2018 | static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | ||
2019 | handle_t *handle, struct inode *dir, | ||
2020 | struct buffer_head *di_bh, | ||
2021 | struct ocfs2_alloc_context *meta_ac, | ||
2022 | struct buffer_head **ret_dx_root_bh) | ||
2023 | { | ||
2024 | int ret; | ||
2025 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
2026 | u16 dr_suballoc_bit; | ||
2027 | u64 dr_blkno; | ||
2028 | unsigned int num_bits; | ||
2029 | struct buffer_head *dx_root_bh = NULL; | ||
2030 | struct ocfs2_dx_root_block *dx_root; | ||
2031 | |||
2032 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, | ||
2033 | &num_bits, &dr_blkno); | ||
2034 | if (ret) { | ||
2035 | mlog_errno(ret); | ||
2036 | goto out; | ||
2037 | } | ||
2038 | |||
2039 | mlog(0, "Dir %llu, attach new index block: %llu\n", | ||
2040 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
2041 | (unsigned long long)dr_blkno); | ||
2042 | |||
2043 | dx_root_bh = sb_getblk(osb->sb, dr_blkno); | ||
2044 | if (dx_root_bh == NULL) { | ||
2045 | ret = -EIO; | ||
2046 | goto out; | ||
2047 | } | ||
2048 | ocfs2_set_new_buffer_uptodate(dir, dx_root_bh); | ||
2049 | |||
2050 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | ||
2051 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2052 | if (ret < 0) { | ||
2053 | mlog_errno(ret); | ||
2054 | goto out; | ||
2055 | } | ||
2056 | |||
2057 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
2058 | memset(dx_root, 0, osb->sb->s_blocksize); | ||
2059 | strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); | ||
2060 | dx_root->dr_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
2061 | dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); | ||
2062 | dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2063 | dx_root->dr_blkno = cpu_to_le64(dr_blkno); | ||
2064 | dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno); | ||
2065 | dx_root->dr_list.l_count = | ||
2066 | cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); | ||
2067 | |||
2068 | ret = ocfs2_journal_dirty(handle, dx_root_bh); | ||
2069 | if (ret) | ||
2070 | mlog_errno(ret); | ||
2071 | |||
2072 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | ||
2073 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2074 | if (ret) { | ||
2075 | mlog_errno(ret); | ||
2076 | goto out; | ||
2077 | } | ||
2078 | |||
2079 | di->i_dx_root = cpu_to_le64(dr_blkno); | ||
2080 | |||
2081 | OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; | ||
2082 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | ||
2083 | |||
2084 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
2085 | if (ret) | ||
2086 | mlog_errno(ret); | ||
2087 | |||
2088 | *ret_dx_root_bh = dx_root_bh; | ||
2089 | dx_root_bh = NULL; | ||
2090 | |||
2091 | out: | ||
2092 | brelse(dx_root_bh); | ||
2093 | return ret; | ||
2094 | } | ||
2095 | |||
2096 | static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb, | ||
2097 | handle_t *handle, struct inode *dir, | ||
2098 | struct buffer_head **dx_leaves, | ||
2099 | int num_dx_leaves, u64 start_blk) | ||
2100 | { | ||
2101 | int ret, i; | ||
2102 | struct ocfs2_dx_leaf *dx_leaf; | ||
2103 | struct buffer_head *bh; | ||
2104 | |||
2105 | for (i = 0; i < num_dx_leaves; i++) { | ||
2106 | bh = sb_getblk(osb->sb, start_blk + i); | ||
2107 | if (bh == NULL) { | ||
2108 | ret = -EIO; | ||
2109 | goto out; | ||
2110 | } | ||
2111 | dx_leaves[i] = bh; | ||
2112 | |||
2113 | ocfs2_set_new_buffer_uptodate(dir, bh); | ||
2114 | |||
2115 | ret = ocfs2_journal_access_dl(handle, dir, bh, | ||
2116 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2117 | if (ret < 0) { | ||
2118 | mlog_errno(ret); | ||
2119 | goto out; | ||
2120 | } | ||
2121 | |||
2122 | dx_leaf = (struct ocfs2_dx_leaf *) bh->b_data; | ||
2123 | |||
2124 | memset(dx_leaf, 0, osb->sb->s_blocksize); | ||
2125 | strcpy(dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE); | ||
2126 | dx_leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2127 | dx_leaf->dl_blkno = cpu_to_le64(bh->b_blocknr); | ||
2128 | dx_leaf->dl_list.de_count = | ||
2129 | cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb)); | ||
2130 | |||
2131 | mlog(0, | ||
2132 | "Dir %llu, format dx_leaf: %llu, entry count: %u\n", | ||
2133 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
2134 | (unsigned long long)bh->b_blocknr, | ||
2135 | le16_to_cpu(dx_leaf->dl_list.de_count)); | ||
2136 | |||
2137 | ocfs2_journal_dirty(handle, bh); | ||
2138 | } | ||
2139 | |||
2140 | ret = 0; | ||
2141 | out: | ||
2142 | return ret; | ||
2143 | } | ||
2144 | |||
2145 | /* | ||
2146 | * Allocates and formats a new cluster for use in an indexed dir | ||
2147 | * leaf. This version will not do the extent insert, so that it can be | ||
2148 | * used by operations which need careful ordering. | ||
2149 | */ | ||
2150 | static int __ocfs2_dx_dir_new_cluster(struct inode *dir, | ||
2151 | u32 cpos, handle_t *handle, | ||
2152 | struct ocfs2_alloc_context *data_ac, | ||
2153 | struct buffer_head **dx_leaves, | ||
2154 | int num_dx_leaves, u64 *ret_phys_blkno) | ||
2155 | { | ||
2156 | int ret; | ||
2157 | u32 phys, num; | ||
2158 | u64 phys_blkno; | ||
2159 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2160 | |||
2161 | /* | ||
2162 | * XXX: For create, this should claim cluster for the index | ||
2163 | * *before* the unindexed insert so that we have a better | ||
2164 | * chance of contiguousness as the directory grows in number | ||
2165 | * of entries. | ||
2166 | */ | ||
2167 | ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num); | ||
2168 | if (ret) { | ||
2169 | mlog_errno(ret); | ||
2170 | goto out; | ||
2171 | } | ||
2172 | |||
2173 | /* | ||
2174 | * Format the new cluster first. That way, we're inserting | ||
2175 | * valid data. | ||
2176 | */ | ||
2177 | phys_blkno = ocfs2_clusters_to_blocks(osb->sb, phys); | ||
2178 | ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves, | ||
2179 | num_dx_leaves, phys_blkno); | ||
2180 | if (ret) { | ||
2181 | mlog_errno(ret); | ||
2182 | goto out; | ||
2183 | } | ||
2184 | |||
2185 | *ret_phys_blkno = phys_blkno; | ||
2186 | out: | ||
2187 | return ret; | ||
2188 | } | ||
2189 | |||
2190 | static int ocfs2_dx_dir_new_cluster(struct inode *dir, | ||
2191 | struct ocfs2_extent_tree *et, | ||
2192 | u32 cpos, handle_t *handle, | ||
2193 | struct ocfs2_alloc_context *data_ac, | ||
2194 | struct ocfs2_alloc_context *meta_ac, | ||
2195 | struct buffer_head **dx_leaves, | ||
2196 | int num_dx_leaves) | ||
2197 | { | ||
2198 | int ret; | ||
2199 | u64 phys_blkno; | ||
2200 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2201 | |||
2202 | ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves, | ||
2203 | num_dx_leaves, &phys_blkno); | ||
2204 | if (ret) { | ||
2205 | mlog_errno(ret); | ||
2206 | goto out; | ||
2207 | } | ||
2208 | |||
2209 | ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0, | ||
2210 | meta_ac); | ||
2211 | if (ret) | ||
2212 | mlog_errno(ret); | ||
2213 | out: | ||
2214 | return ret; | ||
2215 | } | ||
2216 | |||
2217 | static struct buffer_head **ocfs2_dx_dir_kmalloc_leaves(struct super_block *sb, | ||
2218 | int *ret_num_leaves) | ||
2219 | { | ||
2220 | int num_dx_leaves = ocfs2_clusters_to_blocks(sb, 1); | ||
2221 | struct buffer_head **dx_leaves; | ||
2222 | |||
2223 | dx_leaves = kcalloc(num_dx_leaves, sizeof(struct buffer_head *), | ||
2224 | GFP_NOFS); | ||
2225 | if (dx_leaves && ret_num_leaves) | ||
2226 | *ret_num_leaves = num_dx_leaves; | ||
2227 | |||
2228 | return dx_leaves; | ||
2229 | } | ||
2230 | |||
2231 | static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb, | ||
2232 | handle_t *handle, | ||
2233 | struct inode *parent, | ||
2234 | struct inode *inode, | ||
2235 | struct buffer_head *di_bh, | ||
2236 | struct ocfs2_alloc_context *data_ac, | ||
2237 | struct ocfs2_alloc_context *meta_ac) | ||
2238 | { | ||
2239 | int ret, num_dx_leaves, i; | ||
2240 | struct buffer_head *leaf_bh = NULL; | ||
2241 | struct buffer_head *dx_root_bh = NULL; | ||
2242 | struct buffer_head **dx_leaves = NULL; | ||
2243 | struct ocfs2_extent_tree et; | ||
2244 | struct ocfs2_dx_hinfo hinfo; | ||
2245 | u64 insert_blkno; | ||
2246 | |||
2247 | dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves); | ||
2248 | if (!dx_leaves) { | ||
2249 | ret = -ENOMEM; | ||
2250 | mlog_errno(ret); | ||
2251 | goto out; | ||
2252 | } | ||
2253 | |||
2254 | /* | ||
2255 | * Our strategy is to create the directory as though it were | ||
2256 | * unindexed, then add the index block. This works with very | ||
2257 | * little complication since the state of a new directory is a | ||
2258 | * very well known quantity. | ||
2259 | * | ||
2260 | * Essentially, we have two dirents ("." and ".."), in the 1st | ||
2261 | * block which need indexing. | ||
2262 | */ | ||
2263 | |||
2264 | ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh, | ||
2265 | data_ac, &leaf_bh); | ||
2266 | if (ret) { | ||
2267 | mlog_errno(ret); | ||
2268 | goto out; | ||
2269 | } | ||
2270 | |||
2271 | /* | ||
2272 | * Allocate and format the index leaf first, before attaching | ||
2273 | * the index root. That way we're sure that the main bitmap | ||
2274 | * won't -enospc on us with a half-created dir index. | ||
2275 | * | ||
2276 | * The meta data allocation for our index block will not | ||
2277 | * -enospc on us unless there is a disk corruption. | ||
2278 | */ | ||
2279 | |||
2280 | ret = __ocfs2_dx_dir_new_cluster(inode, 0, handle, data_ac, dx_leaves, | ||
2281 | num_dx_leaves, &insert_blkno); | ||
2282 | if (ret) { | ||
2283 | mlog_errno(ret); | ||
2284 | goto out; | ||
2285 | } | ||
2286 | |||
2287 | ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo); | ||
2288 | i = ocfs2_dx_dir_hash_idx(osb, &hinfo); | ||
2289 | ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo, | ||
2290 | leaf_bh->b_blocknr, dx_leaves[i]); | ||
2291 | if (ret) { | ||
2292 | mlog_errno(ret); | ||
2293 | goto out; | ||
2294 | } | ||
2295 | |||
2296 | ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo); | ||
2297 | i = ocfs2_dx_dir_hash_idx(osb, &hinfo); | ||
2298 | ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo, | ||
2299 | leaf_bh->b_blocknr, dx_leaves[i]); | ||
2300 | if (ret) { | ||
2301 | mlog_errno(ret); | ||
2302 | goto out; | ||
2303 | } | ||
2304 | |||
2305 | ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, meta_ac, | ||
2306 | &dx_root_bh); | ||
2307 | if (ret) { | ||
2308 | mlog_errno(ret); | ||
2309 | goto out; | ||
2310 | } | ||
2311 | |||
2312 | /* This should never fail considering we start with an empty | ||
2313 | * dx_root. */ | ||
2314 | ocfs2_init_dx_root_extent_tree(&et, inode, dx_root_bh); | ||
2315 | ret = ocfs2_insert_extent(osb, handle, inode, &et, 0, | ||
2316 | insert_blkno, 1, 0, NULL); | ||
2317 | if (ret) | ||
2318 | mlog_errno(ret); | ||
2319 | |||
2320 | out: | ||
2321 | if (dx_leaves) { | ||
2322 | for (i = 0; i < num_dx_leaves; i++) | ||
2323 | brelse(dx_leaves[i]); | ||
2324 | kfree(dx_leaves); | ||
2325 | } | ||
2326 | brelse(dx_root_bh); | ||
2327 | brelse(leaf_bh); | ||
2328 | return ret; | ||
2329 | } | ||
2330 | |||
1344 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, | 2331 | int ocfs2_fill_new_dir(struct ocfs2_super *osb, |
1345 | handle_t *handle, | 2332 | handle_t *handle, |
1346 | struct inode *parent, | 2333 | struct inode *parent, |
1347 | struct inode *inode, | 2334 | struct inode *inode, |
1348 | struct buffer_head *fe_bh, | 2335 | struct buffer_head *fe_bh, |
1349 | struct ocfs2_alloc_context *data_ac) | 2336 | struct ocfs2_alloc_context *data_ac, |
2337 | struct ocfs2_alloc_context *meta_ac) | ||
2338 | |||
1350 | { | 2339 | { |
1351 | BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL); | 2340 | BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL); |
1352 | 2341 | ||
1353 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 2342 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
1354 | return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh); | 2343 | return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh); |
1355 | 2344 | ||
2345 | if (ocfs2_supports_indexed_dirs(osb)) | ||
2346 | return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, fe_bh, | ||
2347 | data_ac, meta_ac); | ||
2348 | |||
1356 | return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh, | 2349 | return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh, |
1357 | data_ac); | 2350 | data_ac, NULL); |
2351 | } | ||
2352 | |||
2353 | static int ocfs2_dx_dir_index_block(struct inode *dir, | ||
2354 | handle_t *handle, | ||
2355 | struct buffer_head **dx_leaves, | ||
2356 | int num_dx_leaves, | ||
2357 | struct buffer_head *dirent_bh) | ||
2358 | { | ||
2359 | int ret, namelen, i; | ||
2360 | char *de_buf, *limit; | ||
2361 | struct ocfs2_dir_entry *de; | ||
2362 | struct buffer_head *dx_leaf_bh; | ||
2363 | struct ocfs2_dx_hinfo hinfo; | ||
2364 | u64 dirent_blk = dirent_bh->b_blocknr; | ||
2365 | |||
2366 | de_buf = dirent_bh->b_data; | ||
2367 | limit = de_buf + dir->i_sb->s_blocksize; | ||
2368 | |||
2369 | while (de_buf < limit) { | ||
2370 | de = (struct ocfs2_dir_entry *)de_buf; | ||
2371 | |||
2372 | namelen = de->name_len; | ||
2373 | if (!namelen || !de->inode) | ||
2374 | goto inc; | ||
2375 | |||
2376 | ocfs2_dx_dir_name_hash(dir, de->name, namelen, &hinfo); | ||
2377 | |||
2378 | i = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), &hinfo); | ||
2379 | dx_leaf_bh = dx_leaves[i]; | ||
2380 | |||
2381 | ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &hinfo, | ||
2382 | dirent_blk, dx_leaf_bh); | ||
2383 | if (ret) { | ||
2384 | mlog_errno(ret); | ||
2385 | goto out; | ||
2386 | } | ||
2387 | |||
2388 | inc: | ||
2389 | de_buf += le16_to_cpu(de->rec_len); | ||
2390 | } | ||
2391 | |||
2392 | out: | ||
2393 | return ret; | ||
1358 | } | 2394 | } |
1359 | 2395 | ||
1360 | /* | 2396 | /* |
@@ -1401,29 +2437,57 @@ static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, | |||
1401 | */ | 2437 | */ |
1402 | static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | 2438 | static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, |
1403 | unsigned int blocks_wanted, | 2439 | unsigned int blocks_wanted, |
2440 | struct ocfs2_dir_lookup_result *lookup, | ||
1404 | struct buffer_head **first_block_bh) | 2441 | struct buffer_head **first_block_bh) |
1405 | { | 2442 | { |
1406 | u32 alloc, bit_off, len; | 2443 | u32 alloc, dx_alloc, bit_off, len; |
1407 | struct super_block *sb = dir->i_sb; | 2444 | struct super_block *sb = dir->i_sb; |
1408 | int ret, credits = ocfs2_inline_to_extents_credits(sb); | 2445 | int ret, i, num_dx_leaves = 0, |
1409 | u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; | 2446 | credits = ocfs2_inline_to_extents_credits(sb); |
2447 | u64 dx_insert_blkno, blkno, | ||
2448 | bytes = blocks_wanted << sb->s_blocksize_bits; | ||
1410 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 2449 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
1411 | struct ocfs2_inode_info *oi = OCFS2_I(dir); | 2450 | struct ocfs2_inode_info *oi = OCFS2_I(dir); |
1412 | struct ocfs2_alloc_context *data_ac; | 2451 | struct ocfs2_alloc_context *data_ac; |
2452 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1413 | struct buffer_head *dirdata_bh = NULL; | 2453 | struct buffer_head *dirdata_bh = NULL; |
2454 | struct buffer_head *dx_root_bh = NULL; | ||
2455 | struct buffer_head **dx_leaves = NULL; | ||
1414 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 2456 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
1415 | handle_t *handle; | 2457 | handle_t *handle; |
1416 | struct ocfs2_extent_tree et; | 2458 | struct ocfs2_extent_tree et; |
1417 | int did_quota = 0; | 2459 | struct ocfs2_extent_tree dx_et; |
2460 | int did_quota = 0, bytes_allocated = 0; | ||
1418 | 2461 | ||
1419 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); | 2462 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); |
1420 | 2463 | ||
1421 | alloc = ocfs2_clusters_for_bytes(sb, bytes); | 2464 | alloc = ocfs2_clusters_for_bytes(sb, bytes); |
2465 | dx_alloc = 0; | ||
2466 | |||
2467 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
2468 | /* Add one more cluster for an index leaf */ | ||
2469 | dx_alloc++; | ||
2470 | credits += ocfs2_add_dir_index_credits(sb); | ||
2471 | |||
2472 | dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb, &num_dx_leaves); | ||
2473 | if (!dx_leaves) { | ||
2474 | ret = -ENOMEM; | ||
2475 | mlog_errno(ret); | ||
2476 | goto out; | ||
2477 | } | ||
2478 | |||
2479 | /* This gets us the dx_root */ | ||
2480 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
2481 | if (ret) { | ||
2482 | mlog_errno(ret); | ||
2483 | goto out; | ||
2484 | } | ||
2485 | } | ||
1422 | 2486 | ||
1423 | /* | 2487 | /* |
1424 | * We should never need more than 2 clusters for this - | 2488 | * We should never need more than 2 clusters for the unindexed |
1425 | * maximum dirent size is far less than one block. In fact, | 2489 | * tree - maximum dirent size is far less than one block. In |
1426 | * the only time we'd need more than one cluster is if | 2490 | * fact, the only time we'd need more than one cluster is if |
1427 | * blocksize == clustersize and the dirent won't fit in the | 2491 | * blocksize == clustersize and the dirent won't fit in the |
1428 | * extra space that the expansion to a single block gives. As | 2492 | * extra space that the expansion to a single block gives. As |
1429 | * of today, that only happens on 4k/4k file systems. | 2493 | * of today, that only happens on 4k/4k file systems. |
@@ -1440,7 +2504,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1440 | 2504 | ||
1441 | /* | 2505 | /* |
1442 | * Prepare for worst case allocation scenario of two separate | 2506 | * Prepare for worst case allocation scenario of two separate |
1443 | * extents. | 2507 | * extents in the unindexed tree. |
1444 | */ | 2508 | */ |
1445 | if (alloc == 2) | 2509 | if (alloc == 2) |
1446 | credits += OCFS2_SUBALLOC_ALLOC; | 2510 | credits += OCFS2_SUBALLOC_ALLOC; |
@@ -1453,11 +2517,29 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1453 | } | 2517 | } |
1454 | 2518 | ||
1455 | if (vfs_dq_alloc_space_nodirty(dir, | 2519 | if (vfs_dq_alloc_space_nodirty(dir, |
1456 | ocfs2_clusters_to_bytes(osb->sb, alloc))) { | 2520 | ocfs2_clusters_to_bytes(osb->sb, |
2521 | alloc + dx_alloc))) { | ||
1457 | ret = -EDQUOT; | 2522 | ret = -EDQUOT; |
1458 | goto out_commit; | 2523 | goto out_commit; |
1459 | } | 2524 | } |
1460 | did_quota = 1; | 2525 | did_quota = 1; |
2526 | |||
2527 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
2528 | /* | ||
2529 | * Allocate our index cluster first, to maximize the | ||
2530 | * possibility that unindexed leaves grow | ||
2531 | * contiguously. | ||
2532 | */ | ||
2533 | ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, | ||
2534 | dx_leaves, num_dx_leaves, | ||
2535 | &dx_insert_blkno); | ||
2536 | if (ret) { | ||
2537 | mlog_errno(ret); | ||
2538 | goto out_commit; | ||
2539 | } | ||
2540 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
2541 | } | ||
2542 | |||
1461 | /* | 2543 | /* |
1462 | * Try to claim as many clusters as the bitmap can give though | 2544 | * Try to claim as many clusters as the bitmap can give though |
1463 | * if we only get one now, that's enough to continue. The rest | 2545 | * if we only get one now, that's enough to continue. The rest |
@@ -1468,6 +2550,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1468 | mlog_errno(ret); | 2550 | mlog_errno(ret); |
1469 | goto out_commit; | 2551 | goto out_commit; |
1470 | } | 2552 | } |
2553 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
1471 | 2554 | ||
1472 | /* | 2555 | /* |
1473 | * Operations are carefully ordered so that we set up the new | 2556 | * Operations are carefully ordered so that we set up the new |
@@ -1504,6 +2587,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1504 | goto out_commit; | 2587 | goto out_commit; |
1505 | } | 2588 | } |
1506 | 2589 | ||
2590 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
2591 | ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves, | ||
2592 | num_dx_leaves, dirdata_bh); | ||
2593 | if (ret) { | ||
2594 | mlog_errno(ret); | ||
2595 | goto out_commit; | ||
2596 | } | ||
2597 | } | ||
2598 | |||
1507 | /* | 2599 | /* |
1508 | * Set extent, i_size, etc on the directory. After this, the | 2600 | * Set extent, i_size, etc on the directory. After this, the |
1509 | * inode should contain the same exact dirents as before and | 2601 | * inode should contain the same exact dirents as before and |
@@ -1556,6 +2648,21 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1556 | goto out_commit; | 2648 | goto out_commit; |
1557 | } | 2649 | } |
1558 | 2650 | ||
2651 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
2652 | ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, | ||
2653 | meta_ac, &dx_root_bh); | ||
2654 | if (ret) { | ||
2655 | mlog_errno(ret); | ||
2656 | goto out_commit; | ||
2657 | } | ||
2658 | |||
2659 | ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh); | ||
2660 | ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0, | ||
2661 | dx_insert_blkno, 1, 0, NULL); | ||
2662 | if (ret) | ||
2663 | mlog_errno(ret); | ||
2664 | } | ||
2665 | |||
1559 | /* | 2666 | /* |
1560 | * We asked for two clusters, but only got one in the 1st | 2667 | * We asked for two clusters, but only got one in the 1st |
1561 | * pass. Claim the 2nd cluster as a separate extent. | 2668 | * pass. Claim the 2nd cluster as a separate extent. |
@@ -1575,15 +2682,28 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1575 | mlog_errno(ret); | 2682 | mlog_errno(ret); |
1576 | goto out_commit; | 2683 | goto out_commit; |
1577 | } | 2684 | } |
2685 | bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1); | ||
1578 | } | 2686 | } |
1579 | 2687 | ||
1580 | *first_block_bh = dirdata_bh; | 2688 | *first_block_bh = dirdata_bh; |
1581 | dirdata_bh = NULL; | 2689 | dirdata_bh = NULL; |
2690 | if (ocfs2_supports_indexed_dirs(osb)) { | ||
2691 | unsigned int off; | ||
2692 | |||
2693 | /* | ||
2694 | * We need to return the correct block within the | ||
2695 | * cluster which should hold our entry. | ||
2696 | */ | ||
2697 | off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), | ||
2698 | &lookup->dl_hinfo); | ||
2699 | get_bh(dx_leaves[off]); | ||
2700 | lookup->dl_dx_leaf_bh = dx_leaves[off]; | ||
2701 | } | ||
1582 | 2702 | ||
1583 | out_commit: | 2703 | out_commit: |
1584 | if (ret < 0 && did_quota) | 2704 | if (ret < 0 && did_quota) |
1585 | vfs_dq_free_space_nodirty(dir, | 2705 | vfs_dq_free_space_nodirty(dir, bytes_allocated); |
1586 | ocfs2_clusters_to_bytes(osb->sb, 2)); | 2706 | |
1587 | ocfs2_commit_trans(osb, handle); | 2707 | ocfs2_commit_trans(osb, handle); |
1588 | 2708 | ||
1589 | out_sem: | 2709 | out_sem: |
@@ -1592,8 +2712,17 @@ out_sem: | |||
1592 | out: | 2712 | out: |
1593 | if (data_ac) | 2713 | if (data_ac) |
1594 | ocfs2_free_alloc_context(data_ac); | 2714 | ocfs2_free_alloc_context(data_ac); |
2715 | if (meta_ac) | ||
2716 | ocfs2_free_alloc_context(meta_ac); | ||
2717 | |||
2718 | if (dx_leaves) { | ||
2719 | for (i = 0; i < num_dx_leaves; i++) | ||
2720 | brelse(dx_leaves[i]); | ||
2721 | kfree(dx_leaves); | ||
2722 | } | ||
1595 | 2723 | ||
1596 | brelse(dirdata_bh); | 2724 | brelse(dirdata_bh); |
2725 | brelse(dx_root_bh); | ||
1597 | 2726 | ||
1598 | return ret; | 2727 | return ret; |
1599 | } | 2728 | } |
@@ -1668,6 +2797,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1668 | struct inode *dir, | 2797 | struct inode *dir, |
1669 | struct buffer_head *parent_fe_bh, | 2798 | struct buffer_head *parent_fe_bh, |
1670 | unsigned int blocks_wanted, | 2799 | unsigned int blocks_wanted, |
2800 | struct ocfs2_dir_lookup_result *lookup, | ||
1671 | struct buffer_head **new_de_bh) | 2801 | struct buffer_head **new_de_bh) |
1672 | { | 2802 | { |
1673 | int status = 0; | 2803 | int status = 0; |
@@ -1687,7 +2817,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1687 | 2817 | ||
1688 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 2818 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
1689 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, | 2819 | status = ocfs2_expand_inline_dir(dir, parent_fe_bh, |
1690 | blocks_wanted, &new_bh); | 2820 | blocks_wanted, lookup, |
2821 | &new_bh); | ||
1691 | if (status) { | 2822 | if (status) { |
1692 | mlog_errno(status); | 2823 | mlog_errno(status); |
1693 | goto bail; | 2824 | goto bail; |
@@ -1975,6 +3106,487 @@ bail: | |||
1975 | return status; | 3106 | return status; |
1976 | } | 3107 | } |
1977 | 3108 | ||
3109 | static int dx_leaf_sort_cmp(const void *a, const void *b) | ||
3110 | { | ||
3111 | const struct ocfs2_dx_entry *entry1 = a; | ||
3112 | const struct ocfs2_dx_entry *entry2 = b; | ||
3113 | u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash); | ||
3114 | u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash); | ||
3115 | u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash); | ||
3116 | u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash); | ||
3117 | |||
3118 | if (major_hash1 > major_hash2) | ||
3119 | return 1; | ||
3120 | if (major_hash1 < major_hash2) | ||
3121 | return -1; | ||
3122 | |||
3123 | /* | ||
3124 | * It is not strictly necessary to sort by minor | ||
3125 | */ | ||
3126 | if (minor_hash1 > minor_hash2) | ||
3127 | return 1; | ||
3128 | if (minor_hash1 < minor_hash2) | ||
3129 | return -1; | ||
3130 | return 0; | ||
3131 | } | ||
3132 | |||
3133 | static void dx_leaf_sort_swap(void *a, void *b, int size) | ||
3134 | { | ||
3135 | struct ocfs2_dx_entry *entry1 = a; | ||
3136 | struct ocfs2_dx_entry *entry2 = b; | ||
3137 | struct ocfs2_dx_entry tmp; | ||
3138 | |||
3139 | BUG_ON(size != sizeof(*entry1)); | ||
3140 | |||
3141 | tmp = *entry1; | ||
3142 | *entry1 = *entry2; | ||
3143 | *entry2 = tmp; | ||
3144 | } | ||
3145 | |||
3146 | static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf) | ||
3147 | { | ||
3148 | struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; | ||
3149 | int i, num = le16_to_cpu(dl_list->de_num_used); | ||
3150 | |||
3151 | for (i = 0; i < (num - 1); i++) { | ||
3152 | if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) != | ||
3153 | le32_to_cpu(dl_list->de_entries[i + 1].dx_major_hash)) | ||
3154 | return 0; | ||
3155 | } | ||
3156 | |||
3157 | return 1; | ||
3158 | } | ||
3159 | |||
3160 | /* | ||
3161 | * Find the optimal value to split this leaf on. This expects the leaf | ||
3162 | * entries to be in sorted order. | ||
3163 | * | ||
3164 | * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is | ||
3165 | * the hash we want to insert. | ||
3166 | * | ||
3167 | * This function is only concerned with the major hash - that which | ||
3168 | * determines which cluster an item belongs to. | ||
3169 | */ | ||
3170 | static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf, | ||
3171 | u32 leaf_cpos, u32 insert_hash, | ||
3172 | u32 *split_hash) | ||
3173 | { | ||
3174 | struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; | ||
3175 | int i, num_used = le16_to_cpu(dl_list->de_num_used); | ||
3176 | int allsame; | ||
3177 | |||
3178 | /* | ||
3179 | * There's a couple rare, but nasty corner cases we have to | ||
3180 | * check for here. All of them involve a leaf where all value | ||
3181 | * have the same hash, which is what we look for first. | ||
3182 | * | ||
3183 | * Most of the time, all of the above is false, and we simply | ||
3184 | * pick the median value for a split. | ||
3185 | */ | ||
3186 | allsame = ocfs2_dx_leaf_same_major(dx_leaf); | ||
3187 | if (allsame) { | ||
3188 | u32 val = le32_to_cpu(dl_list->de_entries[0].dx_major_hash); | ||
3189 | |||
3190 | if (val == insert_hash) { | ||
3191 | /* | ||
3192 | * No matter where we would choose to split, | ||
3193 | * the new entry would want to occupy the same | ||
3194 | * block as these. Since there's no space left | ||
3195 | * in their existing block, we know there | ||
3196 | * won't be space after the split. | ||
3197 | */ | ||
3198 | return -ENOSPC; | ||
3199 | } | ||
3200 | |||
3201 | if (val == leaf_cpos) { | ||
3202 | /* | ||
3203 | * Because val is the same as leaf_cpos (which | ||
3204 | * is the smallest value this leaf can have), | ||
3205 | * yet is not equal to insert_hash, then we | ||
3206 | * know that insert_hash *must* be larger than | ||
3207 | * val (and leaf_cpos). At least cpos+1 in value. | ||
3208 | * | ||
3209 | * We also know then, that there cannot be an | ||
3210 | * adjacent extent (otherwise we'd be looking | ||
3211 | * at it). Choosing this value gives us a | ||
3212 | * chance to get some contiguousness. | ||
3213 | */ | ||
3214 | *split_hash = leaf_cpos + 1; | ||
3215 | return 0; | ||
3216 | } | ||
3217 | |||
3218 | if (val > insert_hash) { | ||
3219 | /* | ||
3220 | * val can not be the same as insert hash, and | ||
3221 | * also must be larger than leaf_cpos. Also, | ||
3222 | * we know that there can't be a leaf between | ||
3223 | * cpos and val, otherwise the entries with | ||
3224 | * hash 'val' would be there. | ||
3225 | */ | ||
3226 | *split_hash = val; | ||
3227 | return 0; | ||
3228 | } | ||
3229 | |||
3230 | *split_hash = insert_hash; | ||
3231 | return 0; | ||
3232 | } | ||
3233 | |||
3234 | /* | ||
3235 | * Since the records are sorted and the checks above | ||
3236 | * guaranteed that not all records in this block are the same, | ||
3237 | * we simple travel forward, from the median, and pick the 1st | ||
3238 | * record whose value is larger than leaf_cpos. | ||
3239 | */ | ||
3240 | for (i = (num_used / 2); i < num_used; i++) | ||
3241 | if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) > | ||
3242 | leaf_cpos) | ||
3243 | break; | ||
3244 | |||
3245 | BUG_ON(i == num_used); /* Should be impossible */ | ||
3246 | *split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash); | ||
3247 | return 0; | ||
3248 | } | ||
3249 | |||
3250 | /* | ||
3251 | * Transfer all entries in orig_dx_leaves whose major hash is equal to or | ||
3252 | * larger than split_hash into new_dx_leaves. We use a temporary | ||
3253 | * buffer (tmp_dx_leaf) to make the changes to the original leaf blocks. | ||
3254 | * | ||
3255 | * Since the block offset inside a leaf (cluster) is a constant mask | ||
3256 | * of minor_hash, we can optimize - an item at block offset X within | ||
3257 | * the original cluster, will be at offset X within the new cluster. | ||
3258 | */ | ||
3259 | static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash, | ||
3260 | handle_t *handle, | ||
3261 | struct ocfs2_dx_leaf *tmp_dx_leaf, | ||
3262 | struct buffer_head **orig_dx_leaves, | ||
3263 | struct buffer_head **new_dx_leaves, | ||
3264 | int num_dx_leaves) | ||
3265 | { | ||
3266 | int i, j, num_used; | ||
3267 | u32 major_hash; | ||
3268 | struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf; | ||
3269 | struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list; | ||
3270 | struct ocfs2_dx_entry *dx_entry; | ||
3271 | |||
3272 | tmp_list = &tmp_dx_leaf->dl_list; | ||
3273 | |||
3274 | for (i = 0; i < num_dx_leaves; i++) { | ||
3275 | orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data; | ||
3276 | orig_list = &orig_dx_leaf->dl_list; | ||
3277 | new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data; | ||
3278 | new_list = &new_dx_leaf->dl_list; | ||
3279 | |||
3280 | num_used = le16_to_cpu(orig_list->de_num_used); | ||
3281 | |||
3282 | memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize); | ||
3283 | tmp_list->de_num_used = cpu_to_le16(0); | ||
3284 | memset(&tmp_list->de_entries, 0, sizeof(*dx_entry)*num_used); | ||
3285 | |||
3286 | for (j = 0; j < num_used; j++) { | ||
3287 | dx_entry = &orig_list->de_entries[j]; | ||
3288 | major_hash = le32_to_cpu(dx_entry->dx_major_hash); | ||
3289 | if (major_hash >= split_hash) | ||
3290 | ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf, | ||
3291 | dx_entry); | ||
3292 | else | ||
3293 | ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf, | ||
3294 | dx_entry); | ||
3295 | } | ||
3296 | memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize); | ||
3297 | |||
3298 | ocfs2_journal_dirty(handle, orig_dx_leaves[i]); | ||
3299 | ocfs2_journal_dirty(handle, new_dx_leaves[i]); | ||
3300 | } | ||
3301 | } | ||
3302 | |||
3303 | static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb, | ||
3304 | struct ocfs2_dx_root_block *dx_root) | ||
3305 | { | ||
3306 | int credits = ocfs2_clusters_to_blocks(osb->sb, 2); | ||
3307 | |||
3308 | credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1); | ||
3309 | credits += ocfs2_quota_trans_credits(osb->sb); | ||
3310 | return credits; | ||
3311 | } | ||
3312 | |||
3313 | /* | ||
3314 | * Find the median value in dx_leaf_bh and allocate a new leaf to move | ||
3315 | * half our entries into. | ||
3316 | */ | ||
3317 | static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | ||
3318 | struct buffer_head *dx_root_bh, | ||
3319 | struct buffer_head *dx_leaf_bh, | ||
3320 | struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos, | ||
3321 | u64 leaf_blkno) | ||
3322 | { | ||
3323 | struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
3324 | int credits, ret, i, num_used, did_quota = 0; | ||
3325 | u32 cpos, split_hash, insert_hash = hinfo->major_hash; | ||
3326 | u64 orig_leaves_start; | ||
3327 | int num_dx_leaves; | ||
3328 | struct buffer_head **orig_dx_leaves = NULL; | ||
3329 | struct buffer_head **new_dx_leaves = NULL; | ||
3330 | struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL; | ||
3331 | struct ocfs2_extent_tree et; | ||
3332 | handle_t *handle = NULL; | ||
3333 | struct ocfs2_dx_root_block *dx_root; | ||
3334 | struct ocfs2_dx_leaf *tmp_dx_leaf = NULL; | ||
3335 | |||
3336 | mlog(0, "DX Dir: %llu, rebalance leaf leaf_blkno: %llu insert: %u\n", | ||
3337 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
3338 | (unsigned long long)leaf_blkno, insert_hash); | ||
3339 | |||
3340 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | ||
3341 | |||
3342 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
3343 | /* | ||
3344 | * XXX: This is a rather large limit. We should use a more | ||
3345 | * realistic value. | ||
3346 | */ | ||
3347 | if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX) | ||
3348 | return -ENOSPC; | ||
3349 | |||
3350 | num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used); | ||
3351 | if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) { | ||
3352 | mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: " | ||
3353 | "%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, | ||
3354 | (unsigned long long)leaf_blkno, num_used); | ||
3355 | ret = -EIO; | ||
3356 | goto out; | ||
3357 | } | ||
3358 | |||
3359 | orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves); | ||
3360 | if (!orig_dx_leaves) { | ||
3361 | ret = -ENOMEM; | ||
3362 | mlog_errno(ret); | ||
3363 | goto out; | ||
3364 | } | ||
3365 | |||
3366 | new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, NULL); | ||
3367 | if (!new_dx_leaves) { | ||
3368 | ret = -ENOMEM; | ||
3369 | mlog_errno(ret); | ||
3370 | goto out; | ||
3371 | } | ||
3372 | |||
3373 | ret = ocfs2_lock_allocators(dir, &et, 1, 0, &data_ac, &meta_ac); | ||
3374 | if (ret) { | ||
3375 | if (ret != -ENOSPC) | ||
3376 | mlog_errno(ret); | ||
3377 | goto out; | ||
3378 | } | ||
3379 | |||
3380 | credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root); | ||
3381 | handle = ocfs2_start_trans(osb, credits); | ||
3382 | if (IS_ERR(handle)) { | ||
3383 | ret = PTR_ERR(handle); | ||
3384 | handle = NULL; | ||
3385 | mlog_errno(ret); | ||
3386 | goto out; | ||
3387 | } | ||
3388 | |||
3389 | if (vfs_dq_alloc_space_nodirty(dir, | ||
3390 | ocfs2_clusters_to_bytes(dir->i_sb, 1))) { | ||
3391 | ret = -EDQUOT; | ||
3392 | goto out_commit; | ||
3393 | } | ||
3394 | did_quota = 1; | ||
3395 | |||
3396 | ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, | ||
3397 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3398 | if (ret) { | ||
3399 | mlog_errno(ret); | ||
3400 | goto out_commit; | ||
3401 | } | ||
3402 | |||
3403 | /* | ||
3404 | * This block is changing anyway, so we can sort it in place. | ||
3405 | */ | ||
3406 | sort(dx_leaf->dl_list.de_entries, num_used, | ||
3407 | sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, | ||
3408 | dx_leaf_sort_swap); | ||
3409 | |||
3410 | ret = ocfs2_journal_dirty(handle, dx_leaf_bh); | ||
3411 | if (ret) { | ||
3412 | mlog_errno(ret); | ||
3413 | goto out_commit; | ||
3414 | } | ||
3415 | |||
3416 | ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash, | ||
3417 | &split_hash); | ||
3418 | if (ret) { | ||
3419 | mlog_errno(ret); | ||
3420 | goto out_commit; | ||
3421 | } | ||
3422 | |||
3423 | mlog(0, "Split leaf (%u) at %u, insert major hash is %u\n", | ||
3424 | leaf_cpos, split_hash, insert_hash); | ||
3425 | |||
3426 | /* | ||
3427 | * We have to carefully order operations here. There are items | ||
3428 | * which want to be in the new cluster before insert, but in | ||
3429 | * order to put those items in the new cluster, we alter the | ||
3430 | * old cluster. A failure to insert gets nasty. | ||
3431 | * | ||
3432 | * So, start by reserving writes to the old | ||
3433 | * cluster. ocfs2_dx_dir_new_cluster will reserve writes on | ||
3434 | * the new cluster for us, before inserting it. The insert | ||
3435 | * won't happen if there's an error before that. Once the | ||
3436 | * insert is done then, we can transfer from one leaf into the | ||
3437 | * other without fear of hitting any error. | ||
3438 | */ | ||
3439 | |||
3440 | /* | ||
3441 | * The leaf transfer wants some scratch space so that we don't | ||
3442 | * wind up doing a bunch of expensive memmove(). | ||
3443 | */ | ||
3444 | tmp_dx_leaf = kmalloc(osb->sb->s_blocksize, GFP_NOFS); | ||
3445 | if (!tmp_dx_leaf) { | ||
3446 | ret = -ENOMEM; | ||
3447 | mlog_errno(ret); | ||
3448 | goto out_commit; | ||
3449 | } | ||
3450 | |||
3451 | orig_leaves_start = leaf_blkno & ~(osb->s_clustersize_bits - | ||
3452 | osb->sb->s_blocksize_bits); | ||
3453 | ret = ocfs2_read_dx_leaves(dir, orig_leaves_start, num_dx_leaves, | ||
3454 | orig_dx_leaves); | ||
3455 | if (ret) { | ||
3456 | mlog_errno(ret); | ||
3457 | goto out_commit; | ||
3458 | } | ||
3459 | |||
3460 | for (i = 0; i < num_dx_leaves; i++) { | ||
3461 | ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i], | ||
3462 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3463 | if (ret) { | ||
3464 | mlog_errno(ret); | ||
3465 | goto out_commit; | ||
3466 | } | ||
3467 | } | ||
3468 | |||
3469 | cpos = split_hash; | ||
3470 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | ||
3471 | data_ac, meta_ac, new_dx_leaves, | ||
3472 | num_dx_leaves); | ||
3473 | if (ret) { | ||
3474 | mlog_errno(ret); | ||
3475 | goto out_commit; | ||
3476 | } | ||
3477 | |||
3478 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, | ||
3479 | orig_dx_leaves, new_dx_leaves, num_dx_leaves); | ||
3480 | |||
3481 | out_commit: | ||
3482 | if (ret < 0 && did_quota) | ||
3483 | vfs_dq_free_space_nodirty(dir, | ||
3484 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); | ||
3485 | |||
3486 | ocfs2_commit_trans(osb, handle); | ||
3487 | |||
3488 | out: | ||
3489 | if (orig_dx_leaves || new_dx_leaves) { | ||
3490 | for (i = 0; i < num_dx_leaves; i++) { | ||
3491 | if (orig_dx_leaves) | ||
3492 | brelse(orig_dx_leaves[i]); | ||
3493 | if (new_dx_leaves) | ||
3494 | brelse(new_dx_leaves[i]); | ||
3495 | } | ||
3496 | kfree(orig_dx_leaves); | ||
3497 | kfree(new_dx_leaves); | ||
3498 | } | ||
3499 | |||
3500 | if (meta_ac) | ||
3501 | ocfs2_free_alloc_context(meta_ac); | ||
3502 | if (data_ac) | ||
3503 | ocfs2_free_alloc_context(data_ac); | ||
3504 | |||
3505 | kfree(tmp_dx_leaf); | ||
3506 | return ret; | ||
3507 | } | ||
3508 | |||
3509 | static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir, | ||
3510 | struct buffer_head *di_bh, const char *name, | ||
3511 | int namelen, | ||
3512 | struct ocfs2_dir_lookup_result *lookup) | ||
3513 | { | ||
3514 | int ret, rebalanced = 0; | ||
3515 | struct buffer_head *dx_root_bh = NULL; | ||
3516 | struct ocfs2_dx_root_block *dx_root; | ||
3517 | struct buffer_head *dx_leaf_bh = NULL; | ||
3518 | struct ocfs2_dx_leaf *dx_leaf; | ||
3519 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3520 | u64 blkno; | ||
3521 | u32 leaf_cpos; | ||
3522 | |||
3523 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
3524 | if (ret) { | ||
3525 | mlog_errno(ret); | ||
3526 | goto out; | ||
3527 | } | ||
3528 | |||
3529 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
3530 | |||
3531 | restart_search: | ||
3532 | ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo, | ||
3533 | &leaf_cpos, &blkno); | ||
3534 | if (ret) { | ||
3535 | mlog_errno(ret); | ||
3536 | goto out; | ||
3537 | } | ||
3538 | |||
3539 | ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh); | ||
3540 | if (ret) { | ||
3541 | mlog_errno(ret); | ||
3542 | goto out; | ||
3543 | } | ||
3544 | |||
3545 | dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data; | ||
3546 | |||
3547 | if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >= | ||
3548 | le16_to_cpu(dx_leaf->dl_list.de_count)) { | ||
3549 | if (rebalanced) { | ||
3550 | /* | ||
3551 | * Rebalancing should have provided us with | ||
3552 | * space in an appropriate leaf. | ||
3553 | * | ||
3554 | * XXX: Is this an abnormal condition then? | ||
3555 | * Should we print a message here? | ||
3556 | */ | ||
3557 | ret = -ENOSPC; | ||
3558 | goto out; | ||
3559 | } | ||
3560 | |||
3561 | ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh, | ||
3562 | &lookup->dl_hinfo, leaf_cpos, | ||
3563 | blkno); | ||
3564 | if (ret) { | ||
3565 | if (ret != -ENOSPC) | ||
3566 | mlog_errno(ret); | ||
3567 | goto out; | ||
3568 | } | ||
3569 | |||
3570 | /* | ||
3571 | * Restart the lookup. The rebalance might have | ||
3572 | * changed which block our item fits into. Mark our | ||
3573 | * progress, so we only execute this once. | ||
3574 | */ | ||
3575 | brelse(dx_leaf_bh); | ||
3576 | dx_leaf_bh = NULL; | ||
3577 | rebalanced = 1; | ||
3578 | goto restart_search; | ||
3579 | } | ||
3580 | |||
3581 | lookup->dl_dx_leaf_bh = dx_leaf_bh; | ||
3582 | dx_leaf_bh = NULL; | ||
3583 | |||
3584 | out: | ||
3585 | brelse(dx_leaf_bh); | ||
3586 | brelse(dx_root_bh); | ||
3587 | return ret; | ||
3588 | } | ||
3589 | |||
1978 | /* | 3590 | /* |
1979 | * Get a directory ready for insert. Any directory allocation required | 3591 | * Get a directory ready for insert. Any directory allocation required |
1980 | * happens here. Success returns zero, and enough context in the dir | 3592 | * happens here. Success returns zero, and enough context in the dir |
@@ -2001,6 +3613,34 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
2001 | goto out; | 3613 | goto out; |
2002 | } | 3614 | } |
2003 | 3615 | ||
3616 | /* | ||
3617 | * Do this up front to reduce confusion. | ||
3618 | * | ||
3619 | * The directory might start inline, then be turned into an | ||
3620 | * indexed one, in which case we'd need to hash deep inside | ||
3621 | * ocfs2_find_dir_space_id(). Since | ||
3622 | * ocfs2_prepare_dx_dir_for_insert() also needs this hash | ||
3623 | * done, there seems no point in spreading out the calls. We | ||
3624 | * can optimize away the case where the file system doesn't | ||
3625 | * support indexing. | ||
3626 | */ | ||
3627 | if (ocfs2_supports_indexed_dirs(osb)) | ||
3628 | ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo); | ||
3629 | |||
3630 | if (ocfs2_dir_indexed(dir)) { | ||
3631 | ret = ocfs2_find_dir_space_dx(osb, dir, parent_fe_bh, name, | ||
3632 | namelen, lookup); | ||
3633 | if (ret) { | ||
3634 | mlog_errno(ret); | ||
3635 | goto out; | ||
3636 | } | ||
3637 | |||
3638 | /* | ||
3639 | * We intentionally fall through so that the unindexed | ||
3640 | * tree can also be prepared. | ||
3641 | */ | ||
3642 | } | ||
3643 | |||
2004 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 3644 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
2005 | ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name, | 3645 | ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name, |
2006 | namelen, &bh, &blocks_wanted); | 3646 | namelen, &bh, &blocks_wanted); |
@@ -2019,7 +3659,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
2019 | BUG_ON(bh); | 3659 | BUG_ON(bh); |
2020 | 3660 | ||
2021 | ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted, | 3661 | ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted, |
2022 | &bh); | 3662 | lookup, &bh); |
2023 | if (ret) { | 3663 | if (ret) { |
2024 | if (ret != -ENOSPC) | 3664 | if (ret != -ENOSPC) |
2025 | mlog_errno(ret); | 3665 | mlog_errno(ret); |
@@ -2035,3 +3675,145 @@ out: | |||
2035 | brelse(bh); | 3675 | brelse(bh); |
2036 | return ret; | 3676 | return ret; |
2037 | } | 3677 | } |
3678 | |||
3679 | static int ocfs2_dx_dir_remove_index(struct inode *dir, | ||
3680 | struct buffer_head *di_bh, | ||
3681 | struct buffer_head *dx_root_bh) | ||
3682 | { | ||
3683 | int ret; | ||
3684 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
3685 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3686 | struct ocfs2_dx_root_block *dx_root; | ||
3687 | struct inode *dx_alloc_inode = NULL; | ||
3688 | struct buffer_head *dx_alloc_bh = NULL; | ||
3689 | handle_t *handle; | ||
3690 | u64 blk; | ||
3691 | u16 bit; | ||
3692 | u64 bg_blkno; | ||
3693 | |||
3694 | dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data; | ||
3695 | |||
3696 | dx_alloc_inode = ocfs2_get_system_file_inode(osb, | ||
3697 | EXTENT_ALLOC_SYSTEM_INODE, | ||
3698 | le16_to_cpu(dx_root->dr_suballoc_slot)); | ||
3699 | if (!dx_alloc_inode) { | ||
3700 | ret = -ENOMEM; | ||
3701 | mlog_errno(ret); | ||
3702 | goto out; | ||
3703 | } | ||
3704 | mutex_lock(&dx_alloc_inode->i_mutex); | ||
3705 | |||
3706 | ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1); | ||
3707 | if (ret) { | ||
3708 | mlog_errno(ret); | ||
3709 | goto out_mutex; | ||
3710 | } | ||
3711 | |||
3712 | handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS); | ||
3713 | if (IS_ERR(handle)) { | ||
3714 | ret = PTR_ERR(handle); | ||
3715 | mlog_errno(ret); | ||
3716 | goto out_unlock; | ||
3717 | } | ||
3718 | |||
3719 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | ||
3720 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3721 | if (ret) { | ||
3722 | mlog_errno(ret); | ||
3723 | goto out_commit; | ||
3724 | } | ||
3725 | |||
3726 | OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; | ||
3727 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | ||
3728 | di->i_dx_root = cpu_to_le64(0ULL); | ||
3729 | |||
3730 | ocfs2_journal_dirty(handle, di_bh); | ||
3731 | |||
3732 | blk = le64_to_cpu(dx_root->dr_blkno); | ||
3733 | bit = le16_to_cpu(dx_root->dr_suballoc_bit); | ||
3734 | bg_blkno = ocfs2_which_suballoc_group(blk, bit); | ||
3735 | ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh, | ||
3736 | bit, bg_blkno, 1); | ||
3737 | if (ret) | ||
3738 | mlog_errno(ret); | ||
3739 | |||
3740 | out_commit: | ||
3741 | ocfs2_commit_trans(osb, handle); | ||
3742 | |||
3743 | out_unlock: | ||
3744 | ocfs2_inode_unlock(dx_alloc_inode, 1); | ||
3745 | |||
3746 | out_mutex: | ||
3747 | mutex_unlock(&dx_alloc_inode->i_mutex); | ||
3748 | brelse(dx_alloc_bh); | ||
3749 | out: | ||
3750 | iput(dx_alloc_inode); | ||
3751 | return ret; | ||
3752 | } | ||
3753 | |||
3754 | int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) | ||
3755 | { | ||
3756 | int ret; | ||
3757 | unsigned int uninitialized_var(clen); | ||
3758 | u32 major_hash = UINT_MAX, p_cpos, uninitialized_var(cpos); | ||
3759 | u64 uninitialized_var(blkno); | ||
3760 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
3761 | struct buffer_head *dx_root_bh = NULL; | ||
3762 | struct ocfs2_dx_root_block *dx_root; | ||
3763 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3764 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
3765 | struct ocfs2_extent_tree et; | ||
3766 | |||
3767 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
3768 | |||
3769 | if (!ocfs2_dir_indexed(dir)) | ||
3770 | return 0; | ||
3771 | |||
3772 | ret = ocfs2_read_dx_root(dir, di, &dx_root_bh); | ||
3773 | if (ret) { | ||
3774 | mlog_errno(ret); | ||
3775 | goto out; | ||
3776 | } | ||
3777 | |||
3778 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | ||
3779 | |||
3780 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | ||
3781 | |||
3782 | /* XXX: What if dr_clusters is too large? */ | ||
3783 | while (le32_to_cpu(dx_root->dr_clusters)) { | ||
3784 | ret = ocfs2_dx_dir_lookup_rec(dir, &dx_root->dr_list, | ||
3785 | major_hash, &cpos, &blkno, &clen); | ||
3786 | if (ret) { | ||
3787 | mlog_errno(ret); | ||
3788 | goto out; | ||
3789 | } | ||
3790 | |||
3791 | p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); | ||
3792 | |||
3793 | ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, | ||
3794 | &dealloc); | ||
3795 | if (ret) { | ||
3796 | mlog_errno(ret); | ||
3797 | goto out; | ||
3798 | } | ||
3799 | |||
3800 | if (cpos == 0) | ||
3801 | break; | ||
3802 | |||
3803 | major_hash = cpos - 1; | ||
3804 | } | ||
3805 | |||
3806 | ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh); | ||
3807 | if (ret) { | ||
3808 | mlog_errno(ret); | ||
3809 | goto out; | ||
3810 | } | ||
3811 | |||
3812 | ocfs2_remove_from_cache(dir, dx_root_bh); | ||
3813 | out: | ||
3814 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
3815 | ocfs2_run_deallocs(osb, &dealloc); | ||
3816 | |||
3817 | brelse(dx_root_bh); | ||
3818 | return ret; | ||
3819 | } | ||