aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/suballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/suballoc.c')
-rw-r--r-- fs/ocfs2/suballoc.c 254
1 files changed, 241 insertions, 13 deletions
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a69628603e18..b4ca5911caaf 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -48,7 +48,8 @@
48#include "buffer_head_io.h" 48#include "buffer_head_io.h"
49 49
50#define NOT_ALLOC_NEW_GROUP 0 50#define NOT_ALLOC_NEW_GROUP 0
51#define ALLOC_NEW_GROUP 1 51#define ALLOC_NEW_GROUP 0x1
52#define ALLOC_GROUPS_FROM_GLOBAL 0x2
52 53
53#define OCFS2_MAX_INODES_TO_STEAL 1024 54#define OCFS2_MAX_INODES_TO_STEAL 1024
54 55
@@ -64,7 +65,9 @@ static int ocfs2_block_group_fill(handle_t *handle,
64static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 65static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
65 struct inode *alloc_inode, 66 struct inode *alloc_inode,
66 struct buffer_head *bh, 67 struct buffer_head *bh,
67 u64 max_block); 68 u64 max_block,
69 u64 *last_alloc_group,
70 int flags);
68 71
69static int ocfs2_cluster_group_search(struct inode *inode, 72static int ocfs2_cluster_group_search(struct inode *inode,
70 struct buffer_head *group_bh, 73 struct buffer_head *group_bh,
@@ -116,6 +119,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
116 u16 *bg_bit_off); 119 u16 *bg_bit_off);
117static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, 120static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
118 u32 bits_wanted, u64 max_block, 121 u32 bits_wanted, u64 max_block,
122 int flags,
119 struct ocfs2_alloc_context **ac); 123 struct ocfs2_alloc_context **ac);
120 124
121void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) 125void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
@@ -403,7 +407,9 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
403static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 407static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
404 struct inode *alloc_inode, 408 struct inode *alloc_inode,
405 struct buffer_head *bh, 409 struct buffer_head *bh,
406 u64 max_block) 410 u64 max_block,
411 u64 *last_alloc_group,
412 int flags)
407{ 413{
408 int status, credits; 414 int status, credits;
409 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 415 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
@@ -423,7 +429,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
423 cl = &fe->id2.i_chain; 429 cl = &fe->id2.i_chain;
424 status = ocfs2_reserve_clusters_with_limit(osb, 430 status = ocfs2_reserve_clusters_with_limit(osb,
425 le16_to_cpu(cl->cl_cpg), 431 le16_to_cpu(cl->cl_cpg),
426 max_block, &ac); 432 max_block, flags, &ac);
427 if (status < 0) { 433 if (status < 0) {
428 if (status != -ENOSPC) 434 if (status != -ENOSPC)
429 mlog_errno(status); 435 mlog_errno(status);
@@ -440,6 +446,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
440 goto bail; 446 goto bail;
441 } 447 }
442 448
449 if (last_alloc_group && *last_alloc_group != 0) {
450 mlog(0, "use old allocation group %llu for block group alloc\n",
451 (unsigned long long)*last_alloc_group);
452 ac->ac_last_group = *last_alloc_group;
453 }
443 status = ocfs2_claim_clusters(osb, 454 status = ocfs2_claim_clusters(osb,
444 handle, 455 handle,
445 ac, 456 ac,
@@ -514,6 +525,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
514 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); 525 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
515 526
516 status = 0; 527 status = 0;
528
529 /* save the new last alloc group so that the caller can cache it. */
530 if (last_alloc_group)
531 *last_alloc_group = ac->ac_last_group;
532
517bail: 533bail:
518 if (handle) 534 if (handle)
519 ocfs2_commit_trans(osb, handle); 535 ocfs2_commit_trans(osb, handle);
@@ -531,7 +547,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
531 struct ocfs2_alloc_context *ac, 547 struct ocfs2_alloc_context *ac,
532 int type, 548 int type,
533 u32 slot, 549 u32 slot,
534 int alloc_new_group) 550 u64 *last_alloc_group,
551 int flags)
535{ 552{
536 int status; 553 int status;
537 u32 bits_wanted = ac->ac_bits_wanted; 554 u32 bits_wanted = ac->ac_bits_wanted;
@@ -587,7 +604,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
587 goto bail; 604 goto bail;
588 } 605 }
589 606
590 if (alloc_new_group != ALLOC_NEW_GROUP) { 607 if (!(flags & ALLOC_NEW_GROUP)) {
591 mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " 608 mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
592 "and we don't alloc a new group for it.\n", 609 "and we don't alloc a new group for it.\n",
593 slot, bits_wanted, free_bits); 610 slot, bits_wanted, free_bits);
@@ -596,7 +613,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
596 } 613 }
597 614
598 status = ocfs2_block_group_alloc(osb, alloc_inode, bh, 615 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
599 ac->ac_max_block); 616 ac->ac_max_block,
617 last_alloc_group, flags);
600 if (status < 0) { 618 if (status < 0) {
601 if (status != -ENOSPC) 619 if (status != -ENOSPC)
602 mlog_errno(status); 620 mlog_errno(status);
@@ -640,7 +658,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
640 658
641 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 659 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
642 EXTENT_ALLOC_SYSTEM_INODE, 660 EXTENT_ALLOC_SYSTEM_INODE,
643 slot, ALLOC_NEW_GROUP); 661 slot, NULL, ALLOC_NEW_GROUP);
644 if (status < 0) { 662 if (status < 0) {
645 if (status != -ENOSPC) 663 if (status != -ENOSPC)
646 mlog_errno(status); 664 mlog_errno(status);
@@ -686,7 +704,8 @@ static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
686 704
687 status = ocfs2_reserve_suballoc_bits(osb, ac, 705 status = ocfs2_reserve_suballoc_bits(osb, ac,
688 INODE_ALLOC_SYSTEM_INODE, 706 INODE_ALLOC_SYSTEM_INODE,
689 slot, NOT_ALLOC_NEW_GROUP); 707 slot, NULL,
708 NOT_ALLOC_NEW_GROUP);
690 if (status >= 0) { 709 if (status >= 0) {
691 ocfs2_set_inode_steal_slot(osb, slot); 710 ocfs2_set_inode_steal_slot(osb, slot);
692 break; 711 break;
@@ -703,6 +722,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
703{ 722{
704 int status; 723 int status;
705 s16 slot = ocfs2_get_inode_steal_slot(osb); 724 s16 slot = ocfs2_get_inode_steal_slot(osb);
725 u64 alloc_group;
706 726
707 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 727 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
708 if (!(*ac)) { 728 if (!(*ac)) {
@@ -738,12 +758,22 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
738 goto inode_steal; 758 goto inode_steal;
739 759
740 atomic_set(&osb->s_num_inodes_stolen, 0); 760 atomic_set(&osb->s_num_inodes_stolen, 0);
761 alloc_group = osb->osb_inode_alloc_group;
741 status = ocfs2_reserve_suballoc_bits(osb, *ac, 762 status = ocfs2_reserve_suballoc_bits(osb, *ac,
742 INODE_ALLOC_SYSTEM_INODE, 763 INODE_ALLOC_SYSTEM_INODE,
743 osb->slot_num, ALLOC_NEW_GROUP); 764 osb->slot_num,
765 &alloc_group,
766 ALLOC_NEW_GROUP |
767 ALLOC_GROUPS_FROM_GLOBAL);
744 if (status >= 0) { 768 if (status >= 0) {
745 status = 0; 769 status = 0;
746 770
771 spin_lock(&osb->osb_lock);
772 osb->osb_inode_alloc_group = alloc_group;
773 spin_unlock(&osb->osb_lock);
774 mlog(0, "after reservation, new allocation group is "
775 "%llu\n", (unsigned long long)alloc_group);
776
747 /* 777 /*
748 * Some inodes must be freed by us, so try to allocate 778 * Some inodes must be freed by us, so try to allocate
749 * from our own next time. 779 * from our own next time.
@@ -790,7 +820,7 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
790 820
791 status = ocfs2_reserve_suballoc_bits(osb, ac, 821 status = ocfs2_reserve_suballoc_bits(osb, ac,
792 GLOBAL_BITMAP_SYSTEM_INODE, 822 GLOBAL_BITMAP_SYSTEM_INODE,
793 OCFS2_INVALID_SLOT, 823 OCFS2_INVALID_SLOT, NULL,
794 ALLOC_NEW_GROUP); 824 ALLOC_NEW_GROUP);
795 if (status < 0 && status != -ENOSPC) { 825 if (status < 0 && status != -ENOSPC) {
796 mlog_errno(status); 826 mlog_errno(status);
@@ -806,6 +836,7 @@ bail:
806 * things a bit. */ 836 * things a bit. */
807static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, 837static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
808 u32 bits_wanted, u64 max_block, 838 u32 bits_wanted, u64 max_block,
839 int flags,
809 struct ocfs2_alloc_context **ac) 840 struct ocfs2_alloc_context **ac)
810{ 841{
811 int status; 842 int status;
@@ -823,7 +854,8 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
823 (*ac)->ac_max_block = max_block; 854 (*ac)->ac_max_block = max_block;
824 855
825 status = -ENOSPC; 856 status = -ENOSPC;
826 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { 857 if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
858 ocfs2_alloc_should_use_local(osb, bits_wanted)) {
827 status = ocfs2_reserve_local_alloc_bits(osb, 859 status = ocfs2_reserve_local_alloc_bits(osb,
828 bits_wanted, 860 bits_wanted,
829 *ac); 861 *ac);
@@ -861,7 +893,8 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
861 u32 bits_wanted, 893 u32 bits_wanted,
862 struct ocfs2_alloc_context **ac) 894 struct ocfs2_alloc_context **ac)
863{ 895{
864 return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); 896 return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0,
897 ALLOC_NEW_GROUP, ac);
865} 898}
866 899
867/* 900/*
@@ -1618,8 +1651,41 @@ bail:
1618 return status; 1651 return status;
1619} 1652}
1620 1653
1654static void ocfs2_init_inode_ac_group(struct inode *dir,
1655 struct buffer_head *parent_fe_bh,
1656 struct ocfs2_alloc_context *ac)
1657{
1658 struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data;
1659 /*
1660 * Try to allocate inodes from some specific group.
1661 *
1662 * If the parent dir has recorded the last group used in allocation,
1663 * cool, use it. Otherwise if we try to allocate new inode from the
1664 * same slot the parent dir belongs to, use the same chunk.
1665 *
1666 * We are very careful here to avoid the mistake of setting
1667 * ac_last_group to a group descriptor from a different (unlocked) slot.
1668 */
1669 if (OCFS2_I(dir)->ip_last_used_group &&
1670 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
1671 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
1672 else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot)
1673 ac->ac_last_group = ocfs2_which_suballoc_group(
1674 le64_to_cpu(fe->i_blkno),
1675 le16_to_cpu(fe->i_suballoc_bit));
1676}
1677
1678static inline void ocfs2_save_inode_ac_group(struct inode *dir,
1679 struct ocfs2_alloc_context *ac)
1680{
1681 OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
1682 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
1683}
1684
1621int ocfs2_claim_new_inode(struct ocfs2_super *osb, 1685int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1622 handle_t *handle, 1686 handle_t *handle,
1687 struct inode *dir,
1688 struct buffer_head *parent_fe_bh,
1623 struct ocfs2_alloc_context *ac, 1689 struct ocfs2_alloc_context *ac,
1624 u16 *suballoc_bit, 1690 u16 *suballoc_bit,
1625 u64 *fe_blkno) 1691 u64 *fe_blkno)
@@ -1635,6 +1701,8 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1635 BUG_ON(ac->ac_bits_wanted != 1); 1701 BUG_ON(ac->ac_bits_wanted != 1);
1636 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); 1702 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
1637 1703
1704 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
1705
1638 status = ocfs2_claim_suballoc_bits(osb, 1706 status = ocfs2_claim_suballoc_bits(osb,
1639 ac, 1707 ac,
1640 handle, 1708 handle,
@@ -1653,6 +1721,7 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1653 1721
1654 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 1722 *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
1655 ac->ac_bits_given++; 1723 ac->ac_bits_given++;
1724 ocfs2_save_inode_ac_group(dir, ac);
1656 status = 0; 1725 status = 0;
1657bail: 1726bail:
1658 mlog_exit(status); 1727 mlog_exit(status);
@@ -2116,3 +2185,162 @@ out:
2116 2185
2117 return ret; 2186 return ret;
2118} 2187}
2188
2189/*
2190 * Read the inode specified by blkno to get suballoc_slot and
2191 * suballoc_bit.
2192 */
2193static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2194 u16 *suballoc_slot, u16 *suballoc_bit)
2195{
2196 int status;
2197 struct buffer_head *inode_bh = NULL;
2198 struct ocfs2_dinode *inode_fe;
2199
2200 mlog_entry("blkno: %llu\n", blkno);
2201
2202 /* dirty read disk */
2203 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
2204 if (status < 0) {
2205 mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status);
2206 goto bail;
2207 }
2208
2209 inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
2210 if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
2211 mlog(ML_ERROR, "invalid inode %llu requested\n", blkno);
2212 status = -EINVAL;
2213 goto bail;
2214 }
2215
2216 if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT &&
2217 (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
2218 mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
2219 blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
2220 status = -EINVAL;
2221 goto bail;
2222 }
2223
2224 if (suballoc_slot)
2225 *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
2226 if (suballoc_bit)
2227 *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
2228
2229bail:
2230 brelse(inode_bh);
2231
2232 mlog_exit(status);
2233 return status;
2234}
2235
2236/*
2237 * test whether bit is SET in allocator bitmap or not. on success, 0
2238 * is returned and *res is 1 for SET; 0 otherwise. when fails, errno
2239 * is returned and *res is meaningless. Call this after you have
2240 * cluster locked against suballoc, or you may get a result based on
2241 * non-up2date contents
2242 */
2243static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2244 struct inode *suballoc,
2245 struct buffer_head *alloc_bh, u64 blkno,
2246 u16 bit, int *res)
2247{
2248 struct ocfs2_dinode *alloc_fe;
2249 struct ocfs2_group_desc *group;
2250 struct buffer_head *group_bh = NULL;
2251 u64 bg_blkno;
2252 int status;
2253
2254 mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit);
2255
2256 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
2257 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
2258 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
2259 (unsigned int)bit,
2260 ocfs2_bits_per_group(&alloc_fe->id2.i_chain));
2261 status = -EINVAL;
2262 goto bail;
2263 }
2264
2265 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2266 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
2267 &group_bh);
2268 if (status < 0) {
2269 mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status);
2270 goto bail;
2271 }
2272
2273 group = (struct ocfs2_group_desc *) group_bh->b_data;
2274 *res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
2275
2276bail:
2277 brelse(group_bh);
2278
2279 mlog_exit(status);
2280 return status;
2281}
2282
2283/*
2284 * Test if the bit representing this inode (blkno) is set in the
2285 * suballocator.
2286 *
2287 * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
2288 *
2289 * In the event of failure, a negative value is returned and *res is
2290 * meaningless.
2291 *
2292 * Callers must make sure to hold nfs_sync_lock to prevent
2293 * ocfs2_delete_inode() on another node from accessing the same
2294 * suballocator concurrently.
2295 */
2296int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2297{
2298 int status;
2299 u16 suballoc_bit = 0, suballoc_slot = 0;
2300 struct inode *inode_alloc_inode;
2301 struct buffer_head *alloc_bh = NULL;
2302
2303 mlog_entry("blkno: %llu", blkno);
2304
2305 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2306 &suballoc_bit);
2307 if (status < 0) {
2308 mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
2309 goto bail;
2310 }
2311
2312 inode_alloc_inode =
2313 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
2314 suballoc_slot);
2315 if (!inode_alloc_inode) {
2316 /* the error code could be inaccurate, but we are not able to
2317 * get the correct one. */
2318 status = -EINVAL;
2319 mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
2320 (u32)suballoc_slot);
2321 goto bail;
2322 }
2323
2324 mutex_lock(&inode_alloc_inode->i_mutex);
2325 status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
2326 if (status < 0) {
2327 mutex_unlock(&inode_alloc_inode->i_mutex);
2328 mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
2329 (u32)suballoc_slot, status);
2330 goto bail;
2331 }
2332
2333 status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
2334 blkno, suballoc_bit, res);
2335 if (status < 0)
2336 mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
2337
2338 ocfs2_inode_unlock(inode_alloc_inode, 0);
2339 mutex_unlock(&inode_alloc_inode->i_mutex);
2340
2341 iput(inode_alloc_inode);
2342 brelse(alloc_bh);
2343bail:
2344 mlog_exit(status);
2345 return status;
2346}