diff options
author | Tao Ma <tao.ma@oracle.com> | 2008-03-05 03:11:46 -0500 |
---|---|---|
committer | Mark Fasheh <mfasheh@suse.com> | 2008-04-18 11:56:10 -0400 |
commit | 4d0ddb2ce25db2254d468233d942276ecf40bff8 (patch) | |
tree | 6a114da03bb9911de637146f2b24866f1ae96718 | |
parent | a4a4891164d4f6f383cc17e7c90828a7ca6a1146 (diff) |
ocfs2: Add inode stealing for ocfs2_reserve_new_inode
Inode allocation is modified to look in other nodes' allocators during
extreme out-of-space situations. We retry our own slot when space is freed
back to the global bitmap, or whenever we've allocated more than 1024 inodes
from another slot.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
-rw-r--r-- | fs/ocfs2/alloc.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/localalloc.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/namei.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 34 | ||||
-rw-r--r-- | fs/ocfs2/suballoc.c | 80 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 1 |
6 files changed, 116 insertions, 5 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 7d81aa6f5672..a26882144e8f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -5150,6 +5150,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work) | |||
5150 | status = ocfs2_flush_truncate_log(osb); | 5150 | status = ocfs2_flush_truncate_log(osb); |
5151 | if (status < 0) | 5151 | if (status < 0) |
5152 | mlog_errno(status); | 5152 | mlog_errno(status); |
5153 | else | ||
5154 | ocfs2_init_inode_steal_slot(osb); | ||
5153 | 5155 | ||
5154 | mlog_exit(status); | 5156 | mlog_exit(status); |
5155 | } | 5157 | } |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index b6d07198118c..ce0dc147602a 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -447,6 +447,8 @@ out_mutex: | |||
447 | iput(main_bm_inode); | 447 | iput(main_bm_inode); |
448 | 448 | ||
449 | out: | 449 | out: |
450 | if (!status) | ||
451 | ocfs2_init_inode_steal_slot(osb); | ||
450 | mlog_exit(status); | 452 | mlog_exit(status); |
451 | return status; | 453 | return status; |
452 | } | 454 | } |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ae9ad9587516..ab5a2272d0eb 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); | 424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); |
425 | fe->i_blkno = cpu_to_le64(fe_blkno); | 425 | fe->i_blkno = cpu_to_le64(fe_blkno); |
426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); | 426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); |
427 | fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); | 427 | fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); |
428 | fe->i_uid = cpu_to_le32(current->fsuid); | 428 | fe->i_uid = cpu_to_le32(current->fsuid); |
429 | if (dir->i_mode & S_ISGID) { | 429 | if (dir->i_mode & S_ISGID) { |
430 | fe->i_gid = cpu_to_le32(dir->i_gid); | 430 | fe->i_gid = cpu_to_le32(dir->i_gid); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9ff5811345a9..31692379c170 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -208,11 +208,14 @@ struct ocfs2_super | |||
208 | u32 s_feature_incompat; | 208 | u32 s_feature_incompat; |
209 | u32 s_feature_ro_compat; | 209 | u32 s_feature_ro_compat; |
210 | 210 | ||
211 | /* Protects s_next_generaion, osb_flags. Could protect more on | 211 | /* Protects s_next_generation, osb_flags and s_inode_steal_slot. |
212 | * osb as it's very short lived. */ | 212 | * Could protect more on osb as it's very short lived. |
213 | */ | ||
213 | spinlock_t osb_lock; | 214 | spinlock_t osb_lock; |
214 | u32 s_next_generation; | 215 | u32 s_next_generation; |
215 | unsigned long osb_flags; | 216 | unsigned long osb_flags; |
217 | s16 s_inode_steal_slot; | ||
218 | atomic_t s_num_inodes_stolen; | ||
216 | 219 | ||
217 | unsigned long s_mount_opt; | 220 | unsigned long s_mount_opt; |
218 | unsigned int s_atime_quantum; | 221 | unsigned int s_atime_quantum; |
@@ -537,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | |||
537 | return pages_per_cluster; | 540 | return pages_per_cluster; |
538 | } | 541 | } |
539 | 542 | ||
543 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | ||
544 | { | ||
545 | spin_lock(&osb->osb_lock); | ||
546 | osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; | ||
547 | spin_unlock(&osb->osb_lock); | ||
548 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
549 | } | ||
550 | |||
551 | static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb, | ||
552 | s16 slot) | ||
553 | { | ||
554 | spin_lock(&osb->osb_lock); | ||
555 | osb->s_inode_steal_slot = slot; | ||
556 | spin_unlock(&osb->osb_lock); | ||
557 | } | ||
558 | |||
559 | static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) | ||
560 | { | ||
561 | s16 slot; | ||
562 | |||
563 | spin_lock(&osb->osb_lock); | ||
564 | slot = osb->s_inode_steal_slot; | ||
565 | spin_unlock(&osb->osb_lock); | ||
566 | |||
567 | return slot; | ||
568 | } | ||
569 | |||
540 | #define ocfs2_set_bit ext2_set_bit | 570 | #define ocfs2_set_bit ext2_set_bit |
541 | #define ocfs2_clear_bit ext2_clear_bit | 571 | #define ocfs2_clear_bit ext2_clear_bit |
542 | #define ocfs2_test_bit ext2_test_bit | 572 | #define ocfs2_test_bit ext2_test_bit |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 33d55734c514..d2d278fb9819 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -49,6 +49,8 @@ | |||
49 | #define NOT_ALLOC_NEW_GROUP 0 | 49 | #define NOT_ALLOC_NEW_GROUP 0 |
50 | #define ALLOC_NEW_GROUP 1 | 50 | #define ALLOC_NEW_GROUP 1 |
51 | 51 | ||
52 | #define OCFS2_MAX_INODES_TO_STEAL 1024 | ||
53 | |||
52 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 54 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
53 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 55 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
54 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 56 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
@@ -109,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
109 | u64 *bg_blkno, | 111 | u64 *bg_blkno, |
110 | u16 *bg_bit_off); | 112 | u16 *bg_bit_off); |
111 | 113 | ||
112 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 114 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
113 | { | 115 | { |
114 | struct inode *inode = ac->ac_inode; | 116 | struct inode *inode = ac->ac_inode; |
115 | 117 | ||
@@ -120,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |||
120 | mutex_unlock(&inode->i_mutex); | 122 | mutex_unlock(&inode->i_mutex); |
121 | 123 | ||
122 | iput(inode); | 124 | iput(inode); |
125 | ac->ac_inode = NULL; | ||
123 | } | 126 | } |
124 | if (ac->ac_bh) | 127 | if (ac->ac_bh) { |
125 | brelse(ac->ac_bh); | 128 | brelse(ac->ac_bh); |
129 | ac->ac_bh = NULL; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | ||
134 | { | ||
135 | ocfs2_free_ac_resource(ac); | ||
126 | kfree(ac); | 136 | kfree(ac); |
127 | } | 137 | } |
128 | 138 | ||
@@ -522,10 +532,42 @@ bail: | |||
522 | return status; | 532 | return status; |
523 | } | 533 | } |
524 | 534 | ||
535 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | ||
536 | struct ocfs2_alloc_context *ac) | ||
537 | { | ||
538 | int i, status = -ENOSPC; | ||
539 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
540 | |||
541 | /* Start to steal inodes from the first slot after ours. */ | ||
542 | if (slot == OCFS2_INVALID_SLOT) | ||
543 | slot = osb->slot_num + 1; | ||
544 | |||
545 | for (i = 0; i < osb->max_slots; i++, slot++) { | ||
546 | if (slot == osb->max_slots) | ||
547 | slot = 0; | ||
548 | |||
549 | if (slot == osb->slot_num) | ||
550 | continue; | ||
551 | |||
552 | status = ocfs2_reserve_suballoc_bits(osb, ac, | ||
553 | INODE_ALLOC_SYSTEM_INODE, | ||
554 | slot, NOT_ALLOC_NEW_GROUP); | ||
555 | if (status >= 0) { | ||
556 | ocfs2_set_inode_steal_slot(osb, slot); | ||
557 | break; | ||
558 | } | ||
559 | |||
560 | ocfs2_free_ac_resource(ac); | ||
561 | } | ||
562 | |||
563 | return status; | ||
564 | } | ||
565 | |||
525 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 566 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
526 | struct ocfs2_alloc_context **ac) | 567 | struct ocfs2_alloc_context **ac) |
527 | { | 568 | { |
528 | int status; | 569 | int status; |
570 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
529 | 571 | ||
530 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 572 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
531 | if (!(*ac)) { | 573 | if (!(*ac)) { |
@@ -539,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
539 | 581 | ||
540 | (*ac)->ac_group_search = ocfs2_block_group_search; | 582 | (*ac)->ac_group_search = ocfs2_block_group_search; |
541 | 583 | ||
584 | /* | ||
585 | * slot is set when we successfully steal inode from other nodes. | ||
586 | * It is reset in 3 places: | ||
587 | * 1. when we flush the truncate log | ||
588 | * 2. when we complete local alloc recovery. | ||
589 | * 3. when we successfully allocate from our own slot. | ||
590 | * After it is set, we will go on stealing inodes until we find the | ||
591 | * need to check our slots to see whether there is some space for us. | ||
592 | */ | ||
593 | if (slot != OCFS2_INVALID_SLOT && | ||
594 | atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL) | ||
595 | goto inode_steal; | ||
596 | |||
597 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
542 | status = ocfs2_reserve_suballoc_bits(osb, *ac, | 598 | status = ocfs2_reserve_suballoc_bits(osb, *ac, |
543 | INODE_ALLOC_SYSTEM_INODE, | 599 | INODE_ALLOC_SYSTEM_INODE, |
544 | osb->slot_num, ALLOC_NEW_GROUP); | 600 | osb->slot_num, ALLOC_NEW_GROUP); |
601 | if (status >= 0) { | ||
602 | status = 0; | ||
603 | |||
604 | /* | ||
605 | * Some inodes must be freed by us, so try to allocate | ||
606 | * from our own next time. | ||
607 | */ | ||
608 | if (slot != OCFS2_INVALID_SLOT) | ||
609 | ocfs2_init_inode_steal_slot(osb); | ||
610 | goto bail; | ||
611 | } else if (status < 0 && status != -ENOSPC) { | ||
612 | mlog_errno(status); | ||
613 | goto bail; | ||
614 | } | ||
615 | |||
616 | ocfs2_free_ac_resource(*ac); | ||
617 | |||
618 | inode_steal: | ||
619 | status = ocfs2_steal_inode_from_other_nodes(osb, *ac); | ||
620 | atomic_inc(&osb->s_num_inodes_stolen); | ||
545 | if (status < 0) { | 621 | if (status < 0) { |
546 | if (status != -ENOSPC) | 622 | if (status != -ENOSPC) |
547 | mlog_errno(status); | 623 | mlog_errno(status); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 96ebe36d5d77..df63ba20ae90 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1394,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1394 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1394 | INIT_LIST_HEAD(&osb->blocked_lock_list); |
1395 | osb->blocked_lock_count = 0; | 1395 | osb->blocked_lock_count = 0; |
1396 | spin_lock_init(&osb->osb_lock); | 1396 | spin_lock_init(&osb->osb_lock); |
1397 | ocfs2_init_inode_steal_slot(osb); | ||
1397 | 1398 | ||
1398 | atomic_set(&osb->alloc_stats.moves, 0); | 1399 | atomic_set(&osb->alloc_stats.moves, 0); |
1399 | atomic_set(&osb->alloc_stats.local_data, 0); | 1400 | atomic_set(&osb->alloc_stats.local_data, 0); |