aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tao Ma <tao.ma@oracle.com> 2008-03-05 03:11:46 -0500
committer Mark Fasheh <mfasheh@suse.com> 2008-04-18 11:56:10 -0400
commit 4d0ddb2ce25db2254d468233d942276ecf40bff8 (patch)
tree 6a114da03bb9911de637146f2b24866f1ae96718
parent a4a4891164d4f6f383cc17e7c90828a7ca6a1146 (diff)
ocfs2: Add inode stealing for ocfs2_reserve_new_inode
Inode allocation is modified to look in other nodes' allocators during extreme out-of-space situations. We retry our own slot when space is freed back to the global bitmap, or whenever we've allocated more than 1024 inodes from another slot. Signed-off-by: Tao Ma <tao.ma@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
-rw-r--r-- fs/ocfs2/alloc.c 2
-rw-r--r-- fs/ocfs2/localalloc.c 2
-rw-r--r-- fs/ocfs2/namei.c 2
-rw-r--r-- fs/ocfs2/ocfs2.h 34
-rw-r--r-- fs/ocfs2/suballoc.c 80
-rw-r--r-- fs/ocfs2/super.c 1
6 files changed, 116 insertions, 5 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7d81aa6f5672..a26882144e8f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5150,6 +5150,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
5150 status = ocfs2_flush_truncate_log(osb); 5150 status = ocfs2_flush_truncate_log(osb);
5151 if (status < 0) 5151 if (status < 0)
5152 mlog_errno(status); 5152 mlog_errno(status);
5153 else
5154 ocfs2_init_inode_steal_slot(osb);
5153 5155
5154 mlog_exit(status); 5156 mlog_exit(status);
5155} 5157}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index b6d07198118c..ce0dc147602a 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -447,6 +447,8 @@ out_mutex:
447 iput(main_bm_inode); 447 iput(main_bm_inode);
448 448
449out: 449out:
450 if (!status)
451 ocfs2_init_inode_steal_slot(osb);
450 mlog_exit(status); 452 mlog_exit(status);
451 return status; 453 return status;
452} 454}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ae9ad9587516..ab5a2272d0eb 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
424 fe->i_fs_generation = cpu_to_le32(osb->fs_generation); 424 fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
425 fe->i_blkno = cpu_to_le64(fe_blkno); 425 fe->i_blkno = cpu_to_le64(fe_blkno);
426 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 426 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
427 fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); 427 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
428 fe->i_uid = cpu_to_le32(current->fsuid); 428 fe->i_uid = cpu_to_le32(current->fsuid);
429 if (dir->i_mode & S_ISGID) { 429 if (dir->i_mode & S_ISGID) {
430 fe->i_gid = cpu_to_le32(dir->i_gid); 430 fe->i_gid = cpu_to_le32(dir->i_gid);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9ff5811345a9..31692379c170 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -208,11 +208,14 @@ struct ocfs2_super
208 u32 s_feature_incompat; 208 u32 s_feature_incompat;
209 u32 s_feature_ro_compat; 209 u32 s_feature_ro_compat;
210 210
211 /* Protects s_next_generaion, osb_flags. Could protect more on 211 /* Protects s_next_generation, osb_flags and s_inode_steal_slot.
212 * osb as it's very short lived. */ 212 * Could protect more on osb as it's very short lived.
213 */
213 spinlock_t osb_lock; 214 spinlock_t osb_lock;
214 u32 s_next_generation; 215 u32 s_next_generation;
215 unsigned long osb_flags; 216 unsigned long osb_flags;
217 s16 s_inode_steal_slot;
218 atomic_t s_num_inodes_stolen;
216 219
217 unsigned long s_mount_opt; 220 unsigned long s_mount_opt;
218 unsigned int s_atime_quantum; 221 unsigned int s_atime_quantum;
@@ -537,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
537 return pages_per_cluster; 540 return pages_per_cluster;
538} 541}
539 542
543static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
544{
545 spin_lock(&osb->osb_lock);
546 osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
547 spin_unlock(&osb->osb_lock);
548 atomic_set(&osb->s_num_inodes_stolen, 0);
549}
550
551static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
552 s16 slot)
553{
554 spin_lock(&osb->osb_lock);
555 osb->s_inode_steal_slot = slot;
556 spin_unlock(&osb->osb_lock);
557}
558
559static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
560{
561 s16 slot;
562
563 spin_lock(&osb->osb_lock);
564 slot = osb->s_inode_steal_slot;
565 spin_unlock(&osb->osb_lock);
566
567 return slot;
568}
569
540#define ocfs2_set_bit ext2_set_bit 570#define ocfs2_set_bit ext2_set_bit
541#define ocfs2_clear_bit ext2_clear_bit 571#define ocfs2_clear_bit ext2_clear_bit
542#define ocfs2_test_bit ext2_test_bit 572#define ocfs2_test_bit ext2_test_bit
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 33d55734c514..d2d278fb9819 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -49,6 +49,8 @@
49#define NOT_ALLOC_NEW_GROUP 0 49#define NOT_ALLOC_NEW_GROUP 0
50#define ALLOC_NEW_GROUP 1 50#define ALLOC_NEW_GROUP 1
51 51
52#define OCFS2_MAX_INODES_TO_STEAL 1024
53
52static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 54static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
53static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 55static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
54static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 56static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -109,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
109 u64 *bg_blkno, 111 u64 *bg_blkno,
110 u16 *bg_bit_off); 112 u16 *bg_bit_off);
111 113
112void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 114static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
113{ 115{
114 struct inode *inode = ac->ac_inode; 116 struct inode *inode = ac->ac_inode;
115 117
@@ -120,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
120 mutex_unlock(&inode->i_mutex); 122 mutex_unlock(&inode->i_mutex);
121 123
122 iput(inode); 124 iput(inode);
125 ac->ac_inode = NULL;
123 } 126 }
124 if (ac->ac_bh) 127 if (ac->ac_bh) {
125 brelse(ac->ac_bh); 128 brelse(ac->ac_bh);
129 ac->ac_bh = NULL;
130 }
131}
132
133void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
134{
135 ocfs2_free_ac_resource(ac);
126 kfree(ac); 136 kfree(ac);
127} 137}
128 138
@@ -522,10 +532,42 @@ bail:
522 return status; 532 return status;
523} 533}
524 534
535static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
536 struct ocfs2_alloc_context *ac)
537{
538 int i, status = -ENOSPC;
539 s16 slot = ocfs2_get_inode_steal_slot(osb);
540
541 /* Start to steal inodes from the first slot after ours. */
542 if (slot == OCFS2_INVALID_SLOT)
543 slot = osb->slot_num + 1;
544
545 for (i = 0; i < osb->max_slots; i++, slot++) {
546 if (slot == osb->max_slots)
547 slot = 0;
548
549 if (slot == osb->slot_num)
550 continue;
551
552 status = ocfs2_reserve_suballoc_bits(osb, ac,
553 INODE_ALLOC_SYSTEM_INODE,
554 slot, NOT_ALLOC_NEW_GROUP);
555 if (status >= 0) {
556 ocfs2_set_inode_steal_slot(osb, slot);
557 break;
558 }
559
560 ocfs2_free_ac_resource(ac);
561 }
562
563 return status;
564}
565
525int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 566int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
526 struct ocfs2_alloc_context **ac) 567 struct ocfs2_alloc_context **ac)
527{ 568{
528 int status; 569 int status;
570 s16 slot = ocfs2_get_inode_steal_slot(osb);
529 571
530 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 572 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
531 if (!(*ac)) { 573 if (!(*ac)) {
@@ -539,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
539 581
540 (*ac)->ac_group_search = ocfs2_block_group_search; 582 (*ac)->ac_group_search = ocfs2_block_group_search;
541 583
584 /*
585 * slot is set when we successfully steal inode from other nodes.
586 * It is reset in 3 places:
587 * 1. when we flush the truncate log
588 * 2. when we complete local alloc recovery.
589 * 3. when we successfully allocate from our own slot.
590 * After it is set, we will go on stealing inodes until we find the
591 * need to check our slots to see whether there is some space for us.
592 */
593 if (slot != OCFS2_INVALID_SLOT &&
594 atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
595 goto inode_steal;
596
597 atomic_set(&osb->s_num_inodes_stolen, 0);
542 status = ocfs2_reserve_suballoc_bits(osb, *ac, 598 status = ocfs2_reserve_suballoc_bits(osb, *ac,
543 INODE_ALLOC_SYSTEM_INODE, 599 INODE_ALLOC_SYSTEM_INODE,
544 osb->slot_num, ALLOC_NEW_GROUP); 600 osb->slot_num, ALLOC_NEW_GROUP);
601 if (status >= 0) {
602 status = 0;
603
604 /*
605 * Some inodes must be freed by us, so try to allocate
606 * from our own next time.
607 */
608 if (slot != OCFS2_INVALID_SLOT)
609 ocfs2_init_inode_steal_slot(osb);
610 goto bail;
611 } else if (status < 0 && status != -ENOSPC) {
612 mlog_errno(status);
613 goto bail;
614 }
615
616 ocfs2_free_ac_resource(*ac);
617
618inode_steal:
619 status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
620 atomic_inc(&osb->s_num_inodes_stolen);
545 if (status < 0) { 621 if (status < 0) {
546 if (status != -ENOSPC) 622 if (status != -ENOSPC)
547 mlog_errno(status); 623 mlog_errno(status);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 96ebe36d5d77..df63ba20ae90 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1394,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1394 INIT_LIST_HEAD(&osb->blocked_lock_list); 1394 INIT_LIST_HEAD(&osb->blocked_lock_list);
1395 osb->blocked_lock_count = 0; 1395 osb->blocked_lock_count = 0;
1396 spin_lock_init(&osb->osb_lock); 1396 spin_lock_init(&osb->osb_lock);
1397 ocfs2_init_inode_steal_slot(osb);
1397 1398
1398 atomic_set(&osb->alloc_stats.moves, 0); 1399 atomic_set(&osb->alloc_stats.moves, 0);
1399 atomic_set(&osb->alloc_stats.local_data, 0); 1400 atomic_set(&osb->alloc_stats.local_data, 0);