aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tao Ma <tao.ma@oracle.com> 2009-02-24 11:53:23 -0500
committer: Mark Fasheh <mfasheh@suse.com> 2009-04-03 14:39:17 -0400
commit: 138211515c102807a16c02fdc15feef1f6ef8124 (patch)
tree: 9b6fff8512a19792f2e29458292607f4efb413c4
parent: 1d46dc08d33138c29c63d717807c08ab704fc773 (diff)
ocfs2: Optimize inode allocation by remembering last group
In ocfs2, the inode block search looks for the "emptiest" inode group to allocate from. So if an inode alloc file has many equally (or almost equally) empty groups, new inodes will tend to get spread out amongst them, which in turn can put them all over the disk. This is undesirable because directory operations on conceptually "nearby" inodes force a large number of seeks. So we add ip_last_used_group to in-core directory inodes, which records the last used allocation group. Another field named ip_last_used_slot is also added in case inode stealing happens. When claiming a new inode, we pass in the directory's inode so that the allocation can use this information. For more details, please see http://oss.oracle.com/osswiki/OCFS2/DesignDocs/InodeAllocationStrategy. Signed-off-by: Tao Ma <tao.ma@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
-rw-r--r-- fs/ocfs2/inode.c 2
-rw-r--r-- fs/ocfs2/inode.h 4
-rw-r--r-- fs/ocfs2/namei.c 4
-rw-r--r-- fs/ocfs2/suballoc.c 36
-rw-r--r-- fs/ocfs2/suballoc.h 2
5 files changed, 46 insertions, 2 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f1f77b2f5947..4a88bce35079 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -352,6 +352,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
352 352
353 ocfs2_set_inode_flags(inode); 353 ocfs2_set_inode_flags(inode);
354 354
355 OCFS2_I(inode)->ip_last_used_slot = 0;
356 OCFS2_I(inode)->ip_last_used_group = 0;
355 mlog_exit_void(); 357 mlog_exit_void();
356} 358}
357 359
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index eb3c302b38d3..e1978acbf65e 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -72,6 +72,10 @@ struct ocfs2_inode_info
72 72
73 struct inode vfs_inode; 73 struct inode vfs_inode;
74 struct jbd2_inode ip_jinode; 74 struct jbd2_inode ip_jinode;
75
76 /* Only valid if the inode is the dir. */
77 u32 ip_last_used_slot;
78 u64 ip_last_used_group;
75}; 79};
76 80
77/* 81/*
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 58c318d2f061..2220f93f668b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -485,8 +485,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
485 485
486 *new_fe_bh = NULL; 486 *new_fe_bh = NULL;
487 487
488 status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit, 488 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
489 &fe_blkno); 489 inode_ac, &suballoc_bit, &fe_blkno);
490 if (status < 0) { 490 if (status < 0) {
491 mlog_errno(status); 491 mlog_errno(status);
492 goto leave; 492 goto leave;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a69628603e18..487f00c45f84 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1618,8 +1618,41 @@ bail:
1618 return status; 1618 return status;
1619} 1619}
1620 1620
/*
 * ocfs2_init_inode_ac_group() - choose a preferred group before an inode
 * is claimed.
 *
 * @dir:          parent directory; its OCFS2_I() info supplies
 *                ip_last_used_group and ip_last_used_slot.
 * @parent_fe_bh: buffer head whose b_data is read as the parent's
 *                struct ocfs2_dinode.
 * @ac:           allocation context; ac_alloc_slot is compared against and
 *                ac_last_group may be written.
 *
 * Returns nothing. If neither test below matches, ac->ac_last_group is not
 * written at all. An ip_last_used_group of zero is treated as "nothing
 * recorded yet", so the first branch is skipped in that case.
 */
1621static void ocfs2_init_inode_ac_group(struct inode *dir,
1622 struct buffer_head *parent_fe_bh,
1623 struct ocfs2_alloc_context *ac)
1624{
1625 struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data;
1626 /*
1627 * Try to allocate inodes from some specific group.
1628 *
1629 * If the parent dir has recorded the last group used in allocation,
1630 * cool, use it. Otherwise if we try to allocate new inode from the
1631 * same slot the parent dir belongs to, use the same chunk.
1632 *
1633 * We are very careful here to avoid the mistake of setting
1634 * ac_last_group to a group descriptor from a different (unlocked) slot.
1635 */
1636 if (OCFS2_I(dir)->ip_last_used_group &&
1637 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
1638 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
1639 else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot)
1640 ac->ac_last_group = ocfs2_which_suballoc_group(
1641 le64_to_cpu(fe->i_blkno),
1642 le16_to_cpu(fe->i_suballoc_bit));
1643}
1644
/*
 * ocfs2_save_inode_ac_group() - remember where the last inode came from.
 *
 * @dir: parent directory whose OCFS2_I() info is updated.
 * @ac:  allocation context that was just used.
 *
 * Copies ac->ac_last_group and ac->ac_alloc_slot into the directory's
 * ip_last_used_group and ip_last_used_slot. These are the same fields
 * ocfs2_init_inode_ac_group() reads, and the slot is stored alongside
 * the group so that function can compare it with ac->ac_alloc_slot.
 */
1645static inline void ocfs2_save_inode_ac_group(struct inode *dir,
1646 struct ocfs2_alloc_context *ac)
1647{
1648 OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
1649 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
1650}
1651
1621int ocfs2_claim_new_inode(struct ocfs2_super *osb, 1652int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1622 handle_t *handle, 1653 handle_t *handle,
1654 struct inode *dir,
1655 struct buffer_head *parent_fe_bh,
1623 struct ocfs2_alloc_context *ac, 1656 struct ocfs2_alloc_context *ac,
1624 u16 *suballoc_bit, 1657 u16 *suballoc_bit,
1625 u64 *fe_blkno) 1658 u64 *fe_blkno)
@@ -1635,6 +1668,8 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1635 BUG_ON(ac->ac_bits_wanted != 1); 1668 BUG_ON(ac->ac_bits_wanted != 1);
1636 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); 1669 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
1637 1670
1671 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
1672
1638 status = ocfs2_claim_suballoc_bits(osb, 1673 status = ocfs2_claim_suballoc_bits(osb,
1639 ac, 1674 ac,
1640 handle, 1675 handle,
@@ -1653,6 +1688,7 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1653 1688
1654 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 1689 *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
1655 ac->ac_bits_given++; 1690 ac->ac_bits_given++;
1691 ocfs2_save_inode_ac_group(dir, ac);
1656 status = 0; 1692 status = 0;
1657bail: 1693bail:
1658 mlog_exit(status); 1694 mlog_exit(status);
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index e3c13c77f9e8..ea85a4c8b4b1 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -88,6 +88,8 @@ int ocfs2_claim_metadata(struct ocfs2_super *osb,
88 u64 *blkno_start); 88 u64 *blkno_start);
89int ocfs2_claim_new_inode(struct ocfs2_super *osb, 89int ocfs2_claim_new_inode(struct ocfs2_super *osb,
90 handle_t *handle, 90 handle_t *handle,
91 struct inode *dir,
92 struct buffer_head *parent_fe_bh,
91 struct ocfs2_alloc_context *ac, 93 struct ocfs2_alloc_context *ac,
92 u16 *suballoc_bit, 94 u16 *suballoc_bit,
93 u64 *fe_blkno); 95 u64 *fe_blkno);