aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe David Borba Manana <fdmanana@gmail.com>2013-10-06 17:22:33 -0400
committerChris Mason <chris.mason@fusionio.com>2013-11-11 21:55:19 -0500
commit778ba82b1796e75e719a52679ae431371ca73988 (patch)
tree40d2c90652d2f6ca053e2007740869bf8b40c75c
parent3d41d70252234db153ea1b037052278ff5786ad5 (diff)
Btrfs: improve inode hash function/inode lookup
Currently the hash value used for adding an inode to the VFS's inode hash table consists of the plain inode number, which is a 64 bits integer. This results in hash table buckets (hlist_head lists) with too many elements for at least 2 important scenarios: 1) When we have many subvolumes. Each subvolume has its own btree where its files and directories are added to, and each has its own objectid (inode number) namespace. This means that if we have N subvolumes, and all have inode number X associated to a file or directory, the corresponding inodes all map to the same hash table entry, resulting in a bucket (hlist_head list) with N elements; 2) On 32 bits machines. Th VFS hash values are unsigned longs, which are 32 bits wide on 32 bits machines, and the inode (objectid) numbers are 64 bits unsigned integers. We simply cast the inode numbers to hash values, which means that for all inodes with the same 32 bits lower half, the same hash bucket is used for all of them. For example, all inodes with a number (objectid) between 0x0000_0000_ffff_ffff and 0xffff_ffff_ffff_ffff will end up in the same hash table bucket. This change ensures the inode's hash value depends both on the objectid (inode number) and its subvolume's (btree root) objectid. For 32 bits machines, this change gives better entropy by making the hash value depend on both the upper and lower 32 bits of the 64 bits hash previously computed. Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com> Signed-off-by: Josef Bacik <jbacik@fusionio.com> Signed-off-by: Chris Mason <chris.mason@fusionio.com>
-rw-r--r--fs/btrfs/btrfs_inode.h20
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/inode.c6
3 files changed, 25 insertions, 3 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 71f074e1870b..ac0b39db27d1 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,6 +19,7 @@
19#ifndef __BTRFS_I__ 19#ifndef __BTRFS_I__
20#define __BTRFS_I__ 20#define __BTRFS_I__
21 21
22#include <linux/hash.h>
22#include "extent_map.h" 23#include "extent_map.h"
23#include "extent_io.h" 24#include "extent_io.h"
24#include "ordered-data.h" 25#include "ordered-data.h"
@@ -179,6 +180,25 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
179 return container_of(inode, struct btrfs_inode, vfs_inode); 180 return container_of(inode, struct btrfs_inode, vfs_inode);
180} 181}
181 182
183static inline unsigned long btrfs_inode_hash(u64 objectid,
184 const struct btrfs_root *root)
185{
186 u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
187
188#if BITS_PER_LONG == 32
189 h = (h >> 32) ^ (h & 0xffffffff);
190#endif
191
192 return (unsigned long)h;
193}
194
195static inline void btrfs_insert_inode_hash(struct inode *inode)
196{
197 unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
198
199 __insert_inode_hash(inode, h);
200}
201
182static inline u64 btrfs_ino(struct inode *inode) 202static inline u64 btrfs_ino(struct inode *inode)
183{ 203{
184 u64 ino = BTRFS_I(inode)->location.objectid; 204 u64 ino = BTRFS_I(inode)->location.objectid;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ade6c0e79616..d205bddc7776 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2294,7 +2294,7 @@ int open_ctree(struct super_block *sb,
2294 sizeof(struct btrfs_key)); 2294 sizeof(struct btrfs_key));
2295 set_bit(BTRFS_INODE_DUMMY, 2295 set_bit(BTRFS_INODE_DUMMY,
2296 &BTRFS_I(fs_info->btree_inode)->runtime_flags); 2296 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2297 insert_inode_hash(fs_info->btree_inode); 2297 btrfs_insert_inode_hash(fs_info->btree_inode);
2298 2298
2299 spin_lock_init(&fs_info->block_group_cache_lock); 2299 spin_lock_init(&fs_info->block_group_cache_lock);
2300 fs_info->block_group_cache_tree = RB_ROOT; 2300 fs_info->block_group_cache_tree = RB_ROOT;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1ca49eaba3bb..bb242f2fb51e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4837,10 +4837,12 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
4837{ 4837{
4838 struct inode *inode; 4838 struct inode *inode;
4839 struct btrfs_iget_args args; 4839 struct btrfs_iget_args args;
4840 unsigned long hashval = btrfs_inode_hash(objectid, root);
4841
4840 args.ino = objectid; 4842 args.ino = objectid;
4841 args.root = root; 4843 args.root = root;
4842 4844
4843 inode = iget5_locked(s, objectid, btrfs_find_actor, 4845 inode = iget5_locked(s, hashval, btrfs_find_actor,
4844 btrfs_init_locked_inode, 4846 btrfs_init_locked_inode,
4845 (void *)&args); 4847 (void *)&args);
4846 return inode; 4848 return inode;
@@ -5460,7 +5462,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5460 BTRFS_INODE_NODATASUM; 5462 BTRFS_INODE_NODATASUM;
5461 } 5463 }
5462 5464
5463 insert_inode_hash(inode); 5465 btrfs_insert_inode_hash(inode);
5464 inode_tree_add(inode); 5466 inode_tree_add(inode);
5465 5467
5466 trace_btrfs_inode_new(inode); 5468 trace_btrfs_inode_new(inode);