diff options
author | Sage Weil <sage@newdream.net> | 2009-11-06 19:44:05 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2009-11-06 19:44:05 -0500 |
commit | cfbbcd24a6bfd794295ee7ad76dfbff40ad6b934 (patch) | |
tree | 26a9640291ae3af1a9f1dd56cd8b3a2e3016b3ff /fs/ceph/ceph_fs.c | |
parent | c6cf726316abd613cfb7c325d950f3629f964ec6 (diff) |
ceph: use strong hash function for mapping objects to pgs
We were using the (weak) dcache hash function, but it was leaving lower
bits consecutive for consecutive (inode) objects. We really want to make
the object to pg mapping random and uniform, so use a proper hash function
here.
This is Robert Jenkin's public domain hash function (with some minor
cleanup):
http://burtleburtle.net/bob/hash/evahash.html
This is a protocol revision.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/ceph_fs.c')
-rw-r--r-- | fs/ceph/ceph_fs.c | 93 |
1 files changed, 70 insertions, 23 deletions
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index a950b4083577..b3ecf1b07521 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c | |||
@@ -73,32 +73,79 @@ int ceph_caps_for_mode(int mode) | |||
73 | return 0; | 73 | return 0; |
74 | } | 74 | } |
75 | 75 | ||
76 | /* Name hashing routines. Initial hash value */ | ||
77 | /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ | ||
78 | #define ceph_init_name_hash() 0 | ||
79 | |||
80 | /* partial hash update function. Assume roughly 4 bits per character */ | ||
81 | static unsigned long ceph_partial_name_hash(unsigned long c, | ||
82 | unsigned long prevhash) | ||
83 | { | ||
84 | return (prevhash + (c << 4) + (c >> 4)) * 11; | ||
85 | } | ||
86 | |||
87 | /* | 76 | /* |
88 | * Finally: cut down the number of bits to a int value (and try to avoid | 77 | * Robert Jenkin's hash function. |
89 | * losing bits) | 78 | * http://burtleburtle.net/bob/hash/evahash.html |
79 | * This is in the public domain. | ||
90 | */ | 80 | */ |
91 | static unsigned long ceph_end_name_hash(unsigned long hash) | 81 | #define mix(a, b, c) \ |
92 | { | 82 | do { \ |
93 | return hash & 0xffffffff; | 83 | a = a - b; a = a - c; a = a ^ (c >> 13); \ |
94 | } | 84 | b = b - c; b = b - a; b = b ^ (a << 8); \ |
85 | c = c - a; c = c - b; c = c ^ (b >> 13); \ | ||
86 | a = a - b; a = a - c; a = a ^ (c >> 12); \ | ||
87 | b = b - c; b = b - a; b = b ^ (a << 16); \ | ||
88 | c = c - a; c = c - b; c = c ^ (b >> 5); \ | ||
89 | a = a - b; a = a - c; a = a ^ (c >> 3); \ | ||
90 | b = b - c; b = b - a; b = b ^ (a << 10); \ | ||
91 | c = c - a; c = c - b; c = c ^ (b >> 15); \ | ||
92 | } while (0) | ||
95 | 93 | ||
96 | /* Compute the hash for a name string. */ | 94 | unsigned int ceph_full_name_hash(const char *str, unsigned int length) |
97 | unsigned int ceph_full_name_hash(const char *name, unsigned int len) | ||
98 | { | 95 | { |
99 | unsigned long hash = ceph_init_name_hash(); | 96 | const unsigned char *k = (const unsigned char *)str; |
100 | while (len--) | 97 | __u32 a, b, c; /* the internal state */ |
101 | hash = ceph_partial_name_hash(*name++, hash); | 98 | __u32 len; /* how many key bytes still need mixing */ |
102 | return ceph_end_name_hash(hash); | 99 | |
100 | /* Set up the internal state */ | ||
101 | len = length; | ||
102 | a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ | ||
103 | b = a; | ||
104 | c = 0; /* variable initialization of internal state */ | ||
105 | |||
106 | /* handle most of the key */ | ||
107 | while (len >= 12) { | ||
108 | a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + | ||
109 | ((__u32)k[3] << 24)); | ||
110 | b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + | ||
111 | ((__u32)k[7] << 24)); | ||
112 | c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + | ||
113 | ((__u32)k[11] << 24)); | ||
114 | mix(a, b, c); | ||
115 | k = k + 12; | ||
116 | len = len - 12; | ||
117 | } | ||
118 | |||
119 | /* handle the last 11 bytes */ | ||
120 | c = c + length; | ||
121 | switch (len) { /* all the case statements fall through */ | ||
122 | case 11: | ||
123 | c = c + ((__u32)k[10] << 24); | ||
124 | case 10: | ||
125 | c = c + ((__u32)k[9] << 16); | ||
126 | case 9: | ||
127 | c = c + ((__u32)k[8] << 8); | ||
128 | /* the first byte of c is reserved for the length */ | ||
129 | case 8: | ||
130 | b = b + ((__u32)k[7] << 24); | ||
131 | case 7: | ||
132 | b = b + ((__u32)k[6] << 16); | ||
133 | case 6: | ||
134 | b = b + ((__u32)k[5] << 8); | ||
135 | case 5: | ||
136 | b = b + k[4]; | ||
137 | case 4: | ||
138 | a = a + ((__u32)k[3] << 24); | ||
139 | case 3: | ||
140 | a = a + ((__u32)k[2] << 16); | ||
141 | case 2: | ||
142 | a = a + ((__u32)k[1] << 8); | ||
143 | case 1: | ||
144 | a = a + k[0]; | ||
145 | /* case 0: nothing left to add */ | ||
146 | } | ||
147 | mix(a, b, c); | ||
148 | |||
149 | return c; | ||
103 | } | 150 | } |
104 | 151 | ||