diff options
author | Sage Weil <sage@newdream.net> | 2009-11-07 00:55:25 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2009-11-07 00:55:25 -0500 |
commit | 1654dd0cf5ee1827322aca156af7d96d757201c7 (patch) | |
tree | ba71b1a1e214b929937f02a1794b3d32b8ff1342 | |
parent | cfbbcd24a6bfd794295ee7ad76dfbff40ad6b934 (diff) |
ceph: make object hash a pg_pool property
The object will be hashed to a placement seed (ps) based on the pg_pool's
hash function. This allows new hashes to be introduced into an existing
object store, or selection of a hash appropriate to the objects that
will be stored in a particular pool.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/Makefile | 2 | ||||
-rw-r--r-- | fs/ceph/README | 2 | ||||
-rw-r--r-- | fs/ceph/ceph_fs.c | 77 | ||||
-rw-r--r-- | fs/ceph/ceph_fs.h | 2 | ||||
-rw-r--r-- | fs/ceph/ceph_hash.c | 118 | ||||
-rw-r--r-- | fs/ceph/ceph_hash.h | 13 | ||||
-rw-r--r-- | fs/ceph/osdmap.c | 2 | ||||
-rw-r--r-- | fs/ceph/rados.h | 1 | ||||
-rw-r--r-- | fs/ceph/types.h | 1 |
9 files changed, 137 insertions, 81 deletions
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 8bad70aac363..bdd3e6fc1609 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -13,7 +13,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ | |||
13 | mon_client.o \ | 13 | mon_client.o \ |
14 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | 14 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ |
15 | debugfs.o \ | 15 | debugfs.o \ |
16 | ceph_fs.o ceph_strings.o ceph_frag.o | 16 | ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o |
17 | 17 | ||
18 | else | 18 | else |
19 | #Otherwise we were called directly from the command | 19 | #Otherwise we were called directly from the command |
diff --git a/fs/ceph/README b/fs/ceph/README index 660e00074d59..18352fab37c0 100644 --- a/fs/ceph/README +++ b/fs/ceph/README | |||
@@ -10,6 +10,8 @@ src/include/rados.h fs/ceph/rados.h | |||
10 | src/include/ceph_strings.cc fs/ceph/ceph_strings.c | 10 | src/include/ceph_strings.cc fs/ceph/ceph_strings.c |
11 | src/include/ceph_frag.h fs/ceph/ceph_frag.h | 11 | src/include/ceph_frag.h fs/ceph/ceph_frag.h |
12 | src/include/ceph_frag.cc fs/ceph/ceph_frag.c | 12 | src/include/ceph_frag.cc fs/ceph/ceph_frag.c |
13 | src/include/ceph_hash.h fs/ceph/ceph_hash.h | ||
14 | src/include/ceph_hash.cc fs/ceph/ceph_hash.c | ||
13 | src/crush/crush.c fs/ceph/crush/crush.c | 15 | src/crush/crush.c fs/ceph/crush/crush.c |
14 | src/crush/crush.h fs/ceph/crush/crush.h | 16 | src/crush/crush.h fs/ceph/crush/crush.h |
15 | src/crush/mapper.c fs/ceph/crush/mapper.c | 17 | src/crush/mapper.c fs/ceph/crush/mapper.c |
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index b3ecf1b07521..79d76bc4303f 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c | |||
@@ -72,80 +72,3 @@ int ceph_caps_for_mode(int mode) | |||
72 | } | 72 | } |
73 | return 0; | 73 | return 0; |
74 | } | 74 | } |
75 | |||
76 | /* | ||
77 | * Robert Jenkin's hash function. | ||
78 | * http://burtleburtle.net/bob/hash/evahash.html | ||
79 | * This is in the public domain. | ||
80 | */ | ||
81 | #define mix(a, b, c) \ | ||
82 | do { \ | ||
83 | a = a - b; a = a - c; a = a ^ (c >> 13); \ | ||
84 | b = b - c; b = b - a; b = b ^ (a << 8); \ | ||
85 | c = c - a; c = c - b; c = c ^ (b >> 13); \ | ||
86 | a = a - b; a = a - c; a = a ^ (c >> 12); \ | ||
87 | b = b - c; b = b - a; b = b ^ (a << 16); \ | ||
88 | c = c - a; c = c - b; c = c ^ (b >> 5); \ | ||
89 | a = a - b; a = a - c; a = a ^ (c >> 3); \ | ||
90 | b = b - c; b = b - a; b = b ^ (a << 10); \ | ||
91 | c = c - a; c = c - b; c = c ^ (b >> 15); \ | ||
92 | } while (0) | ||
93 | |||
94 | unsigned int ceph_full_name_hash(const char *str, unsigned int length) | ||
95 | { | ||
96 | const unsigned char *k = (const unsigned char *)str; | ||
97 | __u32 a, b, c; /* the internal state */ | ||
98 | __u32 len; /* how many key bytes still need mixing */ | ||
99 | |||
100 | /* Set up the internal state */ | ||
101 | len = length; | ||
102 | a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ | ||
103 | b = a; | ||
104 | c = 0; /* variable initialization of internal state */ | ||
105 | |||
106 | /* handle most of the key */ | ||
107 | while (len >= 12) { | ||
108 | a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + | ||
109 | ((__u32)k[3] << 24)); | ||
110 | b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + | ||
111 | ((__u32)k[7] << 24)); | ||
112 | c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + | ||
113 | ((__u32)k[11] << 24)); | ||
114 | mix(a, b, c); | ||
115 | k = k + 12; | ||
116 | len = len - 12; | ||
117 | } | ||
118 | |||
119 | /* handle the last 11 bytes */ | ||
120 | c = c + length; | ||
121 | switch (len) { /* all the case statements fall through */ | ||
122 | case 11: | ||
123 | c = c + ((__u32)k[10] << 24); | ||
124 | case 10: | ||
125 | c = c + ((__u32)k[9] << 16); | ||
126 | case 9: | ||
127 | c = c + ((__u32)k[8] << 8); | ||
128 | /* the first byte of c is reserved for the length */ | ||
129 | case 8: | ||
130 | b = b + ((__u32)k[7] << 24); | ||
131 | case 7: | ||
132 | b = b + ((__u32)k[6] << 16); | ||
133 | case 6: | ||
134 | b = b + ((__u32)k[5] << 8); | ||
135 | case 5: | ||
136 | b = b + k[4]; | ||
137 | case 4: | ||
138 | a = a + ((__u32)k[3] << 24); | ||
139 | case 3: | ||
140 | a = a + ((__u32)k[2] << 16); | ||
141 | case 2: | ||
142 | a = a + ((__u32)k[1] << 8); | ||
143 | case 1: | ||
144 | a = a + k[0]; | ||
145 | /* case 0: nothing left to add */ | ||
146 | } | ||
147 | mix(a, b, c); | ||
148 | |||
149 | return c; | ||
150 | } | ||
151 | |||
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 25fc537f4140..36becb024788 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
@@ -49,8 +49,6 @@ | |||
49 | #define CEPH_MAX_MON 31 | 49 | #define CEPH_MAX_MON 31 |
50 | 50 | ||
51 | 51 | ||
52 | unsigned int ceph_full_name_hash(const char *name, unsigned int len); | ||
53 | |||
54 | 52 | ||
55 | /* | 53 | /* |
56 | * ceph_file_layout - describe data layout for a file/inode | 54 | * ceph_file_layout - describe data layout for a file/inode |
diff --git a/fs/ceph/ceph_hash.c b/fs/ceph/ceph_hash.c new file mode 100644 index 000000000000..ac8be54631fe --- /dev/null +++ b/fs/ceph/ceph_hash.c | |||
@@ -0,0 +1,118 @@ | |||
1 | |||
2 | #include "types.h" | ||
3 | |||
4 | /* | ||
5 | * Robert Jenkin's hash function. | ||
6 | * http://burtleburtle.net/bob/hash/evahash.html | ||
7 | * This is in the public domain. | ||
8 | */ | ||
9 | #define mix(a, b, c) \ | ||
10 | do { \ | ||
11 | a = a - b; a = a - c; a = a ^ (c >> 13); \ | ||
12 | b = b - c; b = b - a; b = b ^ (a << 8); \ | ||
13 | c = c - a; c = c - b; c = c ^ (b >> 13); \ | ||
14 | a = a - b; a = a - c; a = a ^ (c >> 12); \ | ||
15 | b = b - c; b = b - a; b = b ^ (a << 16); \ | ||
16 | c = c - a; c = c - b; c = c ^ (b >> 5); \ | ||
17 | a = a - b; a = a - c; a = a ^ (c >> 3); \ | ||
18 | b = b - c; b = b - a; b = b ^ (a << 10); \ | ||
19 | c = c - a; c = c - b; c = c ^ (b >> 15); \ | ||
20 | } while (0) | ||
21 | |||
22 | unsigned ceph_str_hash_rjenkins(const char *str, unsigned length) | ||
23 | { | ||
24 | const unsigned char *k = (const unsigned char *)str; | ||
25 | __u32 a, b, c; /* the internal state */ | ||
26 | __u32 len; /* how many key bytes still need mixing */ | ||
27 | |||
28 | /* Set up the internal state */ | ||
29 | len = length; | ||
30 | a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ | ||
31 | b = a; | ||
32 | c = 0; /* variable initialization of internal state */ | ||
33 | |||
34 | /* handle most of the key */ | ||
35 | while (len >= 12) { | ||
36 | a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + | ||
37 | ((__u32)k[3] << 24)); | ||
38 | b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + | ||
39 | ((__u32)k[7] << 24)); | ||
40 | c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + | ||
41 | ((__u32)k[11] << 24)); | ||
42 | mix(a, b, c); | ||
43 | k = k + 12; | ||
44 | len = len - 12; | ||
45 | } | ||
46 | |||
47 | /* handle the last 11 bytes */ | ||
48 | c = c + length; | ||
49 | switch (len) { /* all the case statements fall through */ | ||
50 | case 11: | ||
51 | c = c + ((__u32)k[10] << 24); | ||
52 | case 10: | ||
53 | c = c + ((__u32)k[9] << 16); | ||
54 | case 9: | ||
55 | c = c + ((__u32)k[8] << 8); | ||
56 | /* the first byte of c is reserved for the length */ | ||
57 | case 8: | ||
58 | b = b + ((__u32)k[7] << 24); | ||
59 | case 7: | ||
60 | b = b + ((__u32)k[6] << 16); | ||
61 | case 6: | ||
62 | b = b + ((__u32)k[5] << 8); | ||
63 | case 5: | ||
64 | b = b + k[4]; | ||
65 | case 4: | ||
66 | a = a + ((__u32)k[3] << 24); | ||
67 | case 3: | ||
68 | a = a + ((__u32)k[2] << 16); | ||
69 | case 2: | ||
70 | a = a + ((__u32)k[1] << 8); | ||
71 | case 1: | ||
72 | a = a + k[0]; | ||
73 | /* case 0: nothing left to add */ | ||
74 | } | ||
75 | mix(a, b, c); | ||
76 | |||
77 | return c; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * linux dcache hash | ||
82 | */ | ||
83 | unsigned ceph_str_hash_linux(const char *str, unsigned length) | ||
84 | { | ||
85 | unsigned long hash = 0; | ||
86 | unsigned char c; | ||
87 | |||
88 | while (length-- > 0) { | ||
89 | c = *str++; | ||
90 | hash = (hash + (c << 4) + (c >> 4)) * 11; | ||
91 | } | ||
92 | return hash; | ||
93 | } | ||
94 | |||
95 | |||
96 | unsigned ceph_str_hash(int type, const char *s, unsigned len) | ||
97 | { | ||
98 | switch (type) { | ||
99 | case CEPH_STR_HASH_LINUX: | ||
100 | return ceph_str_hash_linux(s, len); | ||
101 | case CEPH_STR_HASH_RJENKINS: | ||
102 | return ceph_str_hash_rjenkins(s, len); | ||
103 | default: | ||
104 | return -1; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | const char *ceph_str_hash_name(int type) | ||
109 | { | ||
110 | switch (type) { | ||
111 | case CEPH_STR_HASH_LINUX: | ||
112 | return "linux"; | ||
113 | case CEPH_STR_HASH_RJENKINS: | ||
114 | return "rjenkins"; | ||
115 | default: | ||
116 | return "unknown"; | ||
117 | } | ||
118 | } | ||
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h new file mode 100644 index 000000000000..5ac470c433c9 --- /dev/null +++ b/fs/ceph/ceph_hash.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef _FS_CEPH_HASH_H | ||
2 | #define _FS_CEPH_HASH_H | ||
3 | |||
4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ | ||
5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ | ||
6 | |||
7 | extern unsigned ceph_str_hash_linux(const char *s, unsigned len); | ||
8 | extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len); | ||
9 | |||
10 | extern unsigned ceph_str_hash(int type, const char *s, unsigned len); | ||
11 | extern const char *ceph_str_hash_name(int type); | ||
12 | |||
13 | #endif | ||
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index a025555b70af..68478270ad70 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -809,7 +809,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
809 | return -EIO; | 809 | return -EIO; |
810 | 810 | ||
811 | pool = &osdmap->pg_pool[poolid]; | 811 | pool = &osdmap->pg_pool[poolid]; |
812 | ps = ceph_full_name_hash(oid, strlen(oid)); | 812 | ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); |
813 | if (preferred >= 0) { | 813 | if (preferred >= 0) { |
814 | ps += preferred; | 814 | ps += preferred; |
815 | num = le32_to_cpu(pool->v.lpg_num); | 815 | num = le32_to_cpu(pool->v.lpg_num); |
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 85bdef78d142..fb23ff9297c9 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -84,6 +84,7 @@ struct ceph_pg_pool { | |||
84 | __u8 type; /* CEPH_PG_TYPE_* */ | 84 | __u8 type; /* CEPH_PG_TYPE_* */ |
85 | __u8 size; /* number of osds in each pg */ | 85 | __u8 size; /* number of osds in each pg */ |
86 | __u8 crush_ruleset; /* crush placement rule */ | 86 | __u8 crush_ruleset; /* crush placement rule */ |
87 | __u8 object_hash; /* hash mapping object name to ps */ | ||
87 | __le32 pg_num, pgp_num; /* number of pg's */ | 88 | __le32 pg_num, pgp_num; /* number of pg's */ |
88 | __le32 lpg_num, lpgp_num; /* number of localized pg's */ | 89 | __le32 lpg_num, lpgp_num; /* number of localized pg's */ |
89 | __le32 last_change; /* most recent epoch changed */ | 90 | __le32 last_change; /* most recent epoch changed */ |
diff --git a/fs/ceph/types.h b/fs/ceph/types.h index 8a514568cab2..28b35a005ec2 100644 --- a/fs/ceph/types.h +++ b/fs/ceph/types.h | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include "ceph_fs.h" | 10 | #include "ceph_fs.h" |
11 | #include "ceph_frag.h" | 11 | #include "ceph_frag.h" |
12 | #include "ceph_hash.h" | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * Identify inodes by both their ino AND snapshot id (a u64). | 15 | * Identify inodes by both their ino AND snapshot id (a u64). |