about summary refs log tree commit diff stats
path: root/fs/ceph
diff options
context:
space:
mode:
author Sage Weil <sage@newdream.net> 2009-11-07 00:55:25 -0500
committer Sage Weil <sage@newdream.net> 2009-11-07 00:55:25 -0500
commit 1654dd0cf5ee1827322aca156af7d96d757201c7 (patch)
tree ba71b1a1e214b929937f02a1794b3d32b8ff1342 /fs/ceph
parent cfbbcd24a6bfd794295ee7ad76dfbff40ad6b934 (diff)
ceph: make object hash a pg_pool property
Each object name is now hashed to a placement seed (ps) using the hash function configured on its pg_pool, rather than a single hard-coded hash. This allows new hash functions to be introduced into an existing object store, and lets each pool select a hash appropriate to the objects it will store.

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/Makefile 2
-rw-r--r-- fs/ceph/README 2
-rw-r--r-- fs/ceph/ceph_fs.c 77
-rw-r--r-- fs/ceph/ceph_fs.h 2
-rw-r--r-- fs/ceph/ceph_hash.c 118
-rw-r--r-- fs/ceph/ceph_hash.h 13
-rw-r--r-- fs/ceph/osdmap.c 2
-rw-r--r-- fs/ceph/rados.h 1
-rw-r--r-- fs/ceph/types.h 1
9 files changed, 137 insertions, 81 deletions
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 8bad70aac363..bdd3e6fc1609 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -13,7 +13,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \
13 mon_client.o \ 13 mon_client.o \
14 osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ 14 osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
15 debugfs.o \ 15 debugfs.o \
16 ceph_fs.o ceph_strings.o ceph_frag.o 16 ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o
17 17
18else 18else
19#Otherwise we were called directly from the command 19#Otherwise we were called directly from the command
diff --git a/fs/ceph/README b/fs/ceph/README
index 660e00074d59..18352fab37c0 100644
--- a/fs/ceph/README
+++ b/fs/ceph/README
@@ -10,6 +10,8 @@ src/include/rados.h fs/ceph/rados.h
10src/include/ceph_strings.cc fs/ceph/ceph_strings.c 10src/include/ceph_strings.cc fs/ceph/ceph_strings.c
11src/include/ceph_frag.h fs/ceph/ceph_frag.h 11src/include/ceph_frag.h fs/ceph/ceph_frag.h
12src/include/ceph_frag.cc fs/ceph/ceph_frag.c 12src/include/ceph_frag.cc fs/ceph/ceph_frag.c
13src/include/ceph_hash.h fs/ceph/ceph_hash.h
14src/include/ceph_hash.cc fs/ceph/ceph_hash.c
13src/crush/crush.c fs/ceph/crush/crush.c 15src/crush/crush.c fs/ceph/crush/crush.c
14src/crush/crush.h fs/ceph/crush/crush.h 16src/crush/crush.h fs/ceph/crush/crush.h
15src/crush/mapper.c fs/ceph/crush/mapper.c 17src/crush/mapper.c fs/ceph/crush/mapper.c
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c
index b3ecf1b07521..79d76bc4303f 100644
--- a/fs/ceph/ceph_fs.c
+++ b/fs/ceph/ceph_fs.c
@@ -72,80 +72,3 @@ int ceph_caps_for_mode(int mode)
72 } 72 }
73 return 0; 73 return 0;
74} 74}
75
76/*
77 * Robert Jenkin's hash function.
78 * http://burtleburtle.net/bob/hash/evahash.html
79 * This is in the public domain.
80 */
81#define mix(a, b, c) \
82 do { \
83 a = a - b; a = a - c; a = a ^ (c >> 13); \
84 b = b - c; b = b - a; b = b ^ (a << 8); \
85 c = c - a; c = c - b; c = c ^ (b >> 13); \
86 a = a - b; a = a - c; a = a ^ (c >> 12); \
87 b = b - c; b = b - a; b = b ^ (a << 16); \
88 c = c - a; c = c - b; c = c ^ (b >> 5); \
89 a = a - b; a = a - c; a = a ^ (c >> 3); \
90 b = b - c; b = b - a; b = b ^ (a << 10); \
91 c = c - a; c = c - b; c = c ^ (b >> 15); \
92 } while (0)
93
94unsigned int ceph_full_name_hash(const char *str, unsigned int length)
95{
96 const unsigned char *k = (const unsigned char *)str;
97 __u32 a, b, c; /* the internal state */
98 __u32 len; /* how many key bytes still need mixing */
99
100 /* Set up the internal state */
101 len = length;
102 a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
103 b = a;
104 c = 0; /* variable initialization of internal state */
105
106 /* handle most of the key */
107 while (len >= 12) {
108 a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) +
109 ((__u32)k[3] << 24));
110 b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) +
111 ((__u32)k[7] << 24));
112 c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) +
113 ((__u32)k[11] << 24));
114 mix(a, b, c);
115 k = k + 12;
116 len = len - 12;
117 }
118
119 /* handle the last 11 bytes */
120 c = c + length;
121 switch (len) { /* all the case statements fall through */
122 case 11:
123 c = c + ((__u32)k[10] << 24);
124 case 10:
125 c = c + ((__u32)k[9] << 16);
126 case 9:
127 c = c + ((__u32)k[8] << 8);
128 /* the first byte of c is reserved for the length */
129 case 8:
130 b = b + ((__u32)k[7] << 24);
131 case 7:
132 b = b + ((__u32)k[6] << 16);
133 case 6:
134 b = b + ((__u32)k[5] << 8);
135 case 5:
136 b = b + k[4];
137 case 4:
138 a = a + ((__u32)k[3] << 24);
139 case 3:
140 a = a + ((__u32)k[2] << 16);
141 case 2:
142 a = a + ((__u32)k[1] << 8);
143 case 1:
144 a = a + k[0];
145 /* case 0: nothing left to add */
146 }
147 mix(a, b, c);
148
149 return c;
150}
151
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 25fc537f4140..36becb024788 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -49,8 +49,6 @@
49#define CEPH_MAX_MON 31 49#define CEPH_MAX_MON 31
50 50
51 51
52unsigned int ceph_full_name_hash(const char *name, unsigned int len);
53
54 52
55/* 53/*
56 * ceph_file_layout - describe data layout for a file/inode 54 * ceph_file_layout - describe data layout for a file/inode
diff --git a/fs/ceph/ceph_hash.c b/fs/ceph/ceph_hash.c
new file mode 100644
index 000000000000..ac8be54631fe
--- /dev/null
+++ b/fs/ceph/ceph_hash.c
@@ -0,0 +1,118 @@
1
2#include "types.h"
3
4/*
5 * Robert Jenkins' hash function.
6 * http://burtleburtle.net/bob/hash/evahash.html
7 * This is in the public domain.
8 */
9#define mix(a, b, c) \
10 do { \
11 a = a - b; a = a - c; a = a ^ (c >> 13); \
12 b = b - c; b = b - a; b = b ^ (a << 8); \
13 c = c - a; c = c - b; c = c ^ (b >> 13); \
14 a = a - b; a = a - c; a = a ^ (c >> 12); \
15 b = b - c; b = b - a; b = b ^ (a << 16); \
16 c = c - a; c = c - b; c = c ^ (b >> 5); \
17 a = a - b; a = a - c; a = a ^ (c >> 3); \
18 b = b - c; b = b - a; b = b ^ (a << 10); \
19 c = c - a; c = c - b; c = c ^ (b >> 15); \
20 } while (0)
21
22unsigned ceph_str_hash_rjenkins(const char *str, unsigned length)
23{
24 const unsigned char *k = (const unsigned char *)str;
25 __u32 a, b, c; /* the internal state */
26 __u32 len; /* how many key bytes still need mixing */
27
28 /* Set up the internal state */
29 len = length;
30 a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
31 b = a;
32 c = 0; /* variable initialization of internal state */
33
34 /* handle most of the key */
35 while (len >= 12) {
36 a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) +
37 ((__u32)k[3] << 24));
38 b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) +
39 ((__u32)k[7] << 24));
40 c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) +
41 ((__u32)k[11] << 24));
42 mix(a, b, c);
43 k = k + 12;
44 len = len - 12;
45 }
46
47 /* handle the remaining bytes (at most 11) */
48 c = c + length;
49 switch (len) { /* all the case statements fall through */
50 case 11:
51 c = c + ((__u32)k[10] << 24);
52 case 10:
53 c = c + ((__u32)k[9] << 16);
54 case 9:
55 c = c + ((__u32)k[8] << 8);
56 /* the first byte of c is reserved for the length */
57 case 8:
58 b = b + ((__u32)k[7] << 24);
59 case 7:
60 b = b + ((__u32)k[6] << 16);
61 case 6:
62 b = b + ((__u32)k[5] << 8);
63 case 5:
64 b = b + k[4];
65 case 4:
66 a = a + ((__u32)k[3] << 24);
67 case 3:
68 a = a + ((__u32)k[2] << 16);
69 case 2:
70 a = a + ((__u32)k[1] << 8);
71 case 1:
72 a = a + k[0];
73 /* case 0: nothing left to add */
74 }
75 mix(a, b, c);
76
77 return c;
78}
79
80/*
81 * linux dcache hash
82 */
83unsigned ceph_str_hash_linux(const char *str, unsigned length)
84{
85 unsigned long hash = 0;
86 unsigned char c;
87
88 while (length-- > 0) {
89 c = *str++;
90 hash = (hash + (c << 4) + (c >> 4)) * 11;
91 }
92 return hash;
93}
94
95
96unsigned ceph_str_hash(int type, const char *s, unsigned len)
97{
98 switch (type) {
99 case CEPH_STR_HASH_LINUX:
100 return ceph_str_hash_linux(s, len);
101 case CEPH_STR_HASH_RJENKINS:
102 return ceph_str_hash_rjenkins(s, len);
103 default:
104 return -1;
105 }
106}
107
108const char *ceph_str_hash_name(int type)
109{
110 switch (type) {
111 case CEPH_STR_HASH_LINUX:
112 return "linux";
113 case CEPH_STR_HASH_RJENKINS:
114 return "rjenkins";
115 default:
116 return "unknown";
117 }
118}
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h
new file mode 100644
index 000000000000..5ac470c433c9
--- /dev/null
+++ b/fs/ceph/ceph_hash.h
@@ -0,0 +1,13 @@
1#ifndef _FS_CEPH_HASH_H
2#define _FS_CEPH_HASH_H
3
4#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */
5#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */
6
7extern unsigned ceph_str_hash_linux(const char *s, unsigned len);
8extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len);
9
10extern unsigned ceph_str_hash(int type, const char *s, unsigned len);
11extern const char *ceph_str_hash_name(int type);
12
13#endif
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index a025555b70af..68478270ad70 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -809,7 +809,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
809 return -EIO; 809 return -EIO;
810 810
811 pool = &osdmap->pg_pool[poolid]; 811 pool = &osdmap->pg_pool[poolid];
812 ps = ceph_full_name_hash(oid, strlen(oid)); 812 ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
813 if (preferred >= 0) { 813 if (preferred >= 0) {
814 ps += preferred; 814 ps += preferred;
815 num = le32_to_cpu(pool->v.lpg_num); 815 num = le32_to_cpu(pool->v.lpg_num);
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 85bdef78d142..fb23ff9297c9 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -84,6 +84,7 @@ struct ceph_pg_pool {
84 __u8 type; /* CEPH_PG_TYPE_* */ 84 __u8 type; /* CEPH_PG_TYPE_* */
85 __u8 size; /* number of osds in each pg */ 85 __u8 size; /* number of osds in each pg */
86 __u8 crush_ruleset; /* crush placement rule */ 86 __u8 crush_ruleset; /* crush placement rule */
87 __u8 object_hash; /* hash mapping object name to ps */
87 __le32 pg_num, pgp_num; /* number of pg's */ 88 __le32 pg_num, pgp_num; /* number of pg's */
88 __le32 lpg_num, lpgp_num; /* number of localized pg's */ 89 __le32 lpg_num, lpgp_num; /* number of localized pg's */
89 __le32 last_change; /* most recent epoch changed */ 90 __le32 last_change; /* most recent epoch changed */
diff --git a/fs/ceph/types.h b/fs/ceph/types.h
index 8a514568cab2..28b35a005ec2 100644
--- a/fs/ceph/types.h
+++ b/fs/ceph/types.h
@@ -9,6 +9,7 @@
9 9
10#include "ceph_fs.h" 10#include "ceph_fs.h"
11#include "ceph_frag.h" 11#include "ceph_frag.h"
12#include "ceph_hash.h"
12 13
13/* 14/*
14 * Identify inodes by both their ino AND snapshot id (a u64). 15 * Identify inodes by both their ino AND snapshot id (a u64).