diff options
Diffstat (limited to 'fs/ceph/crush/crush.h')
-rw-r--r-- | fs/ceph/crush/crush.h | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h new file mode 100644 index 000000000000..dcd7e7523700 --- /dev/null +++ b/fs/ceph/crush/crush.h | |||
@@ -0,0 +1,180 @@ | |||
1 | #ifndef _CRUSH_CRUSH_H | ||
2 | #define _CRUSH_CRUSH_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | /* | ||
7 | * CRUSH is a pseudo-random data distribution algorithm that | ||
8 | * efficiently distributes input values (typically, data objects) | ||
9 | * across a heterogeneous, structured storage cluster. | ||
10 | * | ||
11 | * The algorithm was originally described in detail in this paper | ||
12 | * (although the algorithm has evolved somewhat since then): | ||
13 | * | ||
14 | * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf | ||
15 | * | ||
16 | * LGPL2 | ||
17 | */ | ||
18 | |||
19 | |||
/* Magic value, used for detecting algorithm revisions. */
#define CRUSH_MAGIC 0x00010000ul


/* Upper bounds on the crush hierarchy depth and on a mapping result's size. */
#define CRUSH_MAX_DEPTH 10
#define CRUSH_MAX_SET   10
25 | |||
26 | |||
/*
 * CRUSH maps inputs to devices by following user-defined "rules".
 * A rule consists of a sequence of steps to perform in order to
 * generate the set of output devices.  This struct is one such step.
 */
struct crush_rule_step {
	__u32 op;	/* step op code, one of CRUSH_RULE_* */
	__s32 arg1;	/* first argument; meaning depends on op */
	__s32 arg2;	/* second argument; meaning depends on op */
};
37 | |||
/*
 * Step op codes.  Where an op takes arguments, the comment says how
 * arg1/arg2 of the step are interpreted.
 */
enum {
	CRUSH_RULE_NOOP               = 0,
	CRUSH_RULE_TAKE               = 1,	/* arg1 = value to start with */
	CRUSH_RULE_CHOOSE_FIRSTN      = 2,	/* arg1 = num items to pick, arg2 = type */
	CRUSH_RULE_CHOOSE_INDEP       = 3,	/* same args as CHOOSE_FIRSTN */
	CRUSH_RULE_EMIT               = 4,	/* no args */
	/* value 5 is not assigned in this list */
	CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6,
	CRUSH_RULE_CHOOSE_LEAF_INDEP  = 7,
};
49 | |||
/*
 * Used to specify the choose num (arg1) relative to the max parameter
 * passed to do_rule.  CRUSH_CHOOSE_N is encoded as 0 and
 * CRUSH_CHOOSE_N_MINUS(x) as the negation of x.
 */
#define CRUSH_CHOOSE_N            0
#define CRUSH_CHOOSE_N_MINUS(x)   (-(x))
56 | |||
/*
 * The rule mask describes what a rule is intended for.  Given a
 * ruleset and the size of the output set, the rule list is searched
 * for a rule with a matching rule_mask.
 */
struct crush_rule_mask {
	__u8 ruleset;
	__u8 type;
	/* presumably the range of output set sizes the rule covers -- confirm */
	__u8 min_size;
	__u8 max_size;
};
68 | |||
69 | struct crush_rule { | ||
70 | __u32 len; | ||
71 | struct crush_rule_mask mask; | ||
72 | struct crush_rule_step steps[0]; | ||
73 | }; | ||
74 | |||
75 | #define crush_rule_size(len) (sizeof(struct crush_rule) + \ | ||
76 | (len)*sizeof(struct crush_rule_step)) | ||
77 | |||
78 | |||
79 | |||
/*
 * A bucket is a named container of other items (devices or other
 * buckets).  One of a few different algorithms is used to choose among
 * the items within a bucket.  The table compares the speed of each
 * algorithm with how stable the mapping stays when items are added or
 * removed.
 *
 *  Bucket Alg     Speed       Additions    Removals
 *  ------------------------------------------------
 *  uniform         O(1)       poor         poor
 *  list            O(n)       optimal      poor
 *  tree            O(log n)   good         good
 *  straw           O(n)       optimal      optimal
 */
enum {
	CRUSH_BUCKET_UNIFORM = 1,
	CRUSH_BUCKET_LIST    = 2,
	CRUSH_BUCKET_TREE    = 3,
	CRUSH_BUCKET_STRAW   = 4,
};

/*
 * Defined outside this header; presumably returns a printable name for
 * a CRUSH_BUCKET_* value -- confirm against the implementation.
 */
const char *crush_bucket_alg_name(int alg);
101 | |||
/*
 * Fields shared by every bucket type.  The algorithm-specific bucket
 * structs in this header embed this as their first member, h.
 */
struct crush_bucket {
	__s32 id;	/* bucket id; this will be negative */
	__u16 type;	/* non-zero; type=0 is reserved for devices */
	__u8 alg;	/* one of CRUSH_BUCKET_* */
	__u8 hash;	/* which hash function to use, CRUSH_HASH_* */
	__u32 weight;	/* 16-bit fixed point */
	__u32 size;	/* num items */
	__s32 *items;

	/*
	 * Cached random permutation.  It is used for the uniform bucket
	 * and for the linear search fallback of the other bucket types.
	 */
	__u32 perm_x;	/* @x for which *perm is defined */
	__u32 perm_n;	/* num elements of *perm that are permuted/defined */
	__u32 *perm;
};
119 | |||
120 | struct crush_bucket_uniform { | ||
121 | struct crush_bucket h; | ||
122 | __u32 item_weight; /* 16-bit fixed point; all items equally weighted */ | ||
123 | }; | ||
124 | |||
125 | struct crush_bucket_list { | ||
126 | struct crush_bucket h; | ||
127 | __u32 *item_weights; /* 16-bit fixed point */ | ||
128 | __u32 *sum_weights; /* 16-bit fixed point. element i is sum | ||
129 | of weights 0..i, inclusive */ | ||
130 | }; | ||
131 | |||
132 | struct crush_bucket_tree { | ||
133 | struct crush_bucket h; /* note: h.size is _tree_ size, not number of | ||
134 | actual items */ | ||
135 | __u8 num_nodes; | ||
136 | __u32 *node_weights; | ||
137 | }; | ||
138 | |||
139 | struct crush_bucket_straw { | ||
140 | struct crush_bucket h; | ||
141 | __u32 *item_weights; /* 16-bit fixed point */ | ||
142 | __u32 *straws; /* 16-bit fixed point */ | ||
143 | }; | ||
144 | |||
145 | |||
146 | |||
/*
 * The CRUSH map includes all buckets, rules, etc.
 */
struct crush_map {
	struct crush_bucket **buckets;
	struct crush_rule **rules;

	/*
	 * Parent pointers identify the parent bucket of a device or
	 * bucket in the hierarchy.  If an item appears more than once,
	 * this is the _last_ time it appeared (where buckets are
	 * processed in bucket id order, from -1 on down to
	 * -max_buckets).
	 */
	__u32 *bucket_parents;
	__u32 *device_parents;

	__s32 max_buckets;
	__u32 max_rules;
	__s32 max_devices;
};
168 | |||
169 | |||
/*
 * Functions provided by crush.c.  Only the declarations are visible
 * here; see crush.c for what each one does.
 */
int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
void crush_calc_parents(struct crush_map *map);

/* per-algorithm bucket destructors, then the generic ones */
void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
void crush_destroy_bucket_list(struct crush_bucket_list *b);
void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
void crush_destroy_bucket(struct crush_bucket *b);
void crush_destroy(struct crush_map *map);
179 | |||
180 | #endif | ||