aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/crush/crush.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/crush/crush.h')
-rw-r--r--fs/ceph/crush/crush.h188
1 files changed, 188 insertions, 0 deletions
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h
new file mode 100644
index 000000000000..9ac7e091126f
--- /dev/null
+++ b/fs/ceph/crush/crush.h
@@ -0,0 +1,188 @@
1#ifndef _CRUSH_CRUSH_H
2#define _CRUSH_CRUSH_H
3
4#include <linux/types.h>
5
6/*
7 * CRUSH is a pseudo-random data distribution algorithm that
8 * efficiently distributes input values (typically, data objects)
9 * across a heterogeneous, structured storage cluster.
10 *
11 * The algorithm was originally described in detail in this paper
12 * (although the algorithm has evolved somewhat since then):
13 *
14 * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
15 *
16 * LGPL2
17 */
18
19
20#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
21
22
23#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
24#define CRUSH_MAX_SET 10 /* max size of a mapping result */
25
26
27/*
28 * CRUSH uses user-defined "rules" to describe how inputs should be
29 * mapped to devices. A rule consists of sequence of steps to perform
30 * to generate the set of output devices.
31 */
32struct crush_rule_step {
33 __u32 op;
34 __s32 arg1;
35 __s32 arg2;
36};
37
38/* step op codes */
39enum {
40 CRUSH_RULE_NOOP = 0,
41 CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
42 CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
43 /* arg2 = type */
44 CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
45 CRUSH_RULE_EMIT = 4, /* no args */
46 CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6,
47 CRUSH_RULE_CHOOSE_LEAF_INDEP = 7,
48};
49
50/*
51 * for specifying choose num (arg1) relative to the max parameter
52 * passed to do_rule
53 */
54#define CRUSH_CHOOSE_N 0
55#define CRUSH_CHOOSE_N_MINUS(x) (-(x))
56
57/*
58 * The rule mask is used to describe what the rule is intended for.
59 * Given a ruleset and size of output set, we search through the
60 * rule list for a matching rule_mask.
61 */
62struct crush_rule_mask {
63 __u8 ruleset;
64 __u8 type;
65 __u8 min_size;
66 __u8 max_size;
67};
68
69struct crush_rule {
70 __u32 len;
71 struct crush_rule_mask mask;
72 struct crush_rule_step steps[0];
73};
74
75#define crush_rule_size(len) (sizeof(struct crush_rule) + \
76 (len)*sizeof(struct crush_rule_step))
77
78
79
80/*
81 * A bucket is a named container of other items (either devices or
82 * other buckets). Items within a bucket are chosen using one of a
83 * few different algorithms. The table summarizes how the speed of
84 * each option measures up against mapping stability when items are
85 * added or removed.
86 *
87 * Bucket Alg Speed Additions Removals
88 * ------------------------------------------------
89 * uniform O(1) poor poor
90 * list O(n) optimal poor
91 * tree O(log n) good good
92 * straw O(n) optimal optimal
93 */
94enum {
95 CRUSH_BUCKET_UNIFORM = 1,
96 CRUSH_BUCKET_LIST = 2,
97 CRUSH_BUCKET_TREE = 3,
98 CRUSH_BUCKET_STRAW = 4
99};
100static inline const char *crush_bucket_alg_name(int alg)
101{
102 switch (alg) {
103 case CRUSH_BUCKET_UNIFORM: return "uniform";
104 case CRUSH_BUCKET_LIST: return "list";
105 case CRUSH_BUCKET_TREE: return "tree";
106 case CRUSH_BUCKET_STRAW: return "straw";
107 default: return "unknown";
108 }
109}
110
111struct crush_bucket {
112 __s32 id; /* this'll be negative */
113 __u16 type; /* non-zero; type=0 is reserved for devices */
114 __u16 alg; /* one of CRUSH_BUCKET_* */
115 __u32 weight; /* 16-bit fixed point */
116 __u32 size; /* num items */
117 __s32 *items;
118
119 /*
120 * cached random permutation: used for uniform bucket and for
121 * the linear search fallback for the other bucket types.
122 */
123 __u32 perm_x; /* @x for which *perm is defined */
124 __u32 perm_n; /* num elements of *perm that are permuted/defined */
125 __u32 *perm;
126};
127
128struct crush_bucket_uniform {
129 struct crush_bucket h;
130 __u32 item_weight; /* 16-bit fixed point; all items equally weighted */
131};
132
133struct crush_bucket_list {
134 struct crush_bucket h;
135 __u32 *item_weights; /* 16-bit fixed point */
136 __u32 *sum_weights; /* 16-bit fixed point. element i is sum
137 of weights 0..i, inclusive */
138};
139
140struct crush_bucket_tree {
141 struct crush_bucket h; /* note: h.size is _tree_ size, not number of
142 actual items */
143 __u8 num_nodes;
144 __u32 *node_weights;
145};
146
147struct crush_bucket_straw {
148 struct crush_bucket h;
149 __u32 *item_weights; /* 16-bit fixed point */
150 __u32 *straws; /* 16-bit fixed point */
151};
152
153
154
155/*
156 * CRUSH map includes all buckets, rules, etc.
157 */
158struct crush_map {
159 struct crush_bucket **buckets;
160 struct crush_rule **rules;
161
162 /*
163 * Parent pointers to identify the parent bucket a device or
164 * bucket in the hierarchy. If an item appears more than
165 * once, this is the _last_ time it appeared (where buckets
166 * are processed in bucket id order, from -1 on down to
167 * -max_buckets.
168 */
169 __u32 *bucket_parents;
170 __u32 *device_parents;
171
172 __s32 max_buckets;
173 __u32 max_rules;
174 __s32 max_devices;
175};
176
177
178/* crush.c */
179extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
180extern void crush_calc_parents(struct crush_map *map);
181extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
182extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
183extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
184extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
185extern void crush_destroy_bucket(struct crush_bucket *b);
186extern void crush_destroy(struct crush_map *map);
187
188#endif