diff options
Diffstat (limited to 'include/linux/ceph/rados.h')
-rw-r--r-- | include/linux/ceph/rados.h | 158 |
1 files changed, 75 insertions, 83 deletions
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1c..68c96a508ac2 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h | |||
@@ -9,14 +9,6 @@ | |||
9 | #include <linux/ceph/msgr.h> | 9 | #include <linux/ceph/msgr.h> |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * osdmap encoding versions | ||
13 | */ | ||
14 | #define CEPH_OSDMAP_INC_VERSION 5 | ||
15 | #define CEPH_OSDMAP_INC_VERSION_EXT 6 | ||
16 | #define CEPH_OSDMAP_VERSION 5 | ||
17 | #define CEPH_OSDMAP_VERSION_EXT 6 | ||
18 | |||
19 | /* | ||
20 | * fs id | 12 | * fs id |
21 | */ | 13 | */ |
22 | struct ceph_fsid { | 14 | struct ceph_fsid { |
@@ -64,7 +56,7 @@ struct ceph_timespec { | |||
64 | * placement group. | 56 | * placement group. |
65 | * we encode this into one __le64. | 57 | * we encode this into one __le64. |
66 | */ | 58 | */ |
67 | struct ceph_pg { | 59 | struct ceph_pg_v1 { |
68 | __le16 preferred; /* preferred primary osd */ | 60 | __le16 preferred; /* preferred primary osd */ |
69 | __le16 ps; /* placement seed */ | 61 | __le16 ps; /* placement seed */ |
70 | __le32 pool; /* object pool */ | 62 | __le32 pool; /* object pool */ |
@@ -91,21 +83,6 @@ struct ceph_pg { | |||
91 | 83 | ||
92 | #define CEPH_PG_TYPE_REP 1 | 84 | #define CEPH_PG_TYPE_REP 1 |
93 | #define CEPH_PG_TYPE_RAID4 2 | 85 | #define CEPH_PG_TYPE_RAID4 2 |
94 | #define CEPH_PG_POOL_VERSION 2 | ||
95 | struct ceph_pg_pool { | ||
96 | __u8 type; /* CEPH_PG_TYPE_* */ | ||
97 | __u8 size; /* number of osds in each pg */ | ||
98 | __u8 crush_ruleset; /* crush placement rule */ | ||
99 | __u8 object_hash; /* hash mapping object name to ps */ | ||
100 | __le32 pg_num, pgp_num; /* number of pg's */ | ||
101 | __le32 lpg_num, lpgp_num; /* number of localized pg's */ | ||
102 | __le32 last_change; /* most recent epoch changed */ | ||
103 | __le64 snap_seq; /* seq for per-pool snapshot */ | ||
104 | __le32 snap_epoch; /* epoch of last snap */ | ||
105 | __le32 num_snaps; | ||
106 | __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ | ||
107 | __le64 auid; /* who owns the pg */ | ||
108 | } __attribute__ ((packed)); | ||
109 | 86 | ||
110 | /* | 87 | /* |
111 | * stable_mod func is used to control number of placement groups. | 88 | * stable_mod func is used to control number of placement groups. |
@@ -128,7 +105,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) | |||
128 | * object layout - how a given object should be stored. | 105 | * object layout - how a given object should be stored. |
129 | */ | 106 | */ |
130 | struct ceph_object_layout { | 107 | struct ceph_object_layout { |
131 | struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ | 108 | struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */ |
132 | __le32 ol_stripe_unit; /* for per-object parity, if any */ | 109 | __le32 ol_stripe_unit; /* for per-object parity, if any */ |
133 | } __attribute__ ((packed)); | 110 | } __attribute__ ((packed)); |
134 | 111 | ||
@@ -145,8 +122,12 @@ struct ceph_eversion { | |||
145 | */ | 122 | */ |
146 | 123 | ||
147 | /* status bits */ | 124 | /* status bits */ |
148 | #define CEPH_OSD_EXISTS 1 | 125 | #define CEPH_OSD_EXISTS (1<<0) |
149 | #define CEPH_OSD_UP 2 | 126 | #define CEPH_OSD_UP (1<<1) |
127 | #define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ | ||
128 | #define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ | ||
129 | |||
130 | extern const char *ceph_osd_state_name(int s); | ||
150 | 131 | ||
151 | /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ | 132 | /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ |
152 | #define CEPH_OSD_IN 0x10000 | 133 | #define CEPH_OSD_IN 0x10000 |
@@ -161,9 +142,25 @@ struct ceph_eversion { | |||
161 | #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ | 142 | #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ |
162 | #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ | 143 | #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ |
163 | #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ | 144 | #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ |
145 | #define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ | ||
146 | #define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ | ||
147 | #define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ | ||
148 | #define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ | ||
149 | #define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ | ||
150 | #define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ | ||
151 | |||
152 | /* | ||
153 | * The error code to return when an OSD can't handle a write | ||
154 | * because it is too large. | ||
155 | */ | ||
156 | #define OSD_WRITETOOBIG EMSGSIZE | ||
164 | 157 | ||
165 | /* | 158 | /* |
166 | * osd ops | 159 | * osd ops |
160 | * | ||
161 | * WARNING: do not use these op codes directly. Use the helpers | ||
162 | * defined below instead. In certain cases, op code behavior was | ||
163 | * redefined, resulting in special-cases in the helpers. | ||
167 | */ | 164 | */ |
168 | #define CEPH_OSD_OP_MODE 0xf000 | 165 | #define CEPH_OSD_OP_MODE 0xf000 |
169 | #define CEPH_OSD_OP_MODE_RD 0x1000 | 166 | #define CEPH_OSD_OP_MODE_RD 0x1000 |
@@ -177,6 +174,7 @@ struct ceph_eversion { | |||
177 | #define CEPH_OSD_OP_TYPE_ATTR 0x0300 | 174 | #define CEPH_OSD_OP_TYPE_ATTR 0x0300 |
178 | #define CEPH_OSD_OP_TYPE_EXEC 0x0400 | 175 | #define CEPH_OSD_OP_TYPE_EXEC 0x0400 |
179 | #define CEPH_OSD_OP_TYPE_PG 0x0500 | 176 | #define CEPH_OSD_OP_TYPE_PG 0x0500 |
177 | #define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ | ||
180 | 178 | ||
181 | enum { | 179 | enum { |
182 | /** data **/ | 180 | /** data **/ |
@@ -217,6 +215,23 @@ enum { | |||
217 | 215 | ||
218 | CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, | 216 | CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, |
219 | 217 | ||
218 | /* omap */ | ||
219 | CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, | ||
220 | CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, | ||
221 | CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, | ||
222 | CEPH_OSD_OP_OMAPGETVALSBYKEYS = | ||
223 | CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, | ||
224 | CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, | ||
225 | CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, | ||
226 | CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, | ||
227 | CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, | ||
228 | CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, | ||
229 | |||
230 | /** multi **/ | ||
231 | CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, | ||
232 | CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, | ||
233 | CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, | ||
234 | |||
220 | /** attrs **/ | 235 | /** attrs **/ |
221 | /* read */ | 236 | /* read */ |
222 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | 237 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, |
@@ -238,6 +253,7 @@ enum { | |||
238 | CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, | 253 | CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, |
239 | CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, | 254 | CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, |
240 | CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, | 255 | CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, |
256 | CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, | ||
241 | 257 | ||
242 | /** lock **/ | 258 | /** lock **/ |
243 | CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, | 259 | CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, |
@@ -248,10 +264,12 @@ enum { | |||
248 | CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, | 264 | CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, |
249 | 265 | ||
250 | /** exec **/ | 266 | /** exec **/ |
267 | /* note: the RD bit here is wrong; see special-case below in helper */ | ||
251 | CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, | 268 | CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, |
252 | 269 | ||
253 | /** pg **/ | 270 | /** pg **/ |
254 | CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, | 271 | CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, |
272 | CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, | ||
255 | }; | 273 | }; |
256 | 274 | ||
257 | static inline int ceph_osd_op_type_lock(int op) | 275 | static inline int ceph_osd_op_type_lock(int op) |
@@ -274,6 +292,10 @@ static inline int ceph_osd_op_type_pg(int op) | |||
274 | { | 292 | { |
275 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; | 293 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; |
276 | } | 294 | } |
295 | static inline int ceph_osd_op_type_multi(int op) | ||
296 | { | ||
297 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; | ||
298 | } | ||
277 | 299 | ||
278 | static inline int ceph_osd_op_mode_subop(int op) | 300 | static inline int ceph_osd_op_mode_subop(int op) |
279 | { | 301 | { |
@@ -281,11 +303,12 @@ static inline int ceph_osd_op_mode_subop(int op) | |||
281 | } | 303 | } |
282 | static inline int ceph_osd_op_mode_read(int op) | 304 | static inline int ceph_osd_op_mode_read(int op) |
283 | { | 305 | { |
284 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; | 306 | return (op & CEPH_OSD_OP_MODE_RD) && |
307 | op != CEPH_OSD_OP_CALL; | ||
285 | } | 308 | } |
286 | static inline int ceph_osd_op_mode_modify(int op) | 309 | static inline int ceph_osd_op_mode_modify(int op) |
287 | { | 310 | { |
288 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | 311 | return op & CEPH_OSD_OP_MODE_WR; |
289 | } | 312 | } |
290 | 313 | ||
291 | /* | 314 | /* |
@@ -294,34 +317,38 @@ static inline int ceph_osd_op_mode_modify(int op) | |||
294 | */ | 317 | */ |
295 | #define CEPH_OSD_TMAP_HDR 'h' | 318 | #define CEPH_OSD_TMAP_HDR 'h' |
296 | #define CEPH_OSD_TMAP_SET 's' | 319 | #define CEPH_OSD_TMAP_SET 's' |
320 | #define CEPH_OSD_TMAP_CREATE 'c' /* create key */ | ||
297 | #define CEPH_OSD_TMAP_RM 'r' | 321 | #define CEPH_OSD_TMAP_RM 'r' |
322 | #define CEPH_OSD_TMAP_RMSLOPPY 'R' | ||
298 | 323 | ||
299 | extern const char *ceph_osd_op_name(int op); | 324 | extern const char *ceph_osd_op_name(int op); |
300 | 325 | ||
301 | |||
302 | /* | 326 | /* |
303 | * osd op flags | 327 | * osd op flags |
304 | * | 328 | * |
305 | * An op may be READ, WRITE, or READ|WRITE. | 329 | * An op may be READ, WRITE, or READ|WRITE. |
306 | */ | 330 | */ |
307 | enum { | 331 | enum { |
308 | CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ | 332 | CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ |
309 | CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ | 333 | CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ |
310 | CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ | 334 | CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ |
311 | CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ | 335 | CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ |
312 | CEPH_OSD_FLAG_READ = 16, /* op may read */ | 336 | CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ |
313 | CEPH_OSD_FLAG_WRITE = 32, /* op may write */ | 337 | CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ |
314 | CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ | 338 | CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ |
315 | CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ | 339 | CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ |
316 | CEPH_OSD_FLAG_BALANCE_READS = 256, | 340 | CEPH_OSD_FLAG_BALANCE_READS = 0x0100, |
317 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | 341 | CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ |
318 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | 342 | CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ |
319 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ | 343 | CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ |
320 | CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ | 344 | CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ |
345 | CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ | ||
346 | CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ | ||
321 | }; | 347 | }; |
322 | 348 | ||
323 | enum { | 349 | enum { |
324 | CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ | 350 | CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ |
351 | CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ | ||
325 | }; | 352 | }; |
326 | 353 | ||
327 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | 354 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ |
@@ -381,48 +408,13 @@ struct ceph_osd_op { | |||
381 | __le64 ver; | 408 | __le64 ver; |
382 | __u8 flag; /* 0 = unwatch, 1 = watch */ | 409 | __u8 flag; /* 0 = unwatch, 1 = watch */ |
383 | } __attribute__ ((packed)) watch; | 410 | } __attribute__ ((packed)) watch; |
384 | }; | 411 | struct { |
412 | __le64 offset, length; | ||
413 | __le64 src_offset; | ||
414 | } __attribute__ ((packed)) clonerange; | ||
415 | }; | ||
385 | __le32 payload_len; | 416 | __le32 payload_len; |
386 | } __attribute__ ((packed)); | 417 | } __attribute__ ((packed)); |
387 | 418 | ||
388 | /* | ||
389 | * osd request message header. each request may include multiple | ||
390 | * ceph_osd_op object operations. | ||
391 | */ | ||
392 | struct ceph_osd_request_head { | ||
393 | __le32 client_inc; /* client incarnation */ | ||
394 | struct ceph_object_layout layout; /* pgid */ | ||
395 | __le32 osdmap_epoch; /* client's osdmap epoch */ | ||
396 | |||
397 | __le32 flags; | ||
398 | |||
399 | struct ceph_timespec mtime; /* for mutations only */ | ||
400 | struct ceph_eversion reassert_version; /* if we are replaying op */ | ||
401 | |||
402 | __le32 object_len; /* length of object name */ | ||
403 | |||
404 | __le64 snapid; /* snapid to read */ | ||
405 | __le64 snap_seq; /* writer's snap context */ | ||
406 | __le32 num_snaps; | ||
407 | |||
408 | __le16 num_ops; | ||
409 | struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ | ||
410 | } __attribute__ ((packed)); | ||
411 | |||
412 | struct ceph_osd_reply_head { | ||
413 | __le32 client_inc; /* client incarnation */ | ||
414 | __le32 flags; | ||
415 | struct ceph_object_layout layout; | ||
416 | __le32 osdmap_epoch; | ||
417 | struct ceph_eversion reassert_version; /* for replaying uncommitted */ | ||
418 | |||
419 | __le32 result; /* result code */ | ||
420 | |||
421 | __le32 object_len; /* length of object name */ | ||
422 | __le32 num_ops; | ||
423 | struct ceph_osd_op ops[0]; /* ops[], object */ | ||
424 | } __attribute__ ((packed)); | ||
425 | |||
426 | |||
427 | 419 | ||
428 | #endif | 420 | #endif |