aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/ceph')
-rw-r--r--include/linux/ceph/ceph_features.h38
-rw-r--r--include/linux/ceph/ceph_fs.h32
-rw-r--r--include/linux/ceph/decode.h29
-rw-r--r--include/linux/ceph/libceph.h16
-rw-r--r--include/linux/ceph/mdsmap.h4
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/ceph/osd_client.h74
-rw-r--r--include/linux/ceph/osdmap.h30
-rw-r--r--include/linux/ceph/rados.h158
9 files changed, 225 insertions, 158 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index dad579b0c0e6..76554cecaab2 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -12,16 +12,46 @@
12#define CEPH_FEATURE_MONNAMES (1<<5) 12#define CEPH_FEATURE_MONNAMES (1<<5)
13#define CEPH_FEATURE_RECONNECT_SEQ (1<<6) 13#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
14#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) 14#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
15/* bits 8-17 defined by user-space; not supported yet here */ 15#define CEPH_FEATURE_OBJECTLOCATOR (1<<8)
16#define CEPH_FEATURE_PGID64 (1<<9)
17#define CEPH_FEATURE_INCSUBOSDMAP (1<<10)
18#define CEPH_FEATURE_PGPOOL3 (1<<11)
19#define CEPH_FEATURE_OSDREPLYMUX (1<<12)
20#define CEPH_FEATURE_OSDENC (1<<13)
21#define CEPH_FEATURE_OMAP (1<<14)
22#define CEPH_FEATURE_MONENC (1<<15)
23#define CEPH_FEATURE_QUERY_T (1<<16)
24#define CEPH_FEATURE_INDEP_PG_MAP (1<<17)
16#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) 25#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
26#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19)
27#define CEPH_FEATURE_MON_NULLROUTE (1<<20)
28#define CEPH_FEATURE_MON_GV (1<<21)
29#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22)
30#define CEPH_FEATURE_MSG_AUTH (1<<23)
31#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24)
32#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
33#define CEPH_FEATURE_CREATEPOOLID (1<<26)
34#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
35#define CEPH_FEATURE_OSD_HBMSGS (1<<28)
36#define CEPH_FEATURE_MDSENC (1<<29)
37#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30)
17 38
18/* 39/*
19 * Features supported. 40 * Features supported.
20 */ 41 */
21#define CEPH_FEATURES_SUPPORTED_DEFAULT \ 42#define CEPH_FEATURES_SUPPORTED_DEFAULT \
22 (CEPH_FEATURE_NOSRCADDR | \ 43 (CEPH_FEATURE_NOSRCADDR | \
23 CEPH_FEATURE_CRUSH_TUNABLES) 44 CEPH_FEATURE_PGID64 | \
45 CEPH_FEATURE_PGPOOL3 | \
46 CEPH_FEATURE_OSDENC | \
47 CEPH_FEATURE_CRUSH_TUNABLES | \
48 CEPH_FEATURE_CRUSH_TUNABLES2 | \
49 CEPH_FEATURE_REPLY_CREATE_INODE | \
50 CEPH_FEATURE_OSDHASHPSPOOL)
24 51
25#define CEPH_FEATURES_REQUIRED_DEFAULT \ 52#define CEPH_FEATURES_REQUIRED_DEFAULT \
26 (CEPH_FEATURE_NOSRCADDR) 53 (CEPH_FEATURE_NOSRCADDR | \
54 CEPH_FEATURE_PGID64 | \
55 CEPH_FEATURE_PGPOOL3 | \
56 CEPH_FEATURE_OSDENC)
27#endif 57#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index cf6f4d998a76..2ad7b860f062 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -21,16 +21,14 @@
21 * internal cluster protocols separately from the public, 21 * internal cluster protocols separately from the public,
22 * client-facing protocol. 22 * client-facing protocol.
23 */ 23 */
24#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
25#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
26#define CEPH_MON_PROTOCOL 5 /* cluster internal */
27#define CEPH_OSDC_PROTOCOL 24 /* server/client */ 24#define CEPH_OSDC_PROTOCOL 24 /* server/client */
28#define CEPH_MDSC_PROTOCOL 32 /* server/client */ 25#define CEPH_MDSC_PROTOCOL 32 /* server/client */
29#define CEPH_MONC_PROTOCOL 15 /* server/client */ 26#define CEPH_MONC_PROTOCOL 15 /* server/client */
30 27
31 28
32#define CEPH_INO_ROOT 1 29#define CEPH_INO_ROOT 1
33#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ 30#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
31#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */
34 32
35/* arbitrary limit on max # of monitors (cluster of 3 is typical) */ 33/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
36#define CEPH_MAX_MON 31 34#define CEPH_MAX_MON 31
@@ -51,7 +49,7 @@ struct ceph_file_layout {
51 __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ 49 __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
52 50
53 /* object -> pg layout */ 51 /* object -> pg layout */
54 __le32 fl_unused; /* unused; used to be preferred primary (-1) */ 52 __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */
55 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ 53 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
56} __attribute__ ((packed)); 54} __attribute__ ((packed));
57 55
@@ -101,6 +99,8 @@ struct ceph_dir_layout {
101#define CEPH_MSG_MON_SUBSCRIBE_ACK 16 99#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
102#define CEPH_MSG_AUTH 17 100#define CEPH_MSG_AUTH 17
103#define CEPH_MSG_AUTH_REPLY 18 101#define CEPH_MSG_AUTH_REPLY 18
102#define CEPH_MSG_MON_GET_VERSION 19
103#define CEPH_MSG_MON_GET_VERSION_REPLY 20
104 104
105/* client <-> mds */ 105/* client <-> mds */
106#define CEPH_MSG_MDS_MAP 21 106#define CEPH_MSG_MDS_MAP 21
@@ -221,6 +221,11 @@ struct ceph_mon_subscribe_ack {
221} __attribute__ ((packed)); 221} __attribute__ ((packed));
222 222
223/* 223/*
224 * mdsmap flags
225 */
226#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */
227
228/*
224 * mds states 229 * mds states
225 * > 0 -> in 230 * > 0 -> in
226 * <= 0 -> out 231 * <= 0 -> out
@@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack {
233#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ 238#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */
234#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ 239#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */
235#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ 240#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
241#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */
236 242
237#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ 243#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */
238#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed 244#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed
@@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s);
264#define CEPH_LOCK_IXATTR 2048 270#define CEPH_LOCK_IXATTR 2048
265#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ 271#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
266#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ 272#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
273#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */
267 274
268/* client_session ops */ 275/* client_session ops */
269enum { 276enum {
@@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op);
338#define CEPH_SETATTR_SIZE 32 345#define CEPH_SETATTR_SIZE 32
339#define CEPH_SETATTR_CTIME 64 346#define CEPH_SETATTR_CTIME 64
340 347
348/*
349 * Ceph setxattr request flags.
350 */
351#define CEPH_XATTR_CREATE 1
352#define CEPH_XATTR_REPLACE 2
353
341union ceph_mds_request_args { 354union ceph_mds_request_args {
342 struct { 355 struct {
343 __le32 mask; /* CEPH_CAP_* */ 356 __le32 mask; /* CEPH_CAP_* */
@@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags);
522#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ 535#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
523#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ 536#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */
524 537
538#define CEPH_CAP_SIMPLE_BITS 2
539#define CEPH_CAP_FILE_BITS 8
540
525/* per-lock shift */ 541/* per-lock shift */
526#define CEPH_CAP_SAUTH 2 542#define CEPH_CAP_SAUTH 2
527#define CEPH_CAP_SLINK 4 543#define CEPH_CAP_SLINK 4
528#define CEPH_CAP_SXATTR 6 544#define CEPH_CAP_SXATTR 6
529#define CEPH_CAP_SFILE 8 545#define CEPH_CAP_SFILE 8
530#define CEPH_CAP_SFLOCK 20 546#define CEPH_CAP_SFLOCK 20
531 547
532#define CEPH_CAP_BITS 22 548#define CEPH_CAP_BITS 22
533 549
534/* composed values */ 550/* composed values */
535#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) 551#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 63d092822bad..360d9d08ca9e 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
52 return end >= *p && n <= end - *p; 52 return end >= *p && n <= end - *p;
53} 53}
54 54
55#define ceph_decode_need(p, end, n, bad) \ 55#define ceph_decode_need(p, end, n, bad) \
56 do { \ 56 do { \
57 if (!likely(ceph_has_room(p, end, n))) \ 57 if (!likely(ceph_has_room(p, end, n))) \
58 goto bad; \ 58 goto bad; \
59 } while (0) 59 } while (0)
60 60
61#define ceph_decode_64_safe(p, end, v, bad) \ 61#define ceph_decode_64_safe(p, end, v, bad) \
@@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
99 * 99 *
100 * There are two possible failures: 100 * There are two possible failures:
101 * - converting the string would require accessing memory at or 101 * - converting the string would require accessing memory at or
102 * beyond the "end" pointer provided (-E 102 * beyond the "end" pointer provided (-ERANGE)
103 * - memory could not be allocated for the result 103 * - memory could not be allocated for the result (-ENOMEM)
104 */ 104 */
105static inline char *ceph_extract_encoded_string(void **p, void *end, 105static inline char *ceph_extract_encoded_string(void **p, void *end,
106 size_t *lenp, gfp_t gfp) 106 size_t *lenp, gfp_t gfp)
@@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end,
217 *p += len; 217 *p += len;
218} 218}
219 219
220#define ceph_encode_need(p, end, n, bad) \ 220#define ceph_encode_need(p, end, n, bad) \
221 do { \ 221 do { \
222 if (!likely(ceph_has_room(p, end, n))) \ 222 if (!likely(ceph_has_room(p, end, n))) \
223 goto bad; \ 223 goto bad; \
224 } while (0) 224 } while (0)
225 225
226#define ceph_encode_64_safe(p, end, v, bad) \ 226#define ceph_encode_64_safe(p, end, v, bad) \
@@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end,
231#define ceph_encode_32_safe(p, end, v, bad) \ 231#define ceph_encode_32_safe(p, end, v, bad) \
232 do { \ 232 do { \
233 ceph_encode_need(p, end, sizeof(u32), bad); \ 233 ceph_encode_need(p, end, sizeof(u32), bad); \
234 ceph_encode_32(p, v); \ 234 ceph_encode_32(p, v); \
235 } while (0) 235 } while (0)
236#define ceph_encode_16_safe(p, end, v, bad) \ 236#define ceph_encode_16_safe(p, end, v, bad) \
237 do { \ 237 do { \
238 ceph_encode_need(p, end, sizeof(u16), bad); \ 238 ceph_encode_need(p, end, sizeof(u16), bad); \
239 ceph_encode_16(p, v); \ 239 ceph_encode_16(p, v); \
240 } while (0)
241#define ceph_encode_8_safe(p, end, v, bad) \
242 do { \
243 ceph_encode_need(p, end, sizeof(u8), bad); \
244 ceph_encode_8(p, v); \
240 } while (0) 245 } while (0)
241 246
242#define ceph_encode_copy_safe(p, end, pv, n, bad) \ 247#define ceph_encode_copy_safe(p, end, pv, n, bad) \
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 084d3c622b12..29818fc3fa49 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len)
193} 193}
194 194
195/* ceph_common.c */ 195/* ceph_common.c */
196extern bool libceph_compatible(void *data);
197
196extern const char *ceph_msg_type_name(int type); 198extern const char *ceph_msg_type_name(int type);
197extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); 199extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
198extern struct kmem_cache *ceph_inode_cachep; 200extern struct kmem_cache *ceph_inode_cachep;
@@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client);
220/* pagevec.c */ 222/* pagevec.c */
221extern void ceph_release_page_vector(struct page **pages, int num_pages); 223extern void ceph_release_page_vector(struct page **pages, int num_pages);
222 224
223extern struct page **ceph_get_direct_page_vector(const char __user *data, 225extern struct page **ceph_get_direct_page_vector(const void __user *data,
224 int num_pages, 226 int num_pages,
225 bool write_page); 227 bool write_page);
226extern void ceph_put_page_vector(struct page **pages, int num_pages, 228extern void ceph_put_page_vector(struct page **pages, int num_pages,
@@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages,
228extern void ceph_release_page_vector(struct page **pages, int num_pages); 230extern void ceph_release_page_vector(struct page **pages, int num_pages);
229extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); 231extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
230extern int ceph_copy_user_to_page_vector(struct page **pages, 232extern int ceph_copy_user_to_page_vector(struct page **pages,
231 const char __user *data, 233 const void __user *data,
232 loff_t off, size_t len); 234 loff_t off, size_t len);
233extern int ceph_copy_to_page_vector(struct page **pages, 235extern void ceph_copy_to_page_vector(struct page **pages,
234 const char *data, 236 const void *data,
235 loff_t off, size_t len); 237 loff_t off, size_t len);
236extern int ceph_copy_from_page_vector(struct page **pages, 238extern void ceph_copy_from_page_vector(struct page **pages,
237 char *data, 239 void *data,
238 loff_t off, size_t len); 240 loff_t off, size_t len);
239extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, 241extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
240 loff_t off, size_t len); 242 loff_t off, size_t len);
241extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); 243extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
242 244
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index cb15b5d867c7..87ed09f54800 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -29,8 +29,8 @@ struct ceph_mdsmap {
29 29
30 /* which object pools file data can be stored in */ 30 /* which object pools file data can be stored in */
31 int m_num_data_pg_pools; 31 int m_num_data_pg_pools;
32 u32 *m_data_pg_pools; 32 u64 *m_data_pg_pools;
33 u32 m_cas_pg_pool; 33 u64 m_cas_pg_pool;
34}; 34};
35 35
36static inline struct ceph_entity_addr * 36static inline struct ceph_entity_addr *
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 14ba5ee738a9..60903e0f665c 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -83,9 +83,11 @@ struct ceph_msg {
83 struct list_head list_head; 83 struct list_head list_head;
84 84
85 struct kref kref; 85 struct kref kref;
86#ifdef CONFIG_BLOCK
86 struct bio *bio; /* instead of pages/pagelist */ 87 struct bio *bio; /* instead of pages/pagelist */
87 struct bio *bio_iter; /* bio iterator */ 88 struct bio *bio_iter; /* bio iterator */
88 int bio_seg; /* current bio segment */ 89 int bio_seg; /* current bio segment */
90#endif /* CONFIG_BLOCK */
89 struct ceph_pagelist *trail; /* the trailing part of the data */ 91 struct ceph_pagelist *trail; /* the trailing part of the data */
90 bool front_is_vmalloc; 92 bool front_is_vmalloc;
91 bool more_to_follow; 93 bool more_to_follow;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d9b880e977e6..1dd5d466b6f9 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -10,6 +10,7 @@
10#include <linux/ceph/osdmap.h> 10#include <linux/ceph/osdmap.h>
11#include <linux/ceph/messenger.h> 11#include <linux/ceph/messenger.h>
12#include <linux/ceph/auth.h> 12#include <linux/ceph/auth.h>
13#include <linux/ceph/pagelist.h>
13 14
14/* 15/*
15 * Maximum object name size 16 * Maximum object name size
@@ -22,7 +23,6 @@ struct ceph_snap_context;
22struct ceph_osd_request; 23struct ceph_osd_request;
23struct ceph_osd_client; 24struct ceph_osd_client;
24struct ceph_authorizer; 25struct ceph_authorizer;
25struct ceph_pagelist;
26 26
27/* 27/*
28 * completion callback for async writepages 28 * completion callback for async writepages
@@ -47,6 +47,9 @@ struct ceph_osd {
47 struct list_head o_keepalive_item; 47 struct list_head o_keepalive_item;
48}; 48};
49 49
50
51#define CEPH_OSD_MAX_OP 10
52
50/* an in-flight request */ 53/* an in-flight request */
51struct ceph_osd_request { 54struct ceph_osd_request {
52 u64 r_tid; /* unique for this client */ 55 u64 r_tid; /* unique for this client */
@@ -63,9 +66,23 @@ struct ceph_osd_request {
63 struct ceph_connection *r_con_filling_msg; 66 struct ceph_connection *r_con_filling_msg;
64 67
65 struct ceph_msg *r_request, *r_reply; 68 struct ceph_msg *r_request, *r_reply;
66 int r_result;
67 int r_flags; /* any additional flags for the osd */ 69 int r_flags; /* any additional flags for the osd */
68 u32 r_sent; /* >0 if r_request is sending/sent */ 70 u32 r_sent; /* >0 if r_request is sending/sent */
71 int r_num_ops;
72
73 /* encoded message content */
74 struct ceph_osd_op *r_request_ops;
75 /* these are updated on each send */
76 __le32 *r_request_osdmap_epoch;
77 __le32 *r_request_flags;
78 __le64 *r_request_pool;
79 void *r_request_pgid;
80 __le32 *r_request_attempts;
81 struct ceph_eversion *r_request_reassert_version;
82
83 int r_result;
84 int r_reply_op_len[CEPH_OSD_MAX_OP];
85 s32 r_reply_op_result[CEPH_OSD_MAX_OP];
69 int r_got_reply; 86 int r_got_reply;
70 int r_linger; 87 int r_linger;
71 88
@@ -82,6 +99,7 @@ struct ceph_osd_request {
82 99
83 char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ 100 char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */
84 int r_oid_len; 101 int r_oid_len;
102 u64 r_snapid;
85 unsigned long r_stamp; /* send OR check time */ 103 unsigned long r_stamp; /* send OR check time */
86 104
87 struct ceph_file_layout r_file_layout; 105 struct ceph_file_layout r_file_layout;
@@ -95,7 +113,7 @@ struct ceph_osd_request {
95 struct bio *r_bio; /* instead of pages */ 113 struct bio *r_bio; /* instead of pages */
96#endif 114#endif
97 115
98 struct ceph_pagelist *r_trail; /* trailing part of the data */ 116 struct ceph_pagelist r_trail; /* trailing part of the data */
99}; 117};
100 118
101struct ceph_osd_event { 119struct ceph_osd_event {
@@ -107,7 +125,6 @@ struct ceph_osd_event {
107 struct rb_node node; 125 struct rb_node node;
108 struct list_head osd_node; 126 struct list_head osd_node;
109 struct kref kref; 127 struct kref kref;
110 struct completion completion;
111}; 128};
112 129
113struct ceph_osd_event_work { 130struct ceph_osd_event_work {
@@ -157,7 +174,7 @@ struct ceph_osd_client {
157 174
158struct ceph_osd_req_op { 175struct ceph_osd_req_op {
159 u16 op; /* CEPH_OSD_OP_* */ 176 u16 op; /* CEPH_OSD_OP_* */
160 u32 flags; /* CEPH_OSD_FLAG_* */ 177 u32 payload_len;
161 union { 178 union {
162 struct { 179 struct {
163 u64 offset, length; 180 u64 offset, length;
@@ -166,23 +183,24 @@ struct ceph_osd_req_op {
166 } extent; 183 } extent;
167 struct { 184 struct {
168 const char *name; 185 const char *name;
169 u32 name_len;
170 const char *val; 186 const char *val;
187 u32 name_len;
171 u32 value_len; 188 u32 value_len;
172 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ 189 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
173 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ 190 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
174 } xattr; 191 } xattr;
175 struct { 192 struct {
176 const char *class_name; 193 const char *class_name;
177 __u8 class_len;
178 const char *method_name; 194 const char *method_name;
179 __u8 method_len;
180 __u8 argc;
181 const char *indata; 195 const char *indata;
182 u32 indata_len; 196 u32 indata_len;
197 __u8 class_len;
198 __u8 method_len;
199 __u8 argc;
183 } cls; 200 } cls;
184 struct { 201 struct {
185 u64 cookie, count; 202 u64 cookie;
203 u64 count;
186 } pgls; 204 } pgls;
187 struct { 205 struct {
188 u64 snapid; 206 u64 snapid;
@@ -190,12 +208,11 @@ struct ceph_osd_req_op {
190 struct { 208 struct {
191 u64 cookie; 209 u64 cookie;
192 u64 ver; 210 u64 ver;
193 __u8 flag;
194 u32 prot_ver; 211 u32 prot_ver;
195 u32 timeout; 212 u32 timeout;
213 __u8 flag;
196 } watch; 214 } watch;
197 }; 215 };
198 u32 payload_len;
199}; 216};
200 217
201extern int ceph_osdc_init(struct ceph_osd_client *osdc, 218extern int ceph_osdc_init(struct ceph_osd_client *osdc,
@@ -207,29 +224,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
207extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, 224extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
208 struct ceph_msg *msg); 225 struct ceph_msg *msg);
209 226
210extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
211 struct ceph_file_layout *layout,
212 u64 snapid,
213 u64 off, u64 *plen, u64 *bno,
214 struct ceph_osd_request *req,
215 struct ceph_osd_req_op *op);
216
217extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 227extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
218 int flags,
219 struct ceph_snap_context *snapc, 228 struct ceph_snap_context *snapc,
220 struct ceph_osd_req_op *ops, 229 unsigned int num_op,
221 bool use_mempool, 230 bool use_mempool,
222 gfp_t gfp_flags, 231 gfp_t gfp_flags);
223 struct page **pages,
224 struct bio *bio);
225 232
226extern void ceph_osdc_build_request(struct ceph_osd_request *req, 233extern void ceph_osdc_build_request(struct ceph_osd_request *req,
227 u64 off, u64 *plen, 234 u64 off, u64 len,
235 unsigned int num_op,
228 struct ceph_osd_req_op *src_ops, 236 struct ceph_osd_req_op *src_ops,
229 struct ceph_snap_context *snapc, 237 struct ceph_snap_context *snapc,
230 struct timespec *mtime, 238 u64 snap_id,
231 const char *oid, 239 struct timespec *mtime);
232 int oid_len);
233 240
234extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, 241extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
235 struct ceph_file_layout *layout, 242 struct ceph_file_layout *layout,
@@ -239,8 +246,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
239 int do_sync, u32 truncate_seq, 246 int do_sync, u32 truncate_seq,
240 u64 truncate_size, 247 u64 truncate_size,
241 struct timespec *mtime, 248 struct timespec *mtime,
242 bool use_mempool, int num_reply, 249 bool use_mempool, int page_align);
243 int page_align);
244 250
245extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, 251extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
246 struct ceph_osd_request *req); 252 struct ceph_osd_request *req);
@@ -279,17 +285,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
279 u64 off, u64 len, 285 u64 off, u64 len,
280 u32 truncate_seq, u64 truncate_size, 286 u32 truncate_seq, u64 truncate_size,
281 struct timespec *mtime, 287 struct timespec *mtime,
282 struct page **pages, int nr_pages, 288 struct page **pages, int nr_pages);
283 int flags, int do_sync, bool nofail);
284 289
285/* watch/notify events */ 290/* watch/notify events */
286extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, 291extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
287 void (*event_cb)(u64, u64, u8, void *), 292 void (*event_cb)(u64, u64, u8, void *),
288 int one_shot, void *data, 293 void *data, struct ceph_osd_event **pevent);
289 struct ceph_osd_event **pevent);
290extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); 294extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
291extern int ceph_osdc_wait_event(struct ceph_osd_event *event,
292 unsigned long timeout);
293extern void ceph_osdc_put_event(struct ceph_osd_event *event); 295extern void ceph_osdc_put_event(struct ceph_osd_event *event);
294#endif 296#endif
295 297
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 10a417f9f76f..c819190d1642 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -18,14 +18,31 @@
18 * The map can be updated either via an incremental map (diff) describing 18 * The map can be updated either via an incremental map (diff) describing
19 * the change between two successive epochs, or as a fully encoded map. 19 * the change between two successive epochs, or as a fully encoded map.
20 */ 20 */
21struct ceph_pg {
22 uint64_t pool;
23 uint32_t seed;
24};
25
26#define CEPH_POOL_FLAG_HASHPSPOOL 1
27
21struct ceph_pg_pool_info { 28struct ceph_pg_pool_info {
22 struct rb_node node; 29 struct rb_node node;
23 int id; 30 s64 id;
24 struct ceph_pg_pool v; 31 u8 type;
25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; 32 u8 size;
33 u8 crush_ruleset;
34 u8 object_hash;
35 u32 pg_num, pgp_num;
36 int pg_num_mask, pgp_num_mask;
37 u64 flags;
26 char *name; 38 char *name;
27}; 39};
28 40
41struct ceph_object_locator {
42 uint64_t pool;
43 char *key;
44};
45
29struct ceph_pg_mapping { 46struct ceph_pg_mapping {
30 struct rb_node node; 47 struct rb_node node;
31 struct ceph_pg pgid; 48 struct ceph_pg pgid;
@@ -110,15 +127,16 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
110 127
111/* calculate mapping of a file extent to an object */ 128/* calculate mapping of a file extent to an object */
112extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, 129extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
113 u64 off, u64 *plen, 130 u64 off, u64 len,
114 u64 *bno, u64 *oxoff, u64 *oxlen); 131 u64 *bno, u64 *oxoff, u64 *oxlen);
115 132
116/* calculate mapping of object to a placement group */ 133/* calculate mapping of object to a placement group */
117extern int ceph_calc_object_layout(struct ceph_object_layout *ol, 134extern int ceph_calc_object_layout(struct ceph_pg *pg,
118 const char *oid, 135 const char *oid,
119 struct ceph_file_layout *fl, 136 struct ceph_file_layout *fl,
120 struct ceph_osdmap *osdmap); 137 struct ceph_osdmap *osdmap);
121extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, 138extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
139 struct ceph_pg pgid,
122 int *acting); 140 int *acting);
123extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, 141extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
124 struct ceph_pg pgid); 142 struct ceph_pg pgid);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 2c04afeead1c..68c96a508ac2 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -9,14 +9,6 @@
9#include <linux/ceph/msgr.h> 9#include <linux/ceph/msgr.h>
10 10
11/* 11/*
12 * osdmap encoding versions
13 */
14#define CEPH_OSDMAP_INC_VERSION 5
15#define CEPH_OSDMAP_INC_VERSION_EXT 6
16#define CEPH_OSDMAP_VERSION 5
17#define CEPH_OSDMAP_VERSION_EXT 6
18
19/*
20 * fs id 12 * fs id
21 */ 13 */
22struct ceph_fsid { 14struct ceph_fsid {
@@ -64,7 +56,7 @@ struct ceph_timespec {
64 * placement group. 56 * placement group.
65 * we encode this into one __le64. 57 * we encode this into one __le64.
66 */ 58 */
67struct ceph_pg { 59struct ceph_pg_v1 {
68 __le16 preferred; /* preferred primary osd */ 60 __le16 preferred; /* preferred primary osd */
69 __le16 ps; /* placement seed */ 61 __le16 ps; /* placement seed */
70 __le32 pool; /* object pool */ 62 __le32 pool; /* object pool */
@@ -91,21 +83,6 @@ struct ceph_pg {
91 83
92#define CEPH_PG_TYPE_REP 1 84#define CEPH_PG_TYPE_REP 1
93#define CEPH_PG_TYPE_RAID4 2 85#define CEPH_PG_TYPE_RAID4 2
94#define CEPH_PG_POOL_VERSION 2
95struct ceph_pg_pool {
96 __u8 type; /* CEPH_PG_TYPE_* */
97 __u8 size; /* number of osds in each pg */
98 __u8 crush_ruleset; /* crush placement rule */
99 __u8 object_hash; /* hash mapping object name to ps */
100 __le32 pg_num, pgp_num; /* number of pg's */
101 __le32 lpg_num, lpgp_num; /* number of localized pg's */
102 __le32 last_change; /* most recent epoch changed */
103 __le64 snap_seq; /* seq for per-pool snapshot */
104 __le32 snap_epoch; /* epoch of last snap */
105 __le32 num_snaps;
106 __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */
107 __le64 auid; /* who owns the pg */
108} __attribute__ ((packed));
109 86
110/* 87/*
111 * stable_mod func is used to control number of placement groups. 88 * stable_mod func is used to control number of placement groups.
@@ -128,7 +105,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask)
128 * object layout - how a given object should be stored. 105 * object layout - how a given object should be stored.
129 */ 106 */
130struct ceph_object_layout { 107struct ceph_object_layout {
131 struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ 108 struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */
132 __le32 ol_stripe_unit; /* for per-object parity, if any */ 109 __le32 ol_stripe_unit; /* for per-object parity, if any */
133} __attribute__ ((packed)); 110} __attribute__ ((packed));
134 111
@@ -145,8 +122,12 @@ struct ceph_eversion {
145 */ 122 */
146 123
147/* status bits */ 124/* status bits */
148#define CEPH_OSD_EXISTS 1 125#define CEPH_OSD_EXISTS (1<<0)
149#define CEPH_OSD_UP 2 126#define CEPH_OSD_UP (1<<1)
127#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
128#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
129
130extern const char *ceph_osd_state_name(int s);
150 131
151/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ 132/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
152#define CEPH_OSD_IN 0x10000 133#define CEPH_OSD_IN 0x10000
@@ -161,9 +142,25 @@ struct ceph_eversion {
161#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ 142#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
162#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ 143#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
163#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ 144#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
145#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
146#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
147#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
148#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
149#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
150#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
151
152/*
153 * The error code to return when an OSD can't handle a write
154 * because it is too large.
155 */
156#define OSD_WRITETOOBIG EMSGSIZE
164 157
165/* 158/*
166 * osd ops 159 * osd ops
160 *
161 * WARNING: do not use these op codes directly. Use the helpers
162 * defined below instead. In certain cases, op code behavior was
163 * redefined, resulting in special-cases in the helpers.
167 */ 164 */
168#define CEPH_OSD_OP_MODE 0xf000 165#define CEPH_OSD_OP_MODE 0xf000
169#define CEPH_OSD_OP_MODE_RD 0x1000 166#define CEPH_OSD_OP_MODE_RD 0x1000
@@ -177,6 +174,7 @@ struct ceph_eversion {
177#define CEPH_OSD_OP_TYPE_ATTR 0x0300 174#define CEPH_OSD_OP_TYPE_ATTR 0x0300
178#define CEPH_OSD_OP_TYPE_EXEC 0x0400 175#define CEPH_OSD_OP_TYPE_EXEC 0x0400
179#define CEPH_OSD_OP_TYPE_PG 0x0500 176#define CEPH_OSD_OP_TYPE_PG 0x0500
177#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */
180 178
181enum { 179enum {
182 /** data **/ 180 /** data **/
@@ -217,6 +215,23 @@ enum {
217 215
218 CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, 216 CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
219 217
218 /* omap */
219 CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
220 CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
221 CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19,
222 CEPH_OSD_OP_OMAPGETVALSBYKEYS =
223 CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20,
224 CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21,
225 CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22,
226 CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23,
227 CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
228 CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
229
230 /** multi **/
231 CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
232 CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
233 CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3,
234
220 /** attrs **/ 235 /** attrs **/
221 /* read */ 236 /* read */
222 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, 237 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
@@ -238,6 +253,7 @@ enum {
238 CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, 253 CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6,
239 CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, 254 CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
240 CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, 255 CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8,
256 CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9,
241 257
242 /** lock **/ 258 /** lock **/
243 CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, 259 CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
@@ -248,10 +264,12 @@ enum {
248 CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, 264 CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
249 265
250 /** exec **/ 266 /** exec **/
267 /* note: the RD bit here is wrong; see special-case below in helper */
251 CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, 268 CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
252 269
253 /** pg **/ 270 /** pg **/
254 CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, 271 CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
272 CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2,
255}; 273};
256 274
257static inline int ceph_osd_op_type_lock(int op) 275static inline int ceph_osd_op_type_lock(int op)
@@ -274,6 +292,10 @@ static inline int ceph_osd_op_type_pg(int op)
274{ 292{
275 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; 293 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
276} 294}
295static inline int ceph_osd_op_type_multi(int op)
296{
297 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI;
298}
277 299
278static inline int ceph_osd_op_mode_subop(int op) 300static inline int ceph_osd_op_mode_subop(int op)
279{ 301{
@@ -281,11 +303,12 @@ static inline int ceph_osd_op_mode_subop(int op)
281} 303}
282static inline int ceph_osd_op_mode_read(int op) 304static inline int ceph_osd_op_mode_read(int op)
283{ 305{
284 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; 306 return (op & CEPH_OSD_OP_MODE_RD) &&
307 op != CEPH_OSD_OP_CALL;
285} 308}
286static inline int ceph_osd_op_mode_modify(int op) 309static inline int ceph_osd_op_mode_modify(int op)
287{ 310{
288 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; 311 return op & CEPH_OSD_OP_MODE_WR;
289} 312}
290 313
291/* 314/*
@@ -294,34 +317,38 @@ static inline int ceph_osd_op_mode_modify(int op)
294 */ 317 */
295#define CEPH_OSD_TMAP_HDR 'h' 318#define CEPH_OSD_TMAP_HDR 'h'
296#define CEPH_OSD_TMAP_SET 's' 319#define CEPH_OSD_TMAP_SET 's'
320#define CEPH_OSD_TMAP_CREATE 'c' /* create key */
297#define CEPH_OSD_TMAP_RM 'r' 321#define CEPH_OSD_TMAP_RM 'r'
322#define CEPH_OSD_TMAP_RMSLOPPY 'R'
298 323
299extern const char *ceph_osd_op_name(int op); 324extern const char *ceph_osd_op_name(int op);
300 325
301
302/* 326/*
303 * osd op flags 327 * osd op flags
304 * 328 *
305 * An op may be READ, WRITE, or READ|WRITE. 329 * An op may be READ, WRITE, or READ|WRITE.
306 */ 330 */
307enum { 331enum {
308 CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ 332 CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */
309 CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ 333 CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */
310 CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ 334 CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */
311 CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ 335 CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */
312 CEPH_OSD_FLAG_READ = 16, /* op may read */ 336 CEPH_OSD_FLAG_READ = 0x0010, /* op may read */
313 CEPH_OSD_FLAG_WRITE = 32, /* op may write */ 337 CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */
314 CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ 338 CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */
315 CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ 339 CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */
316 CEPH_OSD_FLAG_BALANCE_READS = 256, 340 CEPH_OSD_FLAG_BALANCE_READS = 0x0100,
317 CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ 341 CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */
318 CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ 342 CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */
319 CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ 343 CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */
320 CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ 344 CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */
345 CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */
346 CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */
321}; 347};
322 348
323enum { 349enum {
324 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ 350 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
351 CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */
325}; 352};
326 353
327#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ 354#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
@@ -381,48 +408,13 @@ struct ceph_osd_op {
381 __le64 ver; 408 __le64 ver;
382 __u8 flag; /* 0 = unwatch, 1 = watch */ 409 __u8 flag; /* 0 = unwatch, 1 = watch */
383 } __attribute__ ((packed)) watch; 410 } __attribute__ ((packed)) watch;
384}; 411 struct {
412 __le64 offset, length;
413 __le64 src_offset;
414 } __attribute__ ((packed)) clonerange;
415 };
385 __le32 payload_len; 416 __le32 payload_len;
386} __attribute__ ((packed)); 417} __attribute__ ((packed));
387 418
388/*
389 * osd request message header. each request may include multiple
390 * ceph_osd_op object operations.
391 */
392struct ceph_osd_request_head {
393 __le32 client_inc; /* client incarnation */
394 struct ceph_object_layout layout; /* pgid */
395 __le32 osdmap_epoch; /* client's osdmap epoch */
396
397 __le32 flags;
398
399 struct ceph_timespec mtime; /* for mutations only */
400 struct ceph_eversion reassert_version; /* if we are replaying op */
401
402 __le32 object_len; /* length of object name */
403
404 __le64 snapid; /* snapid to read */
405 __le64 snap_seq; /* writer's snap context */
406 __le32 num_snaps;
407
408 __le16 num_ops;
409 struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */
410} __attribute__ ((packed));
411
412struct ceph_osd_reply_head {
413 __le32 client_inc; /* client incarnation */
414 __le32 flags;
415 struct ceph_object_layout layout;
416 __le32 osdmap_epoch;
417 struct ceph_eversion reassert_version; /* for replaying uncommitted */
418
419 __le32 result; /* result code */
420
421 __le32 object_len; /* length of object name */
422 __le32 num_ops;
423 struct ceph_osd_op ops[0]; /* ops[], object */
424} __attribute__ ((packed));
425
426
427 419
428#endif 420#endif