aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Alex Elder <elder@inktank.com> 2013-02-19 20:20:56 -0500
committer Alex Elder <elder@inktank.com> 2013-02-19 20:21:08 -0500
commit 4c7a08c83a7842e88838dde16684d6bafffdfaf0 (patch)
tree c5fe0057b2ff9f98a64ceb6fa076e75da8225cdd /include
parent 19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff)
parent 903bb32e890237ca43ab847e561e5377cfe0fdb3 (diff)
Merge branch 'testing' of github.com:ceph/ceph-client into linux-3.8-ceph
Diffstat (limited to 'include')
-rw-r--r-- include/linux/ceph/ceph_features.h 8
-rw-r--r-- include/linux/ceph/ceph_fs.h 32
-rw-r--r-- include/linux/ceph/decode.h 29
-rw-r--r-- include/linux/ceph/libceph.h 16
-rw-r--r-- include/linux/ceph/messenger.h 2
-rw-r--r-- include/linux/ceph/osd_client.h 54
-rw-r--r-- include/linux/ceph/osdmap.h 2
-rw-r--r-- include/linux/ceph/rados.h 93
-rw-r--r-- include/linux/crush/crush.h 2
9 files changed, 154 insertions, 84 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index dad579b0c0e6..2160aab482f6 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -14,13 +14,19 @@
14#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) 14#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
15/* bits 8-17 defined by user-space; not supported yet here */ 15/* bits 8-17 defined by user-space; not supported yet here */
16#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) 16#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
17/* bits 19-24 defined by user-space; not supported yet here */
18#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
19/* bit 26 defined by user-space; not supported yet here */
20#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
17 21
18/* 22/*
19 * Features supported. 23 * Features supported.
20 */ 24 */
21#define CEPH_FEATURES_SUPPORTED_DEFAULT \ 25#define CEPH_FEATURES_SUPPORTED_DEFAULT \
22 (CEPH_FEATURE_NOSRCADDR | \ 26 (CEPH_FEATURE_NOSRCADDR | \
23 CEPH_FEATURE_CRUSH_TUNABLES) 27 CEPH_FEATURE_CRUSH_TUNABLES | \
28 CEPH_FEATURE_CRUSH_TUNABLES2 | \
29 CEPH_FEATURE_REPLY_CREATE_INODE)
24 30
25#define CEPH_FEATURES_REQUIRED_DEFAULT \ 31#define CEPH_FEATURES_REQUIRED_DEFAULT \
26 (CEPH_FEATURE_NOSRCADDR) 32 (CEPH_FEATURE_NOSRCADDR)
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index cf6f4d998a76..2ad7b860f062 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -21,16 +21,14 @@
21 * internal cluster protocols separately from the public, 21 * internal cluster protocols separately from the public,
22 * client-facing protocol. 22 * client-facing protocol.
23 */ 23 */
24#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
25#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
26#define CEPH_MON_PROTOCOL 5 /* cluster internal */
27#define CEPH_OSDC_PROTOCOL 24 /* server/client */ 24#define CEPH_OSDC_PROTOCOL 24 /* server/client */
28#define CEPH_MDSC_PROTOCOL 32 /* server/client */ 25#define CEPH_MDSC_PROTOCOL 32 /* server/client */
29#define CEPH_MONC_PROTOCOL 15 /* server/client */ 26#define CEPH_MONC_PROTOCOL 15 /* server/client */
30 27
31 28
32#define CEPH_INO_ROOT 1 29#define CEPH_INO_ROOT 1
33#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ 30#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
31#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */
34 32
35/* arbitrary limit on max # of monitors (cluster of 3 is typical) */ 33/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
36#define CEPH_MAX_MON 31 34#define CEPH_MAX_MON 31
@@ -51,7 +49,7 @@ struct ceph_file_layout {
51 __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ 49 __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
52 50
53 /* object -> pg layout */ 51 /* object -> pg layout */
54 __le32 fl_unused; /* unused; used to be preferred primary (-1) */ 52 __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */
55 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ 53 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
56} __attribute__ ((packed)); 54} __attribute__ ((packed));
57 55
@@ -101,6 +99,8 @@ struct ceph_dir_layout {
101#define CEPH_MSG_MON_SUBSCRIBE_ACK 16 99#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
102#define CEPH_MSG_AUTH 17 100#define CEPH_MSG_AUTH 17
103#define CEPH_MSG_AUTH_REPLY 18 101#define CEPH_MSG_AUTH_REPLY 18
102#define CEPH_MSG_MON_GET_VERSION 19
103#define CEPH_MSG_MON_GET_VERSION_REPLY 20
104 104
105/* client <-> mds */ 105/* client <-> mds */
106#define CEPH_MSG_MDS_MAP 21 106#define CEPH_MSG_MDS_MAP 21
@@ -221,6 +221,11 @@ struct ceph_mon_subscribe_ack {
221} __attribute__ ((packed)); 221} __attribute__ ((packed));
222 222
223/* 223/*
224 * mdsmap flags
225 */
226#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */
227
228/*
224 * mds states 229 * mds states
225 * > 0 -> in 230 * > 0 -> in
226 * <= 0 -> out 231 * <= 0 -> out
@@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack {
233#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ 238#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */
234#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ 239#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */
235#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ 240#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
241#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */
236 242
237#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ 243#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */
238#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed 244#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed
@@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s);
264#define CEPH_LOCK_IXATTR 2048 270#define CEPH_LOCK_IXATTR 2048
265#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ 271#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
266#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ 272#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
273#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */
267 274
268/* client_session ops */ 275/* client_session ops */
269enum { 276enum {
@@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op);
338#define CEPH_SETATTR_SIZE 32 345#define CEPH_SETATTR_SIZE 32
339#define CEPH_SETATTR_CTIME 64 346#define CEPH_SETATTR_CTIME 64
340 347
348/*
349 * Ceph setxattr request flags.
350 */
351#define CEPH_XATTR_CREATE 1
352#define CEPH_XATTR_REPLACE 2
353
341union ceph_mds_request_args { 354union ceph_mds_request_args {
342 struct { 355 struct {
343 __le32 mask; /* CEPH_CAP_* */ 356 __le32 mask; /* CEPH_CAP_* */
@@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags);
522#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ 535#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
523#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ 536#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */
524 537
538#define CEPH_CAP_SIMPLE_BITS 2
539#define CEPH_CAP_FILE_BITS 8
540
525/* per-lock shift */ 541/* per-lock shift */
526#define CEPH_CAP_SAUTH 2 542#define CEPH_CAP_SAUTH 2
527#define CEPH_CAP_SLINK 4 543#define CEPH_CAP_SLINK 4
528#define CEPH_CAP_SXATTR 6 544#define CEPH_CAP_SXATTR 6
529#define CEPH_CAP_SFILE 8 545#define CEPH_CAP_SFILE 8
530#define CEPH_CAP_SFLOCK 20 546#define CEPH_CAP_SFLOCK 20
531 547
532#define CEPH_CAP_BITS 22 548#define CEPH_CAP_BITS 22
533 549
534/* composed values */ 550/* composed values */
535#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) 551#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 63d092822bad..360d9d08ca9e 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
52 return end >= *p && n <= end - *p; 52 return end >= *p && n <= end - *p;
53} 53}
54 54
55#define ceph_decode_need(p, end, n, bad) \ 55#define ceph_decode_need(p, end, n, bad) \
56 do { \ 56 do { \
57 if (!likely(ceph_has_room(p, end, n))) \ 57 if (!likely(ceph_has_room(p, end, n))) \
58 goto bad; \ 58 goto bad; \
59 } while (0) 59 } while (0)
60 60
61#define ceph_decode_64_safe(p, end, v, bad) \ 61#define ceph_decode_64_safe(p, end, v, bad) \
@@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
99 * 99 *
100 * There are two possible failures: 100 * There are two possible failures:
101 * - converting the string would require accessing memory at or 101 * - converting the string would require accessing memory at or
102 * beyond the "end" pointer provided (-E 102 * beyond the "end" pointer provided (-ERANGE)
103 * - memory could not be allocated for the result 103 * - memory could not be allocated for the result (-ENOMEM)
104 */ 104 */
105static inline char *ceph_extract_encoded_string(void **p, void *end, 105static inline char *ceph_extract_encoded_string(void **p, void *end,
106 size_t *lenp, gfp_t gfp) 106 size_t *lenp, gfp_t gfp)
@@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end,
217 *p += len; 217 *p += len;
218} 218}
219 219
220#define ceph_encode_need(p, end, n, bad) \ 220#define ceph_encode_need(p, end, n, bad) \
221 do { \ 221 do { \
222 if (!likely(ceph_has_room(p, end, n))) \ 222 if (!likely(ceph_has_room(p, end, n))) \
223 goto bad; \ 223 goto bad; \
224 } while (0) 224 } while (0)
225 225
226#define ceph_encode_64_safe(p, end, v, bad) \ 226#define ceph_encode_64_safe(p, end, v, bad) \
@@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end,
231#define ceph_encode_32_safe(p, end, v, bad) \ 231#define ceph_encode_32_safe(p, end, v, bad) \
232 do { \ 232 do { \
233 ceph_encode_need(p, end, sizeof(u32), bad); \ 233 ceph_encode_need(p, end, sizeof(u32), bad); \
234 ceph_encode_32(p, v); \ 234 ceph_encode_32(p, v); \
235 } while (0) 235 } while (0)
236#define ceph_encode_16_safe(p, end, v, bad) \ 236#define ceph_encode_16_safe(p, end, v, bad) \
237 do { \ 237 do { \
238 ceph_encode_need(p, end, sizeof(u16), bad); \ 238 ceph_encode_need(p, end, sizeof(u16), bad); \
239 ceph_encode_16(p, v); \ 239 ceph_encode_16(p, v); \
240 } while (0)
241#define ceph_encode_8_safe(p, end, v, bad) \
242 do { \
243 ceph_encode_need(p, end, sizeof(u8), bad); \
244 ceph_encode_8(p, v); \
240 } while (0) 245 } while (0)
241 246
242#define ceph_encode_copy_safe(p, end, pv, n, bad) \ 247#define ceph_encode_copy_safe(p, end, pv, n, bad) \
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 084d3c622b12..29818fc3fa49 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len)
193} 193}
194 194
195/* ceph_common.c */ 195/* ceph_common.c */
196extern bool libceph_compatible(void *data);
197
196extern const char *ceph_msg_type_name(int type); 198extern const char *ceph_msg_type_name(int type);
197extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); 199extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
198extern struct kmem_cache *ceph_inode_cachep; 200extern struct kmem_cache *ceph_inode_cachep;
@@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client);
220/* pagevec.c */ 222/* pagevec.c */
221extern void ceph_release_page_vector(struct page **pages, int num_pages); 223extern void ceph_release_page_vector(struct page **pages, int num_pages);
222 224
223extern struct page **ceph_get_direct_page_vector(const char __user *data, 225extern struct page **ceph_get_direct_page_vector(const void __user *data,
224 int num_pages, 226 int num_pages,
225 bool write_page); 227 bool write_page);
226extern void ceph_put_page_vector(struct page **pages, int num_pages, 228extern void ceph_put_page_vector(struct page **pages, int num_pages,
@@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages,
228extern void ceph_release_page_vector(struct page **pages, int num_pages); 230extern void ceph_release_page_vector(struct page **pages, int num_pages);
229extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); 231extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
230extern int ceph_copy_user_to_page_vector(struct page **pages, 232extern int ceph_copy_user_to_page_vector(struct page **pages,
231 const char __user *data, 233 const void __user *data,
232 loff_t off, size_t len); 234 loff_t off, size_t len);
233extern int ceph_copy_to_page_vector(struct page **pages, 235extern void ceph_copy_to_page_vector(struct page **pages,
234 const char *data, 236 const void *data,
235 loff_t off, size_t len); 237 loff_t off, size_t len);
236extern int ceph_copy_from_page_vector(struct page **pages, 238extern void ceph_copy_from_page_vector(struct page **pages,
237 char *data, 239 void *data,
238 loff_t off, size_t len); 240 loff_t off, size_t len);
239extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, 241extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
240 loff_t off, size_t len); 242 loff_t off, size_t len);
241extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); 243extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
242 244
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 14ba5ee738a9..60903e0f665c 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -83,9 +83,11 @@ struct ceph_msg {
83 struct list_head list_head; 83 struct list_head list_head;
84 84
85 struct kref kref; 85 struct kref kref;
86#ifdef CONFIG_BLOCK
86 struct bio *bio; /* instead of pages/pagelist */ 87 struct bio *bio; /* instead of pages/pagelist */
87 struct bio *bio_iter; /* bio iterator */ 88 struct bio *bio_iter; /* bio iterator */
88 int bio_seg; /* current bio segment */ 89 int bio_seg; /* current bio segment */
90#endif /* CONFIG_BLOCK */
89 struct ceph_pagelist *trail; /* the trailing part of the data */ 91 struct ceph_pagelist *trail; /* the trailing part of the data */
90 bool front_is_vmalloc; 92 bool front_is_vmalloc;
91 bool more_to_follow; 93 bool more_to_follow;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d9b880e977e6..388158ff0cbc 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -10,6 +10,7 @@
10#include <linux/ceph/osdmap.h> 10#include <linux/ceph/osdmap.h>
11#include <linux/ceph/messenger.h> 11#include <linux/ceph/messenger.h>
12#include <linux/ceph/auth.h> 12#include <linux/ceph/auth.h>
13#include <linux/ceph/pagelist.h>
13 14
14/* 15/*
15 * Maximum object name size 16 * Maximum object name size
@@ -22,7 +23,6 @@ struct ceph_snap_context;
22struct ceph_osd_request; 23struct ceph_osd_request;
23struct ceph_osd_client; 24struct ceph_osd_client;
24struct ceph_authorizer; 25struct ceph_authorizer;
25struct ceph_pagelist;
26 26
27/* 27/*
28 * completion callback for async writepages 28 * completion callback for async writepages
@@ -95,7 +95,7 @@ struct ceph_osd_request {
95 struct bio *r_bio; /* instead of pages */ 95 struct bio *r_bio; /* instead of pages */
96#endif 96#endif
97 97
98 struct ceph_pagelist *r_trail; /* trailing part of the data */ 98 struct ceph_pagelist r_trail; /* trailing part of the data */
99}; 99};
100 100
101struct ceph_osd_event { 101struct ceph_osd_event {
@@ -107,7 +107,6 @@ struct ceph_osd_event {
107 struct rb_node node; 107 struct rb_node node;
108 struct list_head osd_node; 108 struct list_head osd_node;
109 struct kref kref; 109 struct kref kref;
110 struct completion completion;
111}; 110};
112 111
113struct ceph_osd_event_work { 112struct ceph_osd_event_work {
@@ -157,7 +156,7 @@ struct ceph_osd_client {
157 156
158struct ceph_osd_req_op { 157struct ceph_osd_req_op {
159 u16 op; /* CEPH_OSD_OP_* */ 158 u16 op; /* CEPH_OSD_OP_* */
160 u32 flags; /* CEPH_OSD_FLAG_* */ 159 u32 payload_len;
161 union { 160 union {
162 struct { 161 struct {
163 u64 offset, length; 162 u64 offset, length;
@@ -166,23 +165,24 @@ struct ceph_osd_req_op {
166 } extent; 165 } extent;
167 struct { 166 struct {
168 const char *name; 167 const char *name;
169 u32 name_len;
170 const char *val; 168 const char *val;
169 u32 name_len;
171 u32 value_len; 170 u32 value_len;
172 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ 171 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
173 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ 172 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
174 } xattr; 173 } xattr;
175 struct { 174 struct {
176 const char *class_name; 175 const char *class_name;
177 __u8 class_len;
178 const char *method_name; 176 const char *method_name;
179 __u8 method_len;
180 __u8 argc;
181 const char *indata; 177 const char *indata;
182 u32 indata_len; 178 u32 indata_len;
179 __u8 class_len;
180 __u8 method_len;
181 __u8 argc;
183 } cls; 182 } cls;
184 struct { 183 struct {
185 u64 cookie, count; 184 u64 cookie;
185 u64 count;
186 } pgls; 186 } pgls;
187 struct { 187 struct {
188 u64 snapid; 188 u64 snapid;
@@ -190,12 +190,11 @@ struct ceph_osd_req_op {
190 struct { 190 struct {
191 u64 cookie; 191 u64 cookie;
192 u64 ver; 192 u64 ver;
193 __u8 flag;
194 u32 prot_ver; 193 u32 prot_ver;
195 u32 timeout; 194 u32 timeout;
195 __u8 flag;
196 } watch; 196 } watch;
197 }; 197 };
198 u32 payload_len;
199}; 198};
200 199
201extern int ceph_osdc_init(struct ceph_osd_client *osdc, 200extern int ceph_osdc_init(struct ceph_osd_client *osdc,
@@ -207,29 +206,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
207extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, 206extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
208 struct ceph_msg *msg); 207 struct ceph_msg *msg);
209 208
210extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
211 struct ceph_file_layout *layout,
212 u64 snapid,
213 u64 off, u64 *plen, u64 *bno,
214 struct ceph_osd_request *req,
215 struct ceph_osd_req_op *op);
216
217extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 209extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
218 int flags,
219 struct ceph_snap_context *snapc, 210 struct ceph_snap_context *snapc,
220 struct ceph_osd_req_op *ops, 211 unsigned int num_op,
221 bool use_mempool, 212 bool use_mempool,
222 gfp_t gfp_flags, 213 gfp_t gfp_flags);
223 struct page **pages,
224 struct bio *bio);
225 214
226extern void ceph_osdc_build_request(struct ceph_osd_request *req, 215extern void ceph_osdc_build_request(struct ceph_osd_request *req,
227 u64 off, u64 *plen, 216 u64 off, u64 len,
217 unsigned int num_op,
228 struct ceph_osd_req_op *src_ops, 218 struct ceph_osd_req_op *src_ops,
229 struct ceph_snap_context *snapc, 219 struct ceph_snap_context *snapc,
230 struct timespec *mtime, 220 u64 snap_id,
231 const char *oid, 221 struct timespec *mtime);
232 int oid_len);
233 222
234extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, 223extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
235 struct ceph_file_layout *layout, 224 struct ceph_file_layout *layout,
@@ -239,8 +228,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
239 int do_sync, u32 truncate_seq, 228 int do_sync, u32 truncate_seq,
240 u64 truncate_size, 229 u64 truncate_size,
241 struct timespec *mtime, 230 struct timespec *mtime,
242 bool use_mempool, int num_reply, 231 bool use_mempool, int page_align);
243 int page_align);
244 232
245extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, 233extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
246 struct ceph_osd_request *req); 234 struct ceph_osd_request *req);
@@ -279,17 +267,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
279 u64 off, u64 len, 267 u64 off, u64 len,
280 u32 truncate_seq, u64 truncate_size, 268 u32 truncate_seq, u64 truncate_size,
281 struct timespec *mtime, 269 struct timespec *mtime,
282 struct page **pages, int nr_pages, 270 struct page **pages, int nr_pages);
283 int flags, int do_sync, bool nofail);
284 271
285/* watch/notify events */ 272/* watch/notify events */
286extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, 273extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
287 void (*event_cb)(u64, u64, u8, void *), 274 void (*event_cb)(u64, u64, u8, void *),
288 int one_shot, void *data, 275 void *data, struct ceph_osd_event **pevent);
289 struct ceph_osd_event **pevent);
290extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); 276extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
291extern int ceph_osdc_wait_event(struct ceph_osd_event *event,
292 unsigned long timeout);
293extern void ceph_osdc_put_event(struct ceph_osd_event *event); 277extern void ceph_osdc_put_event(struct ceph_osd_event *event);
294#endif 278#endif
295 279
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 10a417f9f76f..c83a838f89f5 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
110 110
111/* calculate mapping of a file extent to an object */ 111/* calculate mapping of a file extent to an object */
112extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, 112extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
113 u64 off, u64 *plen, 113 u64 off, u64 len,
114 u64 *bno, u64 *oxoff, u64 *oxlen); 114 u64 *bno, u64 *oxoff, u64 *oxlen);
115 115
116/* calculate mapping of object to a placement group */ 116/* calculate mapping of object to a placement group */
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 2c04afeead1c..b65182aba6f7 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -145,8 +145,12 @@ struct ceph_eversion {
145 */ 145 */
146 146
147/* status bits */ 147/* status bits */
148#define CEPH_OSD_EXISTS 1 148#define CEPH_OSD_EXISTS (1<<0)
149#define CEPH_OSD_UP 2 149#define CEPH_OSD_UP (1<<1)
150#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
151#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
152
153extern const char *ceph_osd_state_name(int s);
150 154
151/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ 155/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
152#define CEPH_OSD_IN 0x10000 156#define CEPH_OSD_IN 0x10000
@@ -161,9 +165,25 @@ struct ceph_eversion {
161#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ 165#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
162#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ 166#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
163#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ 167#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
168#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
169#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
170#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
171#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
172#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
173#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
174
175/*
176 * The error code to return when an OSD can't handle a write
177 * because it is too large.
178 */
179#define OSD_WRITETOOBIG EMSGSIZE
164 180
165/* 181/*
166 * osd ops 182 * osd ops
183 *
184 * WARNING: do not use these op codes directly. Use the helpers
185 * defined below instead. In certain cases, op code behavior was
186 * redefined, resulting in special-cases in the helpers.
167 */ 187 */
168#define CEPH_OSD_OP_MODE 0xf000 188#define CEPH_OSD_OP_MODE 0xf000
169#define CEPH_OSD_OP_MODE_RD 0x1000 189#define CEPH_OSD_OP_MODE_RD 0x1000
@@ -177,6 +197,7 @@ struct ceph_eversion {
177#define CEPH_OSD_OP_TYPE_ATTR 0x0300 197#define CEPH_OSD_OP_TYPE_ATTR 0x0300
178#define CEPH_OSD_OP_TYPE_EXEC 0x0400 198#define CEPH_OSD_OP_TYPE_EXEC 0x0400
179#define CEPH_OSD_OP_TYPE_PG 0x0500 199#define CEPH_OSD_OP_TYPE_PG 0x0500
200#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */
180 201
181enum { 202enum {
182 /** data **/ 203 /** data **/
@@ -217,6 +238,23 @@ enum {
217 238
218 CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, 239 CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
219 240
241 /* omap */
242 CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
243 CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
244 CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19,
245 CEPH_OSD_OP_OMAPGETVALSBYKEYS =
246 CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20,
247 CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21,
248 CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22,
249 CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23,
250 CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
251 CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
252
253 /** multi **/
254 CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
255 CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
256 CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3,
257
220 /** attrs **/ 258 /** attrs **/
221 /* read */ 259 /* read */
222 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, 260 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
@@ -238,6 +276,7 @@ enum {
238 CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, 276 CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6,
239 CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, 277 CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
240 CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, 278 CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8,
279 CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9,
241 280
242 /** lock **/ 281 /** lock **/
243 CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, 282 CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
@@ -248,10 +287,12 @@ enum {
248 CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, 287 CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
249 288
250 /** exec **/ 289 /** exec **/
290 /* note: the RD bit here is wrong; see special-case below in helper */
251 CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, 291 CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
252 292
253 /** pg **/ 293 /** pg **/
254 CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, 294 CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
295 CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2,
255}; 296};
256 297
257static inline int ceph_osd_op_type_lock(int op) 298static inline int ceph_osd_op_type_lock(int op)
@@ -274,6 +315,10 @@ static inline int ceph_osd_op_type_pg(int op)
274{ 315{
275 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; 316 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
276} 317}
318static inline int ceph_osd_op_type_multi(int op)
319{
320 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI;
321}
277 322
278static inline int ceph_osd_op_mode_subop(int op) 323static inline int ceph_osd_op_mode_subop(int op)
279{ 324{
@@ -281,11 +326,12 @@ static inline int ceph_osd_op_mode_subop(int op)
281} 326}
282static inline int ceph_osd_op_mode_read(int op) 327static inline int ceph_osd_op_mode_read(int op)
283{ 328{
284 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; 329 return (op & CEPH_OSD_OP_MODE_RD) &&
330 op != CEPH_OSD_OP_CALL;
285} 331}
286static inline int ceph_osd_op_mode_modify(int op) 332static inline int ceph_osd_op_mode_modify(int op)
287{ 333{
288 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; 334 return op & CEPH_OSD_OP_MODE_WR;
289} 335}
290 336
291/* 337/*
@@ -294,34 +340,38 @@ static inline int ceph_osd_op_mode_modify(int op)
294 */ 340 */
295#define CEPH_OSD_TMAP_HDR 'h' 341#define CEPH_OSD_TMAP_HDR 'h'
296#define CEPH_OSD_TMAP_SET 's' 342#define CEPH_OSD_TMAP_SET 's'
343#define CEPH_OSD_TMAP_CREATE 'c' /* create key */
297#define CEPH_OSD_TMAP_RM 'r' 344#define CEPH_OSD_TMAP_RM 'r'
345#define CEPH_OSD_TMAP_RMSLOPPY 'R'
298 346
299extern const char *ceph_osd_op_name(int op); 347extern const char *ceph_osd_op_name(int op);
300 348
301
302/* 349/*
303 * osd op flags 350 * osd op flags
304 * 351 *
305 * An op may be READ, WRITE, or READ|WRITE. 352 * An op may be READ, WRITE, or READ|WRITE.
306 */ 353 */
307enum { 354enum {
308 CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ 355 CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */
309 CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ 356 CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */
310 CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ 357 CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */
311 CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ 358 CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */
312 CEPH_OSD_FLAG_READ = 16, /* op may read */ 359 CEPH_OSD_FLAG_READ = 0x0010, /* op may read */
313 CEPH_OSD_FLAG_WRITE = 32, /* op may write */ 360 CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */
314 CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ 361 CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */
315 CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ 362 CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */
316 CEPH_OSD_FLAG_BALANCE_READS = 256, 363 CEPH_OSD_FLAG_BALANCE_READS = 0x0100,
317 CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ 364 CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */
318 CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ 365 CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */
319 CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ 366 CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */
320 CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ 367 CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */
368 CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */
369 CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */
321}; 370};
322 371
323enum { 372enum {
324 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ 373 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
374 CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */
325}; 375};
326 376
327#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ 377#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
@@ -381,7 +431,11 @@ struct ceph_osd_op {
381 __le64 ver; 431 __le64 ver;
382 __u8 flag; /* 0 = unwatch, 1 = watch */ 432 __u8 flag; /* 0 = unwatch, 1 = watch */
383 } __attribute__ ((packed)) watch; 433 } __attribute__ ((packed)) watch;
384}; 434 struct {
435 __le64 offset, length;
436 __le64 src_offset;
437 } __attribute__ ((packed)) clonerange;
438 };
385 __le32 payload_len; 439 __le32 payload_len;
386} __attribute__ ((packed)); 440} __attribute__ ((packed));
387 441
@@ -424,5 +478,4 @@ struct ceph_osd_reply_head {
424} __attribute__ ((packed)); 478} __attribute__ ((packed));
425 479
426 480
427
428#endif 481#endif
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 25baa287cff7..6a1101f24cfb 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -162,6 +162,8 @@ struct crush_map {
162 __u32 choose_local_fallback_tries; 162 __u32 choose_local_fallback_tries;
163 /* choose attempts before giving up */ 163 /* choose attempts before giving up */
164 __u32 choose_total_tries; 164 __u32 choose_total_tries;
165 /* attempt chooseleaf inner descent once; on failure retry outer descent */
166 __u32 chooseleaf_descend_once;
165}; 167};
166 168
167 169