aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/ceph
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
commitc71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
treeecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /include/linux/ceph
parentea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'include/linux/ceph')
-rw-r--r--include/linux/ceph/auth.h92
-rw-r--r--include/linux/ceph/buffer.h39
-rw-r--r--include/linux/ceph/ceph_debug.h38
-rw-r--r--include/linux/ceph/ceph_frag.h109
-rw-r--r--include/linux/ceph/ceph_fs.h751
-rw-r--r--include/linux/ceph/ceph_hash.h13
-rw-r--r--include/linux/ceph/debugfs.h33
-rw-r--r--include/linux/ceph/decode.h201
-rw-r--r--include/linux/ceph/libceph.h249
-rw-r--r--include/linux/ceph/mdsmap.h62
-rw-r--r--include/linux/ceph/messenger.h257
-rw-r--r--include/linux/ceph/mon_client.h122
-rw-r--r--include/linux/ceph/msgpool.h25
-rw-r--r--include/linux/ceph/msgr.h175
-rw-r--r--include/linux/ceph/osd_client.h290
-rw-r--r--include/linux/ceph/osdmap.h130
-rw-r--r--include/linux/ceph/pagelist.h75
-rw-r--r--include/linux/ceph/rados.h426
-rw-r--r--include/linux/ceph/types.h29
19 files changed, 3116 insertions, 0 deletions
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
new file mode 100644
index 000000000000..aa13392a7efb
--- /dev/null
+++ b/include/linux/ceph/auth.h
@@ -0,0 +1,92 @@
1#ifndef _FS_CEPH_AUTH_H
2#define _FS_CEPH_AUTH_H
3
4#include <linux/ceph/types.h>
5#include <linux/ceph/buffer.h>
6
7/*
8 * Abstract interface for communicating with the authenticate module.
9 * There is some handshake that takes place between us and the monitor
10 * to acquire the necessary keys. These are used to generate an
11 * 'authorizer' that we use when connecting to a service (mds, osd).
12 */
13
14struct ceph_auth_client;
15struct ceph_authorizer;
16
17struct ceph_auth_client_ops {
18 const char *name;
19
20 /*
21 * true if we are authenticated and can connect to
22 * services.
23 */
24 int (*is_authenticated)(struct ceph_auth_client *ac);
25
26 /*
27 * true if we should (re)authenticate, e.g., when our tickets
28 * are getting old and crusty.
29 */
30 int (*should_authenticate)(struct ceph_auth_client *ac);
31
32 /*
33 * build requests and process replies during monitor
34 * handshake. if handle_reply returns -EAGAIN, we build
35 * another request.
36 */
37 int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
38 int (*handle_reply)(struct ceph_auth_client *ac, int result,
39 void *buf, void *end);
40
41 /*
42 * Create authorizer for connecting to a service, and verify
43 * the response to authenticate the service.
44 */
45 int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
46 struct ceph_authorizer **a,
47 void **buf, size_t *len,
48 void **reply_buf, size_t *reply_len);
49 int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
50 struct ceph_authorizer *a, size_t len);
51 void (*destroy_authorizer)(struct ceph_auth_client *ac,
52 struct ceph_authorizer *a);
53 void (*invalidate_authorizer)(struct ceph_auth_client *ac,
54 int peer_type);
55
56 /* reset when we (re)connect to a monitor */
57 void (*reset)(struct ceph_auth_client *ac);
58
59 void (*destroy)(struct ceph_auth_client *ac);
60};
61
62struct ceph_auth_client {
63 u32 protocol; /* CEPH_AUTH_* */
64 void *private; /* for use by protocol implementation */
65 const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */
66
67 bool negotiating; /* true if negotiating protocol */
68 const char *name; /* entity name */
69 u64 global_id; /* our unique id in system */
70 const struct ceph_crypto_key *key; /* our secret key */
71 unsigned want_keys; /* which services we want */
72};
73
74extern struct ceph_auth_client *ceph_auth_init(const char *name,
75 const struct ceph_crypto_key *key);
76extern void ceph_auth_destroy(struct ceph_auth_client *ac);
77
78extern void ceph_auth_reset(struct ceph_auth_client *ac);
79
80extern int ceph_auth_build_hello(struct ceph_auth_client *ac,
81 void *buf, size_t len);
82extern int ceph_handle_auth_reply(struct ceph_auth_client *ac,
83 void *buf, size_t len,
84 void *reply_buf, size_t reply_len);
85extern int ceph_entity_name_encode(const char *name, void **p, void *end);
86
87extern int ceph_build_auth(struct ceph_auth_client *ac,
88 void *msg_buf, size_t msg_len);
89
90extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
91
92#endif
diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h
new file mode 100644
index 000000000000..58d19014068f
--- /dev/null
+++ b/include/linux/ceph/buffer.h
@@ -0,0 +1,39 @@
1#ifndef __FS_CEPH_BUFFER_H
2#define __FS_CEPH_BUFFER_H
3
4#include <linux/kref.h>
5#include <linux/mm.h>
6#include <linux/vmalloc.h>
7#include <linux/types.h>
8#include <linux/uio.h>
9
10/*
11 * a simple reference counted buffer.
12 *
13 * use kmalloc for small sizes (<= one page), vmalloc for larger
14 * sizes.
15 */
16struct ceph_buffer {
17 struct kref kref;
18 struct kvec vec;
19 size_t alloc_len;
20 bool is_vmalloc;
21};
22
23extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
24extern void ceph_buffer_release(struct kref *kref);
25
26static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b)
27{
28 kref_get(&b->kref);
29 return b;
30}
31
32static inline void ceph_buffer_put(struct ceph_buffer *b)
33{
34 kref_put(&b->kref, ceph_buffer_release);
35}
36
37extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end);
38
39#endif
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
new file mode 100644
index 000000000000..aa2e19182d99
--- /dev/null
+++ b/include/linux/ceph/ceph_debug.h
@@ -0,0 +1,38 @@
1#ifndef _FS_CEPH_DEBUG_H
2#define _FS_CEPH_DEBUG_H
3
4#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5
6#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
7
8/*
9 * wrap pr_debug to include a filename:lineno prefix on each line.
10 * this incurs some overhead (kernel size and execution time) due to
11 * the extra function call at each call site.
12 */
13
14# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
15extern const char *ceph_file_part(const char *s, int len);
16# define dout(fmt, ...) \
17 pr_debug("%.*s %12.12s:%-4d : " fmt, \
18 8 - (int)sizeof(KBUILD_MODNAME), " ", \
19 ceph_file_part(__FILE__, sizeof(__FILE__)), \
20 __LINE__, ##__VA_ARGS__)
21# else
22/* faux printk call just to see any compiler warnings. */
23# define dout(fmt, ...) do { \
24 if (0) \
25 printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
26 } while (0)
27# endif
28
29#else
30
31/*
32 * or, just wrap pr_debug
33 */
34# define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__)
35
36#endif
37
38#endif
diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h
new file mode 100644
index 000000000000..5babb8e95352
--- /dev/null
+++ b/include/linux/ceph/ceph_frag.h
@@ -0,0 +1,109 @@
1#ifndef FS_CEPH_FRAG_H
2#define FS_CEPH_FRAG_H
3
4/*
5 * "Frags" are a way to describe a subset of a 32-bit number space,
6 * using a mask and a value to match against that mask. Any given frag
7 * (subset of the number space) can be partitioned into 2^n sub-frags.
8 *
9 * Frags are encoded into a 32-bit word:
10 * 8 upper bits = "bits"
11 * 24 lower bits = "value"
12 * (We could go to 5+27 bits, but who cares.)
13 *
14 * We use the _most_ significant bits of the 24 bit value. This makes
15 * values logically sort.
16 *
17 * Unfortunately, because the "bits" field is still in the high bits, we
18 * can't sort encoded frags numerically. However, it does allow you
19 * to feed encoded frags as values into frag_contains_value.
20 */
/*
 * Build an encoded frag from a bit count @b and a value @v.
 *
 * @b is stored in the upper 8 bits of the result.  Only the @b most
 * significant bits of the 24-bit value field of @v are kept; everything
 * else in the lower 24 bits is cleared.
 */
static inline __u32 ceph_frag_make(__u32 b, __u32 v)
{
	const __u32 value_field = 0xffffffu;
	__u32 keep = (value_field << (24 - b)) & value_field;

	return (b << 24) | (v & keep);
}
/* Return the "bits" field of an encoded frag, i.e. its upper 8 bits. */
static inline __u32 ceph_frag_bits(__u32 f)
{
	__u32 bits = f >> 24;

	return bits;
}
/* Return the "value" field of an encoded frag, i.e. its lower 24 bits. */
static inline __u32 ceph_frag_value(__u32 f)
{
	const __u32 value_field = 0xffffffu;

	return f & value_field;
}
/*
 * Return the 24-bit mask selecting the value bits that are significant
 * for frag @f: the ceph_frag_bits(f) most significant bits of the value
 * field.
 */
static inline __u32 ceph_frag_mask(__u32 f)
{
	__u32 shift = 24 - ceph_frag_bits(f);
	__u32 shifted = 0xffffffu << shift;

	return shifted & 0xffffffu;
}
/*
 * Return how far the mask of frag @f is shifted left within the 24-bit
 * value field, i.e. 24 minus the frag's bit count.
 */
static inline __u32 ceph_frag_mask_shift(__u32 f)
{
	__u32 bits = ceph_frag_bits(f);

	return 24 - bits;
}
42
/*
 * Return non-zero if @v, once masked with the mask of frag @f, equals
 * the value of @f.
 */
static inline int ceph_frag_contains_value(__u32 f, __u32 v)
{
	__u32 masked = v & ceph_frag_mask(f);

	return masked == ceph_frag_value(f);
}
/*
 * Return non-zero if frag @sub is at least as specific as frag @f and
 * lies inside it.
 */
static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
{
	/* a frag with fewer bits than @f cannot be contained by @f */
	if (ceph_frag_bits(sub) < ceph_frag_bits(f))
		return 0;

	/* same test as for a plain value, applied to sub's value field */
	return (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
}
53
/*
 * Return the frag one level up from @f: one bit fewer, with the value
 * masked by the mask of @f shifted left by one.
 *
 * NOTE(review): there is no check for ceph_frag_bits(f) == 0, in which
 * case the bit count wraps around -- callers presumably never ask for
 * the parent of the root frag; confirm.
 */
static inline __u32 ceph_frag_parent(__u32 f)
{
	__u32 parent_bits = ceph_frag_bits(f) - 1;
	__u32 parent_mask = ceph_frag_mask(f) << 1;

	return ceph_frag_make(parent_bits, ceph_frag_value(f) & parent_mask);
}
/*
 * Return non-zero if @f has at least one bit and its least significant
 * frag bit (0x1000000 >> bits) is clear in the value.
 */
static inline int ceph_frag_is_left_child(__u32 f)
{
	__u32 bits = ceph_frag_bits(f);

	/* a frag with no bits is not a child of anything */
	if (bits == 0)
		return 0;

	return (ceph_frag_value(f) & (0x1000000 >> bits)) == 0;
}
/*
 * Return non-zero if @f has at least one bit and its least significant
 * frag bit (0x1000000 >> bits) is set in the value.
 *
 * The masked result is either 0 or the mask bit itself, so it must be
 * tested against zero: comparing it with 1 only ever matched frags with
 * bits == 24, where the mask bit happens to be 1.
 */
static inline int ceph_frag_is_right_child(__u32 f)
{
	return ceph_frag_bits(f) > 0 &&
		(ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) != 0;
}
/*
 * Return the frag with the same bit count as @f whose least significant
 * frag bit (0x1000000 >> bits) is flipped.
 */
static inline __u32 ceph_frag_sibling(__u32 f)
{
	__u32 bits = ceph_frag_bits(f);
	__u32 flipped = ceph_frag_value(f) ^ (0x1000000 >> bits);

	return ceph_frag_make(bits, flipped);
}
/*
 * Return the frag with one more bit than @f and the same value, i.e.
 * with the newly significant bit left clear.
 */
static inline __u32 ceph_frag_left_child(__u32 f)
{
	__u32 child_bits = ceph_frag_bits(f) + 1;

	return ceph_frag_make(child_bits, ceph_frag_value(f));
}
/*
 * Return the frag with one more bit than @f, with the newly significant
 * bit (0x1000000 >> (bits + 1)) set in the value.
 */
static inline __u32 ceph_frag_right_child(__u32 f)
{
	__u32 child_bits = ceph_frag_bits(f) + 1;
	__u32 child_value = ceph_frag_value(f) |
			    (0x1000000 >> (1 + ceph_frag_bits(f)));

	return ceph_frag_make(child_bits, child_value);
}
/*
 * Return the frag obtained by extending @f by @by bits, with @i placed
 * in the newly significant bits of the value.
 */
static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
{
	int child_bits = ceph_frag_bits(f) + by;
	int child_index = i << (24 - child_bits);

	return ceph_frag_make(child_bits, ceph_frag_value(f) | child_index);
}
/* Return non-zero if the value field of @f is zero. */
static inline int ceph_frag_is_leftmost(__u32 f)
{
	return !ceph_frag_value(f);
}
/*
 * Return non-zero if every significant value bit of @f is set, i.e. the
 * value equals the frag's own mask.
 */
static inline int ceph_frag_is_rightmost(__u32 f)
{
	__u32 all_set = ceph_frag_mask(f);

	return ceph_frag_value(f) == all_set;
}
/*
 * Return the frag with the same bit count as @f whose value is advanced
 * by one step of the least significant frag bit (0x1000000 >> bits).
 */
static inline __u32 ceph_frag_next(__u32 f)
{
	__u32 bits = ceph_frag_bits(f);
	__u32 advanced = ceph_frag_value(f) + (0x1000000 >> bits);

	return ceph_frag_make(bits, advanced);
}
102
103/*
104 * comparator to sort frags logically, as when traversing the
105 * number space in ascending order...
106 */
107int ceph_frag_compare(__u32 a, __u32 b);
108
109#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
new file mode 100644
index 000000000000..b8c60694b2b0
--- /dev/null
+++ b/include/linux/ceph/ceph_fs.h
@@ -0,0 +1,751 @@
1/*
2 * ceph_fs.h - Ceph constants and data types to share between kernel and
3 * user space.
4 *
5 * Most types in this file are defined as little-endian, and are
6 * primarily intended to describe data structures that pass over the
7 * wire or that are stored on disk.
8 *
9 * LGPL2
10 */
11
12#ifndef CEPH_FS_H
13#define CEPH_FS_H
14
15#include "msgr.h"
16#include "rados.h"
17
18/*
19 * subprotocol versions. when specific message types or high-level
20 * protocols change, bump the affected components. we rev
21 * internal cluster protocols separately from the public,
22 * client-facing protocol.
23 */
24#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
25#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
26#define CEPH_MON_PROTOCOL 5 /* cluster internal */
27#define CEPH_OSDC_PROTOCOL 24 /* server/client */
28#define CEPH_MDSC_PROTOCOL 32 /* server/client */
29#define CEPH_MONC_PROTOCOL 15 /* server/client */
30
31
32#define CEPH_INO_ROOT 1
33#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
34
35/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
36#define CEPH_MAX_MON 31
37
38
39/*
40 * feature bits
41 */
42#define CEPH_FEATURE_UID (1<<0)
43#define CEPH_FEATURE_NOSRCADDR (1<<1)
44#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
45#define CEPH_FEATURE_FLOCK (1<<3)
46#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
47#define CEPH_FEATURE_MONNAMES (1<<5)
48#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
49#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
50
51
52/*
53 * ceph_file_layout - describe data layout for a file/inode
54 */
55struct ceph_file_layout {
56 /* file -> object mapping */
57 __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple
58 of page size. */
59 __le32 fl_stripe_count; /* over this many objects */
60 __le32 fl_object_size; /* until objects are this big, then move to
61 new objects */
62 __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */
63
64 /* pg -> disk layout */
65 __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
66
67 /* object -> pg layout */
68 __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
69 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
70} __attribute__ ((packed));
71
72#define CEPH_MIN_STRIPE_UNIT 65536
73
74int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
75
76struct ceph_dir_layout {
77 __u8 dl_dir_hash; /* see ceph_hash.h for ids */
78 __u8 dl_unused1;
79 __u16 dl_unused2;
80 __u32 dl_unused3;
81} __attribute__ ((packed));
82
83/* crypto algorithms */
84#define CEPH_CRYPTO_NONE 0x0
85#define CEPH_CRYPTO_AES 0x1
86
87#define CEPH_AES_IV "cephsageyudagreg"
88
89/* security/authentication protocols */
90#define CEPH_AUTH_UNKNOWN 0x0
91#define CEPH_AUTH_NONE 0x1
92#define CEPH_AUTH_CEPHX 0x2
93
94#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
95
96
97/*********************************************
98 * message layer
99 */
100
101/*
102 * message types
103 */
104
105/* misc */
106#define CEPH_MSG_SHUTDOWN 1
107#define CEPH_MSG_PING 2
108
109/* client <-> monitor */
110#define CEPH_MSG_MON_MAP 4
111#define CEPH_MSG_MON_GET_MAP 5
112#define CEPH_MSG_STATFS 13
113#define CEPH_MSG_STATFS_REPLY 14
114#define CEPH_MSG_MON_SUBSCRIBE 15
115#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
116#define CEPH_MSG_AUTH 17
117#define CEPH_MSG_AUTH_REPLY 18
118
119/* client <-> mds */
120#define CEPH_MSG_MDS_MAP 21
121
122#define CEPH_MSG_CLIENT_SESSION 22
123#define CEPH_MSG_CLIENT_RECONNECT 23
124
125#define CEPH_MSG_CLIENT_REQUEST 24
126#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
127#define CEPH_MSG_CLIENT_REPLY 26
128#define CEPH_MSG_CLIENT_CAPS 0x310
129#define CEPH_MSG_CLIENT_LEASE 0x311
130#define CEPH_MSG_CLIENT_SNAP 0x312
131#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
132
133/* pool ops */
134#define CEPH_MSG_POOLOP_REPLY 48
135#define CEPH_MSG_POOLOP 49
136
137
138/* osd */
139#define CEPH_MSG_OSD_MAP 41
140#define CEPH_MSG_OSD_OP 42
141#define CEPH_MSG_OSD_OPREPLY 43
142#define CEPH_MSG_WATCH_NOTIFY 44
143
144
145/* watch-notify operations */
146enum {
147 WATCH_NOTIFY = 1, /* notifying watcher */
148 WATCH_NOTIFY_COMPLETE = 2, /* notifier notified when done */
149};
150
151
152/* pool operations */
153enum {
154 POOL_OP_CREATE = 0x01,
155 POOL_OP_DELETE = 0x02,
156 POOL_OP_AUID_CHANGE = 0x03,
157 POOL_OP_CREATE_SNAP = 0x11,
158 POOL_OP_DELETE_SNAP = 0x12,
159 POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
160 POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
161};
162
163struct ceph_mon_request_header {
164 __le64 have_version;
165 __le16 session_mon;
166 __le64 session_mon_tid;
167} __attribute__ ((packed));
168
169struct ceph_mon_statfs {
170 struct ceph_mon_request_header monhdr;
171 struct ceph_fsid fsid;
172} __attribute__ ((packed));
173
174struct ceph_statfs {
175 __le64 kb, kb_used, kb_avail;
176 __le64 num_objects;
177} __attribute__ ((packed));
178
179struct ceph_mon_statfs_reply {
180 struct ceph_fsid fsid;
181 __le64 version;
182 struct ceph_statfs st;
183} __attribute__ ((packed));
184
185const char *ceph_pool_op_name(int op);
186
187struct ceph_mon_poolop {
188 struct ceph_mon_request_header monhdr;
189 struct ceph_fsid fsid;
190 __le32 pool;
191 __le32 op;
192 __le64 auid;
193 __le64 snapid;
194 __le32 name_len;
195} __attribute__ ((packed));
196
197struct ceph_mon_poolop_reply {
198 struct ceph_mon_request_header monhdr;
199 struct ceph_fsid fsid;
200 __le32 reply_code;
201 __le32 epoch;
202 char has_data;
203 char data[0];
204} __attribute__ ((packed));
205
206struct ceph_mon_unmanaged_snap {
207 __le64 snapid;
208} __attribute__ ((packed));
209
210struct ceph_osd_getmap {
211 struct ceph_mon_request_header monhdr;
212 struct ceph_fsid fsid;
213 __le32 start;
214} __attribute__ ((packed));
215
216struct ceph_mds_getmap {
217 struct ceph_mon_request_header monhdr;
218 struct ceph_fsid fsid;
219} __attribute__ ((packed));
220
221struct ceph_client_mount {
222 struct ceph_mon_request_header monhdr;
223} __attribute__ ((packed));
224
225#define CEPH_SUBSCRIBE_ONETIME 1 /* i want only 1 update after have */
226
/*
 * One subscription entry: two 64-bit little-endian fields followed by a
 * single-byte "onetime" flag, packed with no padding.
 */
struct ceph_mon_subscribe_item {
	__le64 have_version;
	__le64 have;
	__u8 onetime;
} __attribute__ ((packed));
231
232struct ceph_mon_subscribe_ack {
233 __le32 duration; /* seconds */
234 struct ceph_fsid fsid;
235} __attribute__ ((packed));
236
237/*
238 * mds states
239 * > 0 -> in
240 * <= 0 -> out
241 */
242#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */
243#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees.
244 empty log. */
245#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */
246#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */
247#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */
248#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */
249#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
250
251#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */
252#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed
253 operations (import, rename, etc.) */
254#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */
255#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */
256#define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */
257#define CEPH_MDS_STATE_ACTIVE 13 /* up, active */
258#define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */
259
260extern const char *ceph_mds_state_name(int s);
261
262
263/*
264 * metadata lock types.
265 * - these are bitmasks.. we can compose them
266 * - they also define the lock ordering by the MDS
267 * - a few of these are internal to the mds
268 */
269#define CEPH_LOCK_DVERSION 1
270#define CEPH_LOCK_DN 2
271#define CEPH_LOCK_ISNAP 16
272#define CEPH_LOCK_IVERSION 32 /* mds internal */
273#define CEPH_LOCK_IFILE 64
274#define CEPH_LOCK_IAUTH 128
275#define CEPH_LOCK_ILINK 256
276#define CEPH_LOCK_IDFT 512 /* dir frag tree */
277#define CEPH_LOCK_INEST 1024 /* mds internal */
278#define CEPH_LOCK_IXATTR 2048
279#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
280#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
281
282/* client_session ops */
283enum {
284 CEPH_SESSION_REQUEST_OPEN,
285 CEPH_SESSION_OPEN,
286 CEPH_SESSION_REQUEST_CLOSE,
287 CEPH_SESSION_CLOSE,
288 CEPH_SESSION_REQUEST_RENEWCAPS,
289 CEPH_SESSION_RENEWCAPS,
290 CEPH_SESSION_STALE,
291 CEPH_SESSION_RECALL_STATE,
292};
293
294extern const char *ceph_session_op_name(int op);
295
296struct ceph_mds_session_head {
297 __le32 op;
298 __le64 seq;
299 struct ceph_timespec stamp;
300 __le32 max_caps, max_leases;
301} __attribute__ ((packed));
302
303/* client_request */
304/*
305 * metadata ops.
306 * & 0x001000 -> write op
307 * & 0x010000 -> follow symlink (e.g. stat(), not lstat()).
308 * & 0x100000 -> use weird ino/path trace
309 */
310#define CEPH_MDS_OP_WRITE 0x001000
311enum {
312 CEPH_MDS_OP_LOOKUP = 0x00100,
313 CEPH_MDS_OP_GETATTR = 0x00101,
314 CEPH_MDS_OP_LOOKUPHASH = 0x00102,
315 CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
316 CEPH_MDS_OP_LOOKUPINO = 0x00104,
317
318 CEPH_MDS_OP_SETXATTR = 0x01105,
319 CEPH_MDS_OP_RMXATTR = 0x01106,
320 CEPH_MDS_OP_SETLAYOUT = 0x01107,
321 CEPH_MDS_OP_SETATTR = 0x01108,
322 CEPH_MDS_OP_SETFILELOCK= 0x01109,
323 CEPH_MDS_OP_GETFILELOCK= 0x00110,
324 CEPH_MDS_OP_SETDIRLAYOUT=0x0110a,
325
326 CEPH_MDS_OP_MKNOD = 0x01201,
327 CEPH_MDS_OP_LINK = 0x01202,
328 CEPH_MDS_OP_UNLINK = 0x01203,
329 CEPH_MDS_OP_RENAME = 0x01204,
330 CEPH_MDS_OP_MKDIR = 0x01220,
331 CEPH_MDS_OP_RMDIR = 0x01221,
332 CEPH_MDS_OP_SYMLINK = 0x01222,
333
334 CEPH_MDS_OP_CREATE = 0x01301,
335 CEPH_MDS_OP_OPEN = 0x00302,
336 CEPH_MDS_OP_READDIR = 0x00305,
337
338 CEPH_MDS_OP_LOOKUPSNAP = 0x00400,
339 CEPH_MDS_OP_MKSNAP = 0x01400,
340 CEPH_MDS_OP_RMSNAP = 0x01401,
341 CEPH_MDS_OP_LSSNAP = 0x00402,
342};
343
344extern const char *ceph_mds_op_name(int op);
345
346
347#define CEPH_SETATTR_MODE 1
348#define CEPH_SETATTR_UID 2
349#define CEPH_SETATTR_GID 4
350#define CEPH_SETATTR_MTIME 8
351#define CEPH_SETATTR_ATIME 16
352#define CEPH_SETATTR_SIZE 32
353#define CEPH_SETATTR_CTIME 64
354
355union ceph_mds_request_args {
356 struct {
357 __le32 mask; /* CEPH_CAP_* */
358 } __attribute__ ((packed)) getattr;
359 struct {
360 __le32 mode;
361 __le32 uid;
362 __le32 gid;
363 struct ceph_timespec mtime;
364 struct ceph_timespec atime;
365 __le64 size, old_size; /* old_size needed by truncate */
366 __le32 mask; /* CEPH_SETATTR_* */
367 } __attribute__ ((packed)) setattr;
368 struct {
369 __le32 frag; /* which dir fragment */
370 __le32 max_entries; /* how many dentries to grab */
371 __le32 max_bytes;
372 } __attribute__ ((packed)) readdir;
373 struct {
374 __le32 mode;
375 __le32 rdev;
376 } __attribute__ ((packed)) mknod;
377 struct {
378 __le32 mode;
379 } __attribute__ ((packed)) mkdir;
380 struct {
381 __le32 flags;
382 __le32 mode;
383 __le32 stripe_unit; /* layout for newly created file */
384 __le32 stripe_count; /* ... */
385 __le32 object_size;
386 __le32 file_replication;
387 __le32 preferred;
388 } __attribute__ ((packed)) open;
389 struct {
390 __le32 flags;
391 } __attribute__ ((packed)) setxattr;
392 struct {
393 struct ceph_file_layout layout;
394 } __attribute__ ((packed)) setlayout;
395 struct {
396 __u8 rule; /* currently fcntl or flock */
397 __u8 type; /* shared, exclusive, remove*/
398 __le64 pid; /* process id requesting the lock */
399 __le64 pid_namespace;
400 __le64 start; /* initial location to lock */
401 __le64 length; /* num bytes to lock from start */
402 __u8 wait; /* will caller wait for lock to become available? */
403 } __attribute__ ((packed)) filelock_change;
404} __attribute__ ((packed));
405
406#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
407#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
408
409struct ceph_mds_request_head {
410 __le64 oldest_client_tid;
411 __le32 mdsmap_epoch; /* on client */
412 __le32 flags; /* CEPH_MDS_FLAG_* */
413 __u8 num_retry, num_fwd; /* count retry, fwd attempts */
414 __le16 num_releases; /* # include cap/lease release records */
415 __le32 op; /* mds op code */
416 __le32 caller_uid, caller_gid;
417 __le64 ino; /* use this ino for openc, mkdir, mknod,
418 etc. (if replaying) */
419 union ceph_mds_request_args args;
420} __attribute__ ((packed));
421
422/* cap/lease release record */
423struct ceph_mds_request_release {
424 __le64 ino, cap_id; /* ino and unique cap id */
425 __le32 caps, wanted; /* new issued, wanted */
426 __le32 seq, issue_seq, mseq;
427 __le32 dname_seq; /* if releasing a dentry lease, a */
428 __le32 dname_len; /* string follows. */
429} __attribute__ ((packed));
430
431/* client reply */
432struct ceph_mds_reply_head {
433 __le32 op;
434 __le32 result;
435 __le32 mdsmap_epoch;
436 __u8 safe; /* true if committed to disk */
437 __u8 is_dentry, is_target; /* true if dentry, target inode records
438 are included with reply */
439} __attribute__ ((packed));
440
441/* one for each node split */
442struct ceph_frag_tree_split {
443 __le32 frag; /* this frag splits... */
444 __le32 by; /* ...by this many bits */
445} __attribute__ ((packed));
446
447struct ceph_frag_tree_head {
448 __le32 nsplits; /* num ceph_frag_tree_split records */
449 struct ceph_frag_tree_split splits[];
450} __attribute__ ((packed));
451
452/* capability issue, for bundling with mds reply */
453struct ceph_mds_reply_cap {
454 __le32 caps, wanted; /* caps issued, wanted */
455 __le64 cap_id;
456 __le32 seq, mseq;
457 __le64 realm; /* snap realm */
458 __u8 flags; /* CEPH_CAP_FLAG_* */
459} __attribute__ ((packed));
460
461#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */
462
463/* inode record, for bundling with mds reply */
464struct ceph_mds_reply_inode {
465 __le64 ino;
466 __le64 snapid;
467 __le32 rdev;
468 __le64 version; /* inode version */
469 __le64 xattr_version; /* version for xattr blob */
470 struct ceph_mds_reply_cap cap; /* caps issued for this inode */
471 struct ceph_file_layout layout;
472 struct ceph_timespec ctime, mtime, atime;
473 __le32 time_warp_seq;
474 __le64 size, max_size, truncate_size;
475 __le32 truncate_seq;
476 __le32 mode, uid, gid;
477 __le32 nlink;
478 __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */
479 struct ceph_timespec rctime;
480 struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
481} __attribute__ ((packed));
482/* followed by frag array, symlink string, dir layout, xattr blob */
483
484/* reply_lease follows dname, and reply_inode */
485struct ceph_mds_reply_lease {
486 __le16 mask; /* lease type(s) */
487 __le32 duration_ms; /* lease duration */
488 __le32 seq;
489} __attribute__ ((packed));
490
491struct ceph_mds_reply_dirfrag {
492 __le32 frag; /* fragment */
493 __le32 auth; /* auth mds, if this is a delegation point */
494 __le32 ndist; /* number of mds' this is replicated on */
495 __le32 dist[];
496} __attribute__ ((packed));
497
498#define CEPH_LOCK_FCNTL 1
499#define CEPH_LOCK_FLOCK 2
500
501#define CEPH_LOCK_SHARED 1
502#define CEPH_LOCK_EXCL 2
503#define CEPH_LOCK_UNLOCK 4
504
505struct ceph_filelock {
506 __le64 start;/* file offset to start lock at */
507 __le64 length; /* num bytes to lock; 0 for all following start */
508 __le64 client; /* which client holds the lock */
509 __le64 pid; /* process id holding the lock on the client */
510 __le64 pid_namespace;
511 __u8 type; /* shared lock, exclusive lock, or unlock */
512} __attribute__ ((packed));
513
514
515/* file access modes */
516#define CEPH_FILE_MODE_PIN 0
517#define CEPH_FILE_MODE_RD 1
518#define CEPH_FILE_MODE_WR 2
519#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */
520#define CEPH_FILE_MODE_LAZY 4 /* lazy io */
521#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */
522
523int ceph_flags_to_mode(int flags);
524
525
526/* capability bits */
527#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */
528
529/* generic cap bits */
530#define CEPH_CAP_GSHARED 1 /* client can read */
531#define CEPH_CAP_GEXCL 2 /* client can read and update */
532#define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */
533#define CEPH_CAP_GRD 8 /* (file) client can read */
534#define CEPH_CAP_GWR 16 /* (file) client can write */
535#define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */
536#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
537#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */
538
539/* per-lock shift */
540#define CEPH_CAP_SAUTH 2
541#define CEPH_CAP_SLINK 4
542#define CEPH_CAP_SXATTR 6
543#define CEPH_CAP_SFILE 8
544#define CEPH_CAP_SFLOCK 20
545
546#define CEPH_CAP_BITS 22
547
548/* composed values */
549#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
550#define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH)
551#define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK)
552#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK)
553#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR)
554#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR)
/*
 * Shift generic cap bits into the file-lock position.  The argument is
 * parenthesized so that compound expressions such as (a | b) are shifted
 * as a whole.
 */
#define CEPH_CAP_FILE(x) ((x) << CEPH_CAP_SFILE)
556#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE)
557#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)
558#define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE)
559#define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE)
560#define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE)
561#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE)
562#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
563#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE)
564#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK)
565#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK)
566
567
568/* cap masks (for getattr) */
569#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN
570#define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */
571#define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN
572#define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED
573#define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED
574#define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED
575#define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED
576#define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED
577#define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED
578#define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED
579#define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */
580#define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED
581#define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \
582 CEPH_CAP_AUTH_SHARED | \
583 CEPH_CAP_LINK_SHARED | \
584 CEPH_CAP_FILE_SHARED | \
585 CEPH_CAP_XATTR_SHARED)
586
587#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \
588 CEPH_CAP_LINK_SHARED | \
589 CEPH_CAP_XATTR_SHARED | \
590 CEPH_CAP_FILE_SHARED)
591#define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \
592 CEPH_CAP_FILE_CACHE)
593
594#define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \
595 CEPH_CAP_LINK_EXCL | \
596 CEPH_CAP_XATTR_EXCL | \
597 CEPH_CAP_FILE_EXCL)
598#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \
599 CEPH_CAP_FILE_EXCL)
600#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
601#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
602 CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \
603 CEPH_CAP_PIN)
604
605#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
606 CEPH_LOCK_IXATTR)
607
608int ceph_caps_for_mode(int mode);
609
610enum {
611 CEPH_CAP_OP_GRANT, /* mds->client grant */
612 CEPH_CAP_OP_REVOKE, /* mds->client revoke */
613 CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */
614 CEPH_CAP_OP_EXPORT, /* mds has exported the cap */
615 CEPH_CAP_OP_IMPORT, /* mds has imported the cap */
616 CEPH_CAP_OP_UPDATE, /* client->mds update */
617 CEPH_CAP_OP_DROP, /* client->mds drop cap bits */
618 CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */
619 CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */
620 CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */
621 CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */
622 CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */
623 CEPH_CAP_OP_RENEW, /* client->mds renewal request */
624};
625
626extern const char *ceph_cap_op_name(int op);
627
628/*
629 * caps message, used for capability callbacks, acks, requests, etc.
630 */
631struct ceph_mds_caps {
632 __le32 op; /* CEPH_CAP_OP_* */
633 __le64 ino, realm;
634 __le64 cap_id;
635 __le32 seq, issue_seq;
636 __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */
637 __le32 migrate_seq;
638 __le64 snap_follows;
639 __le32 snap_trace_len;
640
641 /* authlock */
642 __le32 uid, gid, mode;
643
644 /* linklock */
645 __le32 nlink;
646
647 /* xattrlock */
648 __le32 xattr_len;
649 __le64 xattr_version;
650
651 /* filelock */
652 __le64 size, max_size, truncate_size;
653 __le32 truncate_seq;
654 struct ceph_timespec mtime, atime, ctime;
655 struct ceph_file_layout layout;
656 __le32 time_warp_seq;
657} __attribute__ ((packed));
658
659/* cap release msg head */
660struct ceph_mds_cap_release {
661 __le32 num; /* number of cap_items that follow */
662} __attribute__ ((packed));
663
664struct ceph_mds_cap_item {
665 __le64 ino;
666 __le64 cap_id;
667 __le32 migrate_seq, seq;
668} __attribute__ ((packed));
669
670#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */
671#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */
672#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */
673#define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */
674
675extern const char *ceph_lease_op_name(int o);
676
677/* lease msg header */
678struct ceph_mds_lease {
679 __u8 action; /* CEPH_MDS_LEASE_* */
680 __le16 mask; /* which lease */
681 __le64 ino;
682 __le64 first, last; /* snap range */
683 __le32 seq;
684 __le32 duration_ms; /* duration of renewal */
685} __attribute__ ((packed));
686/* followed by a __le32+string for dname */
687
688/* client reconnect */
689struct ceph_mds_cap_reconnect {
690 __le64 cap_id;
691 __le32 wanted;
692 __le32 issued;
693 __le64 snaprealm;
694 __le64 pathbase; /* base ino for our path to this ino */
695 __le32 flock_len; /* size of flock state blob, if any */
696} __attribute__ ((packed));
697/* followed by flock blob */
698
699struct ceph_mds_cap_reconnect_v1 {
700 __le64 cap_id;
701 __le32 wanted;
702 __le32 issued;
703 __le64 size;
704 struct ceph_timespec mtime, atime;
705 __le64 snaprealm;
706 __le64 pathbase; /* base ino for our path to this ino */
707} __attribute__ ((packed));
708
709struct ceph_mds_snaprealm_reconnect {
710 __le64 ino; /* snap realm base */
711 __le64 seq; /* snap seq for this snap realm */
712 __le64 parent; /* parent realm */
713} __attribute__ ((packed));
714
715/*
716 * snaps
717 */
718enum {
719 CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */
720 CEPH_SNAP_OP_CREATE,
721 CEPH_SNAP_OP_DESTROY,
722 CEPH_SNAP_OP_SPLIT,
723};
724
725extern const char *ceph_snap_op_name(int o);
726
727/* snap msg header */
728struct ceph_mds_snap_head {
729 __le32 op; /* CEPH_SNAP_OP_* */
730 __le64 split; /* ino to split off, if any */
731 __le32 num_split_inos; /* # inos belonging to new child realm */
732 __le32 num_split_realms; /* # child realms under new child realm */
733 __le32 trace_len; /* size of snap trace blob */
734} __attribute__ ((packed));
735/* followed by split ino list, then split realms, then the trace blob */
736
737/*
738 * encode info about a snaprealm, as viewed by a client
739 */
740struct ceph_mds_snap_realm {
741 __le64 ino; /* ino */
742 __le64 created; /* snap: when created */
743 __le64 parent; /* ino: parent realm */
744 __le64 parent_since; /* snap: same parent since */
745 __le64 seq; /* snap: version */
746 __le32 num_snaps;
747 __le32 num_prior_parent_snaps;
748} __attribute__ ((packed));
749/* followed by my snap list, then prior parent snap list */
750
751#endif
diff --git a/include/linux/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h
new file mode 100644
index 000000000000..d099c3f90236
--- /dev/null
+++ b/include/linux/ceph/ceph_hash.h
@@ -0,0 +1,13 @@
1#ifndef FS_CEPH_HASH_H
2#define FS_CEPH_HASH_H
3
4#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */
5#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */
6
7extern unsigned ceph_str_hash_linux(const char *s, unsigned len);
8extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len);
9
10extern unsigned ceph_str_hash(int type, const char *s, unsigned len);
11extern const char *ceph_str_hash_name(int type);
12
13#endif
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
new file mode 100644
index 000000000000..2a79702e092b
--- /dev/null
+++ b/include/linux/ceph/debugfs.h
@@ -0,0 +1,33 @@
1#ifndef _FS_CEPH_DEBUGFS_H
2#define _FS_CEPH_DEBUGFS_H
3
4#include "ceph_debug.h"
5#include "types.h"
6
/*
 * CEPH_DEFINE_SHOW_FUNC(name) - generate seq_file open glue for show
 * routine `name`.
 *
 * Expands to name##_open(), which opens the file through single_open()
 * with `name` as the show callback, and to name##_fops, a file_operations
 * table wiring that open routine to seq_read/seq_lseek/single_release.
 *
 * inode->i_private is handed to single_open() as the seq_file private
 * data instead of being stored through file->private_data afterwards:
 * that pointer must not be dereferenced when single_open() fails, and
 * single_open() already installs its data argument as seq_file->private
 * on success.
 */
#define CEPH_DEFINE_SHOW_FUNC(name)					\
static int name##_open(struct inode *inode, struct file *file)		\
{									\
	return single_open(file, name, inode->i_private);		\
}									\
									\
static const struct file_operations name##_fops = {			\
	.open		= name##_open,					\
	.read		= seq_read,					\
	.llseek		= seq_lseek,					\
	.release	= single_release,				\
};
25
26/* debugfs.c */
27extern int ceph_debugfs_init(void);
28extern void ceph_debugfs_cleanup(void);
29extern int ceph_debugfs_client_init(struct ceph_client *client);
30extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
31
32#endif
33
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
new file mode 100644
index 000000000000..c5b6939fb32a
--- /dev/null
+++ b/include/linux/ceph/decode.h
@@ -0,0 +1,201 @@
1#ifndef __CEPH_DECODE_H
2#define __CEPH_DECODE_H
3
4#include <asm/unaligned.h>
5#include <linux/time.h>
6
7#include "types.h"
8
9/*
10 * in all cases,
11 * void **p pointer to position pointer
12 * void *end pointer to end of buffer (last byte + 1)
13 */
14
15static inline u64 ceph_decode_64(void **p)
16{
17 u64 v = get_unaligned_le64(*p);
18 *p += sizeof(u64);
19 return v;
20}
21static inline u32 ceph_decode_32(void **p)
22{
23 u32 v = get_unaligned_le32(*p);
24 *p += sizeof(u32);
25 return v;
26}
27static inline u16 ceph_decode_16(void **p)
28{
29 u16 v = get_unaligned_le16(*p);
30 *p += sizeof(u16);
31 return v;
32}
33static inline u8 ceph_decode_8(void **p)
34{
35 u8 v = *(u8 *)*p;
36 (*p)++;
37 return v;
38}
/* Copy the next @n bytes at *p into @pv, then step *p past them. */
static inline void ceph_decode_copy(void **p, void *pv, size_t n)
{
	memcpy(pv, *p, n);
	*p = (char *)*p + n;
}
44
45/*
46 * bounds check input.
47 */
/*
 * Jump to @bad unless at least @n more bytes are available in [*p, end).
 *
 * @n is compared against the room that is left instead of forming
 * *p + n: adding a huge (e.g. wire-supplied) length to the pointer can
 * wrap around and slip past an "is it beyond end" comparison.  Widening
 * @n to unsigned long long also turns a negative length into a huge
 * value, so it is rejected rather than moving the limit backwards.
 */
#define ceph_decode_need(p, end, n, bad)				\
	do {								\
		const char *_need_pos = (const char *)*(p);		\
		const char *_need_end = (const char *)(end);		\
									\
		if (unlikely(_need_end < _need_pos ||			\
			     (unsigned long long)(n) >			\
			     (unsigned long long)(_need_end - _need_pos))) \
			goto bad;					\
	} while (0)
53
54#define ceph_decode_64_safe(p, end, v, bad) \
55 do { \
56 ceph_decode_need(p, end, sizeof(u64), bad); \
57 v = ceph_decode_64(p); \
58 } while (0)
59#define ceph_decode_32_safe(p, end, v, bad) \
60 do { \
61 ceph_decode_need(p, end, sizeof(u32), bad); \
62 v = ceph_decode_32(p); \
63 } while (0)
64#define ceph_decode_16_safe(p, end, v, bad) \
65 do { \
66 ceph_decode_need(p, end, sizeof(u16), bad); \
67 v = ceph_decode_16(p); \
68 } while (0)
69#define ceph_decode_8_safe(p, end, v, bad) \
70 do { \
71 ceph_decode_need(p, end, sizeof(u8), bad); \
72 v = ceph_decode_8(p); \
73 } while (0)
74
75#define ceph_decode_copy_safe(p, end, pv, n, bad) \
76 do { \
77 ceph_decode_need(p, end, n, bad); \
78 ceph_decode_copy(p, pv, n); \
79 } while (0)
80
81/*
82 * struct ceph_timespec <-> struct timespec
83 */
84static inline void ceph_decode_timespec(struct timespec *ts,
85 const struct ceph_timespec *tv)
86{
87 ts->tv_sec = le32_to_cpu(tv->tv_sec);
88 ts->tv_nsec = le32_to_cpu(tv->tv_nsec);
89}
90static inline void ceph_encode_timespec(struct ceph_timespec *tv,
91 const struct timespec *ts)
92{
93 tv->tv_sec = cpu_to_le32(ts->tv_sec);
94 tv->tv_nsec = cpu_to_le32(ts->tv_nsec);
95}
96
97/*
98 * sockaddr_storage <-> ceph_sockaddr
99 */
100static inline void ceph_encode_addr(struct ceph_entity_addr *a)
101{
102 __be16 ss_family = htons(a->in_addr.ss_family);
103 a->in_addr.ss_family = *(__u16 *)&ss_family;
104}
105static inline void ceph_decode_addr(struct ceph_entity_addr *a)
106{
107 __be16 ss_family = *(__be16 *)&a->in_addr.ss_family;
108 a->in_addr.ss_family = ntohs(ss_family);
109 WARN_ON(a->in_addr.ss_family == 512);
110}
111
112/*
113 * encoders
114 */
115static inline void ceph_encode_64(void **p, u64 v)
116{
117 put_unaligned_le64(v, (__le64 *)*p);
118 *p += sizeof(u64);
119}
120static inline void ceph_encode_32(void **p, u32 v)
121{
122 put_unaligned_le32(v, (__le32 *)*p);
123 *p += sizeof(u32);
124}
125static inline void ceph_encode_16(void **p, u16 v)
126{
127 put_unaligned_le16(v, (__le16 *)*p);
128 *p += sizeof(u16);
129}
130static inline void ceph_encode_8(void **p, u8 v)
131{
132 *(u8 *)*p = v;
133 (*p)++;
134}
/* Copy @len bytes from @s to *p, then advance *p past them. */
static inline void ceph_encode_copy(void **p, const void *s, int len)
{
	void *dst = *p;

	memcpy(dst, s, len);
	*p = (char *)dst + len;
}
140
141/*
142 * filepath, string encoders
143 */
144static inline void ceph_encode_filepath(void **p, void *end,
145 u64 ino, const char *path)
146{
147 u32 len = path ? strlen(path) : 0;
148 BUG_ON(*p + sizeof(ino) + sizeof(len) + len > end);
149 ceph_encode_8(p, 1);
150 ceph_encode_64(p, ino);
151 ceph_encode_32(p, len);
152 if (len)
153 memcpy(*p, path, len);
154 *p += len;
155}
156
157static inline void ceph_encode_string(void **p, void *end,
158 const char *s, u32 len)
159{
160 BUG_ON(*p + sizeof(len) + len > end);
161 ceph_encode_32(p, len);
162 if (len)
163 memcpy(*p, s, len);
164 *p += len;
165}
166
/*
 * Jump to @bad unless at least @n more bytes can be written in [*p, end).
 *
 * @n is compared against the room that is left instead of forming
 * *p + n, which can wrap around for a huge length and slip past the
 * check.  Widening @n to unsigned long long also turns a negative
 * length into a huge value, so it is rejected.
 */
#define ceph_encode_need(p, end, n, bad)				\
	do {								\
		const char *_need_pos = (const char *)*(p);		\
		const char *_need_end = (const char *)(end);		\
									\
		if (unlikely(_need_end < _need_pos ||			\
			     (unsigned long long)(n) >			\
			     (unsigned long long)(_need_end - _need_pos))) \
			goto bad;					\
	} while (0)
172
173#define ceph_encode_64_safe(p, end, v, bad) \
174 do { \
175 ceph_encode_need(p, end, sizeof(u64), bad); \
176 ceph_encode_64(p, v); \
177 } while (0)
178#define ceph_encode_32_safe(p, end, v, bad) \
179 do { \
180 ceph_encode_need(p, end, sizeof(u32), bad); \
181 ceph_encode_32(p, v); \
182 } while (0)
183#define ceph_encode_16_safe(p, end, v, bad) \
184 do { \
185 ceph_encode_need(p, end, sizeof(u16), bad); \
186 ceph_encode_16(p, v); \
187 } while (0)
188
189#define ceph_encode_copy_safe(p, end, pv, n, bad) \
190 do { \
191 ceph_encode_need(p, end, n, bad); \
192 ceph_encode_copy(p, pv, n); \
193 } while (0)
/*
 * Bounds-checked ceph_encode_string(): jump to @bad unless the 32-bit
 * length prefix plus @n bytes of string data fit before @end.  Checking
 * only @n would let an almost-full buffer through to the BUG_ON() inside
 * ceph_encode_string() instead of failing gracefully.
 */
#define ceph_encode_string_safe(p, end, s, n, bad)			\
	do {								\
		ceph_encode_need(p, end, sizeof(u32) + (n), bad);	\
		ceph_encode_string(p, end, s, n);			\
	} while (0)
199
200
201#endif
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
new file mode 100644
index 000000000000..6365f041745b
--- /dev/null
+++ b/include/linux/ceph/libceph.h
@@ -0,0 +1,249 @@
1#ifndef _FS_CEPH_LIBCEPH_H
2#define _FS_CEPH_LIBCEPH_H
3
4#include "ceph_debug.h"
5
6#include <asm/unaligned.h>
7#include <linux/backing-dev.h>
8#include <linux/completion.h>
9#include <linux/exportfs.h>
10#include <linux/fs.h>
11#include <linux/mempool.h>
12#include <linux/pagemap.h>
13#include <linux/wait.h>
14#include <linux/writeback.h>
15#include <linux/slab.h>
16
17#include "types.h"
18#include "messenger.h"
19#include "msgpool.h"
20#include "mon_client.h"
21#include "osd_client.h"
22#include "ceph_fs.h"
23
24/*
25 * Supported features
26 */
27#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
28#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR
29
30/*
31 * mount options
32 */
#define CEPH_OPT_FSID             (1<<0)
#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */

/* flags value with no option set; a plain expression, so no trailing ';' */
#define CEPH_OPT_DEFAULT          (0)

/*
 * Set / test the CEPH_OPT_<opt> bit in (client)->options->flags.
 *
 * Both expand to one parenthesized expression with no trailing ';', so
 * they can be used as the body of an unbraced if/else and inside larger
 * expressions.  ceph_test_opt() evaluates to 0 or 1.
 */
#define ceph_set_opt(client, opt) \
	((client)->options->flags |= CEPH_OPT_##opt)
#define ceph_test_opt(client, opt) \
	(!!((client)->options->flags & CEPH_OPT_##opt))
44
45struct ceph_options {
46 int flags;
47 struct ceph_fsid fsid;
48 struct ceph_entity_addr my_addr;
49 int mount_timeout;
50 int osd_idle_ttl;
51 int osd_timeout;
52 int osd_keepalive_timeout;
53
54 /*
55 * any type that can't be simply compared or doesn't need
56 * to be compared should go beyond this point,
57 * ceph_compare_options() should be updated accordingly
58 */
59
60 struct ceph_entity_addr *mon_addr; /* should be the first
61 pointer type of args */
62 int num_mon;
63 char *name;
64 struct ceph_crypto_key *key;
65};
66
67/*
68 * defaults
69 */
70#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
71#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */
72#define CEPH_OSD_KEEPALIVE_DEFAULT 5
73#define CEPH_OSD_IDLE_TTL_DEFAULT 60
74
75#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
76#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
77
78#define CEPH_AUTH_NAME_DEFAULT "guest"
79
80/*
81 * Delay telling the MDS we no longer want caps, in case we reopen
82 * the file. Delay a minimum amount of time, even if we send a cap
83 * message for some other reason. Otherwise, take the opportunity to
84 * update the mds to avoid sending another message later.
85 */
86#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
87#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
88
89#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
90
91/* mount state */
92enum {
93 CEPH_MOUNT_MOUNTING,
94 CEPH_MOUNT_MOUNTED,
95 CEPH_MOUNT_UNMOUNTING,
96 CEPH_MOUNT_UNMOUNTED,
97 CEPH_MOUNT_SHUTDOWN,
98};
99
100/*
101 * subtract jiffies
102 */
/*
 * Return the jiffies difference a - b.  BUG()s when time_after() says
 * that @b lies after @a.
 */
static inline unsigned long time_sub(unsigned long a, unsigned long b)
{
	long delta;

	BUG_ON(time_after(b, a));
	delta = (long)a - (long)b;
	return delta;
}
108
109struct ceph_mds_client;
110
111/*
112 * per client state
113 *
114 * possibly shared by multiple mount points, if they are
115 * mounting the same ceph filesystem/cluster.
116 */
117struct ceph_client {
118 struct ceph_fsid fsid;
119 bool have_fsid;
120
121 void *private;
122
123 struct ceph_options *options;
124
125 struct mutex mount_mutex; /* serialize mount attempts */
126 wait_queue_head_t auth_wq;
127 int auth_err;
128
129 int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
130
131 u32 supported_features;
132 u32 required_features;
133
134 struct ceph_messenger *msgr; /* messenger instance */
135 struct ceph_mon_client monc;
136 struct ceph_osd_client osdc;
137
138#ifdef CONFIG_DEBUG_FS
139 struct dentry *debugfs_dir;
140 struct dentry *debugfs_monmap;
141 struct dentry *debugfs_osdmap;
142#endif
143};
144
145
146
147/*
148 * snapshots
149 */
150
151/*
152 * A "snap context" is the set of existing snapshots when we
153 * write data. It is used by the OSD to guide its COW behavior.
154 *
155 * The ceph_snap_context is refcounted, and attached to each dirty
156 * page, indicating which context the dirty data belonged when it was
157 * dirtied.
158 */
159struct ceph_snap_context {
160 atomic_t nref;
161 u64 seq;
162 int num_snaps;
163 u64 snaps[];
164};
165
166static inline struct ceph_snap_context *
167ceph_get_snap_context(struct ceph_snap_context *sc)
168{
169 /*
170 printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
171 atomic_read(&sc->nref)+1);
172 */
173 if (sc)
174 atomic_inc(&sc->nref);
175 return sc;
176}
177
178static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
179{
180 if (!sc)
181 return;
182 /*
183 printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
184 atomic_read(&sc->nref)-1);
185 */
186 if (atomic_dec_and_test(&sc->nref)) {
187 /*printk(" deleting snap_context %p\n", sc);*/
188 kfree(sc);
189 }
190}
191
192/*
193 * calculate the number of pages a given length and offset map onto,
194 * if we align the data.
195 */
196static inline int calc_pages_for(u64 off, u64 len)
197{
198 return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
199 (off >> PAGE_CACHE_SHIFT);
200}
201
202/* ceph_common.c */
203extern const char *ceph_msg_type_name(int type);
204extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
205extern struct kmem_cache *ceph_inode_cachep;
206extern struct kmem_cache *ceph_cap_cachep;
207extern struct kmem_cache *ceph_dentry_cachep;
208extern struct kmem_cache *ceph_file_cachep;
209
210extern int ceph_parse_options(struct ceph_options **popt, char *options,
211 const char *dev_name, const char *dev_name_end,
212 int (*parse_extra_token)(char *c, void *private),
213 void *private);
214extern void ceph_destroy_options(struct ceph_options *opt);
215extern int ceph_compare_options(struct ceph_options *new_opt,
216 struct ceph_client *client);
217extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
218 void *private);
219extern u64 ceph_client_id(struct ceph_client *client);
220extern void ceph_destroy_client(struct ceph_client *client);
221extern int __ceph_open_session(struct ceph_client *client,
222 unsigned long started);
223extern int ceph_open_session(struct ceph_client *client);
224
225/* pagevec.c */
226extern void ceph_release_page_vector(struct page **pages, int num_pages);
227
228extern struct page **ceph_get_direct_page_vector(const char __user *data,
229 int num_pages,
230 bool write_page);
231extern void ceph_put_page_vector(struct page **pages, int num_pages,
232 bool dirty);
233extern void ceph_release_page_vector(struct page **pages, int num_pages);
234extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
235extern int ceph_copy_user_to_page_vector(struct page **pages,
236 const char __user *data,
237 loff_t off, size_t len);
238extern int ceph_copy_to_page_vector(struct page **pages,
239 const char *data,
240 loff_t off, size_t len);
241extern int ceph_copy_from_page_vector(struct page **pages,
242 char *data,
243 loff_t off, size_t len);
244extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
245 loff_t off, size_t len);
246extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
247
248
249#endif /* _FS_CEPH_LIBCEPH_H */
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
new file mode 100644
index 000000000000..4c5cb0880bba
--- /dev/null
+++ b/include/linux/ceph/mdsmap.h
@@ -0,0 +1,62 @@
1#ifndef _FS_CEPH_MDSMAP_H
2#define _FS_CEPH_MDSMAP_H
3
4#include "types.h"
5
6/*
7 * mds map - describe servers in the mds cluster.
8 *
9 * we limit fields to those the client actually cares about
10 */
11struct ceph_mds_info {
12 u64 global_id;
13 struct ceph_entity_addr addr;
14 s32 state;
15 int num_export_targets;
16 bool laggy;
17 u32 *export_targets;
18};
19
20struct ceph_mdsmap {
21 u32 m_epoch, m_client_epoch, m_last_failure;
22 u32 m_root;
23 u32 m_session_timeout; /* seconds */
24 u32 m_session_autoclose; /* seconds */
25 u64 m_max_file_size;
26 u32 m_max_mds; /* size of m_addr, m_state arrays */
27 struct ceph_mds_info *m_info;
28
29 /* which object pools file data can be stored in */
30 int m_num_data_pg_pools;
31 u32 *m_data_pg_pools;
32 u32 m_cas_pg_pool;
33};
34
35static inline struct ceph_entity_addr *
36ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
37{
38 if (w >= m->m_max_mds)
39 return NULL;
40 return &m->m_info[w].addr;
41}
42
43static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
44{
45 BUG_ON(w < 0);
46 if (w >= m->m_max_mds)
47 return CEPH_MDS_STATE_DNE;
48 return m->m_info[w].state;
49}
50
51static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
52{
53 if (w >= 0 && w < m->m_max_mds)
54 return m->m_info[w].laggy;
55 return false;
56}
57
58extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
59extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
60extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
61
62#endif
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
new file mode 100644
index 000000000000..31d91a64838b
--- /dev/null
+++ b/include/linux/ceph/messenger.h
@@ -0,0 +1,257 @@
1#ifndef __FS_CEPH_MESSENGER_H
2#define __FS_CEPH_MESSENGER_H
3
4#include <linux/kref.h>
5#include <linux/mutex.h>
6#include <linux/net.h>
7#include <linux/radix-tree.h>
8#include <linux/uio.h>
9#include <linux/version.h>
10#include <linux/workqueue.h>
11
12#include "types.h"
13#include "buffer.h"
14
15struct ceph_msg;
16struct ceph_connection;
17
18extern struct workqueue_struct *ceph_msgr_wq; /* receive work queue */
19
20/*
21 * Ceph defines these callbacks for handling connection events.
22 */
23struct ceph_connection_operations {
24 struct ceph_connection *(*get)(struct ceph_connection *);
25 void (*put)(struct ceph_connection *);
26
27 /* handle an incoming message. */
28 void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
29
30 /* authorize an outgoing connection */
31 int (*get_authorizer) (struct ceph_connection *con,
32 void **buf, int *len, int *proto,
33 void **reply_buf, int *reply_len, int force_new);
34 int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
35 int (*invalidate_authorizer)(struct ceph_connection *con);
36
37 /* protocol version mismatch */
38 void (*bad_proto) (struct ceph_connection *con);
39
40 /* there was some error on the socket (disconnect, whatever) */
41 void (*fault) (struct ceph_connection *con);
42
43 /* a remote host has terminated a message exchange session, and messages
44 * we sent (or they tried to send us) may be lost. */
45 void (*peer_reset) (struct ceph_connection *con);
46
47 struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
48 struct ceph_msg_header *hdr,
49 int *skip);
50};
51
52/* use format string %s%d */
53#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
54
55struct ceph_messenger {
56 struct ceph_entity_inst inst; /* my name+address */
57 struct ceph_entity_addr my_enc_addr;
58 struct page *zero_page; /* used in certain error cases */
59
60 bool nocrc;
61
62 /*
63 * the global_seq counts connections i (attempt to) initiate
64 * in order to disambiguate certain connect race conditions.
65 */
66 u32 global_seq;
67 spinlock_t global_seq_lock;
68
69 u32 supported_features;
70 u32 required_features;
71};
72
73/*
74 * a single message. it contains a header (src, dest, message type, etc.),
75 * footer (crc values, mainly), a "front" message body, and possibly a
76 * data payload (stored in some number of pages).
77 */
78struct ceph_msg {
79 struct ceph_msg_header hdr; /* header */
80 struct ceph_msg_footer footer; /* footer */
81 struct kvec front; /* unaligned blobs of message */
82 struct ceph_buffer *middle;
83 struct page **pages; /* data payload. NOT OWNER. */
84 unsigned nr_pages; /* size of page array */
85 unsigned page_alignment; /* io offset in first page */
86 struct ceph_pagelist *pagelist; /* instead of pages */
87 struct list_head list_head;
88 struct kref kref;
89 struct bio *bio; /* instead of pages/pagelist */
90 struct bio *bio_iter; /* bio iterator */
91 int bio_seg; /* current bio segment */
92 struct ceph_pagelist *trail; /* the trailing part of the data */
93 bool front_is_vmalloc;
94 bool more_to_follow;
95 bool needs_out_seq;
96 int front_max;
97
98 struct ceph_msgpool *pool;
99};
100
101struct ceph_msg_pos {
102 int page, page_pos; /* which page; offset in page */
103 int data_pos; /* offset in data payload */
104 int did_page_crc; /* true if we've calculated crc for current page */
105};
106
107/* ceph connection fault delay defaults, for exponential backoff */
108#define BASE_DELAY_INTERVAL (HZ/2)
109#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
110
111/*
112 * ceph_connection state bit flags
113 */
114#define LOSSYTX 0 /* we can close channel or drop messages on errors */
115#define CONNECTING 1
116#define NEGOTIATING 2
117#define KEEPALIVE_PENDING 3
118#define WRITE_PENDING 4 /* we have data ready to send */
119#define STANDBY 8 /* no outgoing messages, socket closed. we keep
120 * the ceph_connection around to maintain shared
121 * state with the peer. */
122#define CLOSED 10 /* we've closed the connection */
123#define SOCK_CLOSED 11 /* socket state changed to closed */
124#define OPENING 13 /* open connection w/ (possibly new) peer */
125#define DEAD 14 /* dead, about to kfree */
126#define BACKOFF 15
127
128/*
129 * A single connection with another host.
130 *
131 * We maintain a queue of outgoing messages, and some session state to
132 * ensure that we can preserve the lossless, ordered delivery of
133 * messages in the case of a TCP disconnect.
134 */
135struct ceph_connection {
136 void *private;
137 atomic_t nref;
138
139 const struct ceph_connection_operations *ops;
140
141 struct ceph_messenger *msgr;
142 struct socket *sock;
143 unsigned long state; /* connection state (see flags above) */
144 const char *error_msg; /* error message, if any */
145
146 struct ceph_entity_addr peer_addr; /* peer address */
147 struct ceph_entity_name peer_name; /* peer name */
148 struct ceph_entity_addr peer_addr_for_me;
149 unsigned peer_features;
150 u32 connect_seq; /* identify the most recent connection
151 attempt for this connection, client */
152 u32 peer_global_seq; /* peer's global seq for this connection */
153
154 int auth_retry; /* true if we need a newer authorizer */
155 void *auth_reply_buf; /* where to put the authorizer reply */
156 int auth_reply_buf_len;
157
158 struct mutex mutex;
159
160 /* out queue */
161 struct list_head out_queue;
162 struct list_head out_sent; /* sending or sent but unacked */
163 u64 out_seq; /* last message queued for send */
164
165 u64 in_seq, in_seq_acked; /* last message received, acked */
166
167 /* connection negotiation temps */
168 char in_banner[CEPH_BANNER_MAX_LEN];
169 union {
170 struct { /* outgoing connection */
171 struct ceph_msg_connect out_connect;
172 struct ceph_msg_connect_reply in_reply;
173 };
174 struct { /* incoming */
175 struct ceph_msg_connect in_connect;
176 struct ceph_msg_connect_reply out_reply;
177 };
178 };
179 struct ceph_entity_addr actual_peer_addr;
180
181 /* message out temps */
182 struct ceph_msg *out_msg; /* sending message (== tail of
183 out_sent) */
184 bool out_msg_done;
185 struct ceph_msg_pos out_msg_pos;
186
187 struct kvec out_kvec[8], /* sending header/footer data */
188 *out_kvec_cur;
189 int out_kvec_left; /* kvec's left in out_kvec */
190 int out_skip; /* skip this many bytes */
191 int out_kvec_bytes; /* total bytes left */
192 bool out_kvec_is_msg; /* kvec refers to out_msg */
193 int out_more; /* there is more data after the kvecs */
194 __le64 out_temp_ack; /* for writing an ack */
195
196 /* message in temps */
197 struct ceph_msg_header in_hdr;
198 struct ceph_msg *in_msg;
199 struct ceph_msg_pos in_msg_pos;
200 u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
201
202 char in_tag; /* protocol control byte */
203 int in_base_pos; /* bytes read */
204 __le64 in_temp_ack; /* for reading an ack */
205
206 struct delayed_work work; /* send|recv work */
207 unsigned long delay; /* current delay interval */
208};
209
210
211extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
212extern int ceph_parse_ips(const char *c, const char *end,
213 struct ceph_entity_addr *addr,
214 int max_count, int *count);
215
216
217extern int ceph_msgr_init(void);
218extern void ceph_msgr_exit(void);
219extern void ceph_msgr_flush(void);
220
221extern struct ceph_messenger *ceph_messenger_create(
222 struct ceph_entity_addr *myaddr,
223 u32 features, u32 required);
224extern void ceph_messenger_destroy(struct ceph_messenger *);
225
226extern void ceph_con_init(struct ceph_messenger *msgr,
227 struct ceph_connection *con);
228extern void ceph_con_open(struct ceph_connection *con,
229 struct ceph_entity_addr *addr);
230extern bool ceph_con_opened(struct ceph_connection *con);
231extern void ceph_con_close(struct ceph_connection *con);
232extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
233extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
234extern void ceph_con_revoke_message(struct ceph_connection *con,
235 struct ceph_msg *msg);
236extern void ceph_con_keepalive(struct ceph_connection *con);
237extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
238extern void ceph_con_put(struct ceph_connection *con);
239
240extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags);
241extern void ceph_msg_kfree(struct ceph_msg *m);
242
243
244static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
245{
246 kref_get(&msg->kref);
247 return msg;
248}
249extern void ceph_msg_last_put(struct kref *kref);
250static inline void ceph_msg_put(struct ceph_msg *msg)
251{
252 kref_put(&msg->kref, ceph_msg_last_put);
253}
254
255extern void ceph_msg_dump(struct ceph_msg *msg);
256
257#endif
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
new file mode 100644
index 000000000000..545f85917780
--- /dev/null
+++ b/include/linux/ceph/mon_client.h
@@ -0,0 +1,122 @@
1#ifndef _FS_CEPH_MON_CLIENT_H
2#define _FS_CEPH_MON_CLIENT_H
3
4#include <linux/completion.h>
5#include <linux/kref.h>
6#include <linux/rbtree.h>
7
8#include "messenger.h"
9
10struct ceph_client;
11struct ceph_mount_args;
12struct ceph_auth_client;
13
14/*
15 * The monitor map enumerates the set of all monitors.
16 */
17struct ceph_monmap {
18 struct ceph_fsid fsid;
19 u32 epoch;
20 u32 num_mon;
21 struct ceph_entity_inst mon_inst[0];
22};
23
24struct ceph_mon_client;
25struct ceph_mon_generic_request;
26
27
28/*
29 * Generic mechanism for resending monitor requests.
30 */
31typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc,
32 int newmon);
33
34/* a pending monitor request */
35struct ceph_mon_request {
36 struct ceph_mon_client *monc;
37 struct delayed_work delayed_work;
38 unsigned long delay;
39 ceph_monc_request_func_t do_request;
40};
41
42/*
43 * ceph_mon_generic_request is being used for the statfs and poolop requests
44 * which are being done a bit differently because we need to get data back
45 * to the caller
46 */
47struct ceph_mon_generic_request {
48 struct kref kref;
49 u64 tid;
50 struct rb_node node;
51 int result;
52 void *buf;
53 int buf_len;
54 struct completion completion;
55 struct ceph_msg *request; /* original request */
56 struct ceph_msg *reply; /* and reply */
57};
58
59struct ceph_mon_client {
60 struct ceph_client *client;
61 struct ceph_monmap *monmap;
62
63 struct mutex mutex;
64 struct delayed_work delayed_work;
65
66 struct ceph_auth_client *auth;
67 struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
68 int pending_auth;
69
70 bool hunting;
71 int cur_mon; /* last monitor i contacted */
72 unsigned long sub_sent, sub_renew_after;
73 struct ceph_connection *con;
74 bool have_fsid;
75
76 /* pending generic requests */
77 struct rb_root generic_request_tree;
78 int num_generic_requests;
79 u64 last_tid;
80
81 /* mds/osd map */
82 int want_mdsmap;
83 int want_next_osdmap; /* 1 = want, 2 = want+asked */
84 u32 have_osdmap, have_mdsmap;
85
86#ifdef CONFIG_DEBUG_FS
87 struct dentry *debugfs_file;
88#endif
89};
90
91extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
92extern int ceph_monmap_contains(struct ceph_monmap *m,
93 struct ceph_entity_addr *addr);
94
95extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
96extern void ceph_monc_stop(struct ceph_mon_client *monc);
97
98/*
99 * The model here is to indicate that we need a new map of at least
100 * epoch @want, and also call in when we receive a map. We will
101 * periodically rerequest the map from the monitor cluster until we
102 * get what we want.
103 */
104extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
105extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
106
107extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
108
109extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
110 struct ceph_statfs *buf);
111
112extern int ceph_monc_open_session(struct ceph_mon_client *monc);
113
114extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
115
116extern int ceph_monc_create_snapid(struct ceph_mon_client *monc,
117 u32 pool, u64 *snapid);
118
119extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
120 u32 pool, u64 snapid);
121
122#endif
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
new file mode 100644
index 000000000000..a362605f9368
--- /dev/null
+++ b/include/linux/ceph/msgpool.h
@@ -0,0 +1,25 @@
1#ifndef _FS_CEPH_MSGPOOL
2#define _FS_CEPH_MSGPOOL
3
4#include <linux/mempool.h>
5#include "messenger.h"
6
7/*
8 * we use memory pools for preallocating messages we may receive, to
9 * avoid unexpected OOM conditions.
10 */
11struct ceph_msgpool {
12 const char *name;
13 mempool_t *pool;
14 int front_len; /* preallocated payload size */
15};
16
17extern int ceph_msgpool_init(struct ceph_msgpool *pool,
18 int front_len, int size, bool blocking,
19 const char *name);
20extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
21extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
22 int front_len);
23extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
24
25#endif
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
new file mode 100644
index 000000000000..680d3d648cac
--- /dev/null
+++ b/include/linux/ceph/msgr.h
@@ -0,0 +1,175 @@
1#ifndef CEPH_MSGR_H
2#define CEPH_MSGR_H
3
4/*
5 * Data types for message passing layer used by Ceph.
6 */
7
8#define CEPH_MON_PORT 6789 /* default monitor port */
9
10/*
11 * client-side processes will try to bind to ports in this
12 * range, simply for the benefit of tools like nmap or wireshark
13 * that would like to identify the protocol.
14 */
15#define CEPH_PORT_FIRST 6789
16#define CEPH_PORT_START 6800 /* non-monitors start here */
17#define CEPH_PORT_LAST 6900
18
19/*
20 * tcp connection banner. include a protocol version. and adjust
21 * whenever the wire protocol changes. try to keep this string length
22 * constant.
23 */
24#define CEPH_BANNER "ceph v027"
25#define CEPH_BANNER_MAX_LEN 30
26
27
28/*
29 * Rollover-safe type and comparator for 32-bit sequence numbers.
30 * Comparator returns -1, 0, or 1.
31 */
32typedef __u32 ceph_seq_t;
33
34static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
35{
36 return (__s32)a - (__s32)b;
37}
38
39
40/*
41 * entity_name -- logical name for a process participating in the
42 * network, e.g. 'mds0' or 'osd3'.
43 */
44struct ceph_entity_name {
45 __u8 type; /* CEPH_ENTITY_TYPE_* */
46 __le64 num;
47} __attribute__ ((packed));
48
49#define CEPH_ENTITY_TYPE_MON 0x01
50#define CEPH_ENTITY_TYPE_MDS 0x02
51#define CEPH_ENTITY_TYPE_OSD 0x04
52#define CEPH_ENTITY_TYPE_CLIENT 0x08
53#define CEPH_ENTITY_TYPE_AUTH 0x20
54
55#define CEPH_ENTITY_TYPE_ANY 0xFF
56
57extern const char *ceph_entity_type_name(int type);
58
59/*
60 * entity_addr -- network address
61 */
62struct ceph_entity_addr {
63 __le32 type;
64 __le32 nonce; /* unique id for process (e.g. pid) */
65 struct sockaddr_storage in_addr;
66} __attribute__ ((packed));
67
68struct ceph_entity_inst {
69 struct ceph_entity_name name;
70 struct ceph_entity_addr addr;
71} __attribute__ ((packed));
72
73
74/* used by message exchange protocol */
75#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */
76#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */
77#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing
78 incoming connection */
79#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again
80 with higher cseq */
81#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again
82 with higher gseq */
83#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */
84#define CEPH_MSGR_TAG_MSG 7 /* message */
85#define CEPH_MSGR_TAG_ACK 8 /* message ack */
86#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */
87#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
88#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
89#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
90
91
92/*
93 * connection negotiation
94 */
95struct ceph_msg_connect {
96 __le64 features; /* supported feature bits */
97 __le32 host_type; /* CEPH_ENTITY_TYPE_* */
98 __le32 global_seq; /* count connections initiated by this host */
99 __le32 connect_seq; /* count connections initiated in this session */
100 __le32 protocol_version;
101 __le32 authorizer_protocol;
102 __le32 authorizer_len;
103 __u8 flags; /* CEPH_MSG_CONNECT_* */
104} __attribute__ ((packed));
105
106struct ceph_msg_connect_reply {
107 __u8 tag;
108 __le64 features; /* feature bits for this session */
109 __le32 global_seq;
110 __le32 connect_seq;
111 __le32 protocol_version;
112 __le32 authorizer_len;
113 __u8 flags;
114} __attribute__ ((packed));
115
116#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */
117
118
119/*
120 * message header
121 */
122struct ceph_msg_header_old {
123 __le64 seq; /* message seq# for this session */
124 __le64 tid; /* transaction id */
125 __le16 type; /* message type */
126 __le16 priority; /* priority. higher value == higher priority */
127 __le16 version; /* version of message encoding */
128
129 __le32 front_len; /* bytes in main payload */
130 __le32 middle_len;/* bytes in middle payload */
131 __le32 data_len; /* bytes of data payload */
132 __le16 data_off; /* sender: include full offset;
133 receiver: mask against ~PAGE_MASK */
134
135 struct ceph_entity_inst src, orig_src;
136 __le32 reserved;
137 __le32 crc; /* header crc32c */
138} __attribute__ ((packed));
139
140struct ceph_msg_header {
141 __le64 seq; /* message seq# for this session */
142 __le64 tid; /* transaction id */
143 __le16 type; /* message type */
144 __le16 priority; /* priority. higher value == higher priority */
145 __le16 version; /* version of message encoding */
146
147 __le32 front_len; /* bytes in main payload */
148 __le32 middle_len;/* bytes in middle payload */
149 __le32 data_len; /* bytes of data payload */
150 __le16 data_off; /* sender: include full offset;
151 receiver: mask against ~PAGE_MASK */
152
153 struct ceph_entity_name src;
154 __le32 reserved;
155 __le32 crc; /* header crc32c */
156} __attribute__ ((packed));
157
158#define CEPH_MSG_PRIO_LOW 64
159#define CEPH_MSG_PRIO_DEFAULT 127
160#define CEPH_MSG_PRIO_HIGH 196
161#define CEPH_MSG_PRIO_HIGHEST 255
162
163/*
164 * follows data payload
165 */
166struct ceph_msg_footer {
167 __le32 front_crc, middle_crc, data_crc;
168 __u8 flags;
169} __attribute__ ((packed));
170
171#define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */
172#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */
173
174
175#endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
new file mode 100644
index 000000000000..f88eacb111d4
--- /dev/null
+++ b/include/linux/ceph/osd_client.h
@@ -0,0 +1,290 @@
1#ifndef _FS_CEPH_OSD_CLIENT_H
2#define _FS_CEPH_OSD_CLIENT_H
3
4#include <linux/completion.h>
5#include <linux/kref.h>
6#include <linux/mempool.h>
7#include <linux/rbtree.h>
8
9#include "types.h"
10#include "osdmap.h"
11#include "messenger.h"
12
13struct ceph_msg;
14struct ceph_snap_context;
15struct ceph_osd_request;
16struct ceph_osd_client;
17struct ceph_authorizer;
18struct ceph_pagelist;
19
20/*
21 * completion callback for async writepages
22 */
23typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
24 struct ceph_msg *);
25
26/* a given osd we're communicating with */
27struct ceph_osd {
28 atomic_t o_ref;
29 struct ceph_osd_client *o_osdc;
30 int o_osd;
31 int o_incarnation;
32 struct rb_node o_node;
33 struct ceph_connection o_con;
34 struct list_head o_requests;
35 struct list_head o_linger_requests;
36 struct list_head o_osd_lru;
37 struct ceph_authorizer *o_authorizer;
38 void *o_authorizer_buf, *o_authorizer_reply_buf;
39 size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
40 unsigned long lru_ttl;
41 int o_marked_for_keepalive;
42 struct list_head o_keepalive_item;
43};
44
45/* an in-flight request */
46struct ceph_osd_request {
47 u64 r_tid; /* unique for this client */
48 struct rb_node r_node;
49 struct list_head r_req_lru_item;
50 struct list_head r_osd_item;
51 struct list_head r_linger_item;
52 struct list_head r_linger_osd;
53 struct ceph_osd *r_osd;
54 struct ceph_pg r_pgid;
55 int r_pg_osds[CEPH_PG_MAX_SIZE];
56 int r_num_pg_osds;
57
58 struct ceph_connection *r_con_filling_msg;
59
60 struct ceph_msg *r_request, *r_reply;
61 int r_result;
62 int r_flags; /* any additional flags for the osd */
63 u32 r_sent; /* >0 if r_request is sending/sent */
64 int r_got_reply;
65 int r_linger;
66
67 struct ceph_osd_client *r_osdc;
68 struct kref r_kref;
69 bool r_mempool;
70 struct completion r_completion, r_safe_completion;
71 ceph_osdc_callback_t r_callback, r_safe_callback;
72 struct ceph_eversion r_reassert_version;
73 struct list_head r_unsafe_item;
74
75 struct inode *r_inode; /* for use by callbacks */
76 void *r_priv; /* ditto */
77
78 char r_oid[40]; /* object name */
79 int r_oid_len;
80 unsigned long r_stamp; /* send OR check time */
81
82 struct ceph_file_layout r_file_layout;
83 struct ceph_snap_context *r_snapc; /* snap context for writes */
84 unsigned r_num_pages; /* size of page array (follows) */
85 unsigned r_page_alignment; /* io offset in first page */
86 struct page **r_pages; /* pages for data payload */
87 int r_pages_from_pool;
88 int r_own_pages; /* if true, i own page list */
89#ifdef CONFIG_BLOCK
90 struct bio *r_bio; /* instead of pages */
91#endif
92
93 struct ceph_pagelist *r_trail; /* trailing part of the data */
94};
95
96struct ceph_osd_event {
97 u64 cookie;
98 int one_shot;
99 struct ceph_osd_client *osdc;
100 void (*cb)(u64, u64, u8, void *);
101 void *data;
102 struct rb_node node;
103 struct list_head osd_node;
104 struct kref kref;
105 struct completion completion;
106};
107
108struct ceph_osd_event_work {
109 struct work_struct work;
110 struct ceph_osd_event *event;
111 u64 ver;
112 u64 notify_id;
113 u8 opcode;
114};
115
116struct ceph_osd_client {
117 struct ceph_client *client;
118
119 struct ceph_osdmap *osdmap; /* current map */
120 struct rw_semaphore map_sem;
121 struct completion map_waiters;
122 u64 last_requested_map;
123
124 struct mutex request_mutex;
125 struct rb_root osds; /* osds */
126 struct list_head osd_lru; /* idle osds */
127 u64 timeout_tid; /* tid of timeout triggering rq */
128 u64 last_tid; /* tid of last request */
129 struct rb_root requests; /* pending requests */
130 struct list_head req_lru; /* in-flight lru */
131 struct list_head req_unsent; /* unsent/need-resend queue */
132 struct list_head req_notarget; /* map to no osd */
133 struct list_head req_linger; /* lingering requests */
134 int num_requests;
135 struct delayed_work timeout_work;
136 struct delayed_work osds_timeout_work;
137#ifdef CONFIG_DEBUG_FS
138 struct dentry *debugfs_file;
139#endif
140
141 mempool_t *req_mempool;
142
143 struct ceph_msgpool msgpool_op;
144 struct ceph_msgpool msgpool_op_reply;
145
146 spinlock_t event_lock;
147 struct rb_root event_tree;
148 u64 event_count;
149
150 struct workqueue_struct *notify_wq;
151};
152
153struct ceph_osd_req_op {
154 u16 op; /* CEPH_OSD_OP_* */
155 u32 flags; /* CEPH_OSD_FLAG_* */
156 union {
157 struct {
158 u64 offset, length;
159 u64 truncate_size;
160 u32 truncate_seq;
161 } extent;
162 struct {
163 const char *name;
164 u32 name_len;
165 const char *val;
166 u32 value_len;
167 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
168 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
169 } xattr;
170 struct {
171 const char *class_name;
172 __u8 class_len;
173 const char *method_name;
174 __u8 method_len;
175 __u8 argc;
176 const char *indata;
177 u32 indata_len;
178 } cls;
179 struct {
180 u64 cookie, count;
181 } pgls;
182 struct {
183 u64 snapid;
184 } snap;
185 struct {
186 u64 cookie;
187 u64 ver;
188 __u8 flag;
189 u32 prot_ver;
190 u32 timeout;
191 } watch;
192 };
193 u32 payload_len;
194};
195
196extern int ceph_osdc_init(struct ceph_osd_client *osdc,
197 struct ceph_client *client);
198extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
199
200extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
201 struct ceph_msg *msg);
202extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
203 struct ceph_msg *msg);
204
205extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
206 struct ceph_file_layout *layout,
207 u64 snapid,
208 u64 off, u64 *plen, u64 *bno,
209 struct ceph_osd_request *req,
210 struct ceph_osd_req_op *op);
211
212extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
213 int flags,
214 struct ceph_snap_context *snapc,
215 struct ceph_osd_req_op *ops,
216 bool use_mempool,
217 gfp_t gfp_flags,
218 struct page **pages,
219 struct bio *bio);
220
221extern void ceph_osdc_build_request(struct ceph_osd_request *req,
222 u64 off, u64 *plen,
223 struct ceph_osd_req_op *src_ops,
224 struct ceph_snap_context *snapc,
225 struct timespec *mtime,
226 const char *oid,
227 int oid_len);
228
229extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
230 struct ceph_file_layout *layout,
231 struct ceph_vino vino,
232 u64 offset, u64 *len, int op, int flags,
233 struct ceph_snap_context *snapc,
234 int do_sync, u32 truncate_seq,
235 u64 truncate_size,
236 struct timespec *mtime,
237 bool use_mempool, int num_reply,
238 int page_align);
239
240extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
241 struct ceph_osd_request *req);
242extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
243 struct ceph_osd_request *req);
244
245static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
246{
247 kref_get(&req->r_kref);
248}
249extern void ceph_osdc_release_request(struct kref *kref);
250static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
251{
252 kref_put(&req->r_kref, ceph_osdc_release_request);
253}
254
255extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
256 struct ceph_osd_request *req,
257 bool nofail);
258extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
259 struct ceph_osd_request *req);
260extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
261
262extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
263 struct ceph_vino vino,
264 struct ceph_file_layout *layout,
265 u64 off, u64 *plen,
266 u32 truncate_seq, u64 truncate_size,
267 struct page **pages, int nr_pages,
268 int page_align);
269
270extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
271 struct ceph_vino vino,
272 struct ceph_file_layout *layout,
273 struct ceph_snap_context *sc,
274 u64 off, u64 len,
275 u32 truncate_seq, u64 truncate_size,
276 struct timespec *mtime,
277 struct page **pages, int nr_pages,
278 int flags, int do_sync, bool nofail);
279
280/* watch/notify events */
281extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
282 void (*event_cb)(u64, u64, u8, void *),
283 int one_shot, void *data,
284 struct ceph_osd_event **pevent);
285extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
286extern int ceph_osdc_wait_event(struct ceph_osd_event *event,
287 unsigned long timeout);
288extern void ceph_osdc_put_event(struct ceph_osd_event *event);
289#endif
290
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
new file mode 100644
index 000000000000..ba4c205cbb01
--- /dev/null
+++ b/include/linux/ceph/osdmap.h
@@ -0,0 +1,130 @@
1#ifndef _FS_CEPH_OSDMAP_H
2#define _FS_CEPH_OSDMAP_H
3
4#include <linux/rbtree.h>
5#include "types.h"
6#include "ceph_fs.h"
7#include <linux/crush/crush.h>
8
9/*
10 * The osd map describes the current membership of the osd cluster and
11 * specifies the mapping of objects to placement groups and placement
12 * groups to (sets of) osds. That is, it completely specifies the
13 * (desired) distribution of all data objects in the system at some
14 * point in time.
15 *
16 * Each map version is identified by an epoch, which increases monotonically.
17 *
18 * The map can be updated either via an incremental map (diff) describing
19 * the change between two successive epochs, or as a fully encoded map.
20 */
21struct ceph_pg_pool_info {
22 struct rb_node node;
23 int id;
24 struct ceph_pg_pool v;
25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
26 char *name;
27};
28
29struct ceph_pg_mapping {
30 struct rb_node node;
31 struct ceph_pg pgid;
32 int len;
33 int osds[];
34};
35
36struct ceph_osdmap {
37 struct ceph_fsid fsid;
38 u32 epoch; /* map version; increases monotonically */
39 u32 mkfs_epoch;
40 struct ceph_timespec created, modified;
41
42 u32 flags; /* CEPH_OSDMAP_* */
43
44 u32 max_osd; /* size of osd_state, osd_weight, osd_addr arrays */
45 u8 *osd_state; /* CEPH_OSD_* status bits (e.g. CEPH_OSD_UP) */
46 u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
47 struct ceph_entity_addr *osd_addr;
48
49 struct rb_root pg_temp;
50 struct rb_root pg_pools;
51 u32 pool_max;
52
53 /* the CRUSH map specifies the mapping of placement groups to
54 * the list of osds that store+replicate them. */
55 struct crush_map *crush;
56};
57
58/*
59 * file layout helpers
60 */
61#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
62#define ceph_file_layout_stripe_count(l) \
63 ((__s32)le32_to_cpu((l).fl_stripe_count))
64#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
65#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
66#define ceph_file_layout_object_su(l) \
67 ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
68#define ceph_file_layout_pg_preferred(l) \
69 ((__s32)le32_to_cpu((l).fl_pg_preferred))
70#define ceph_file_layout_pg_pool(l) \
71 ((__s32)le32_to_cpu((l).fl_pg_pool))
72
73static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
74{
75 return le32_to_cpu(l->fl_stripe_unit) *
76 le32_to_cpu(l->fl_stripe_count);
77}
78
79/* "period" == bytes before i start on a new set of objects */
80static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
81{
82 return le32_to_cpu(l->fl_object_size) *
83 le32_to_cpu(l->fl_stripe_count);
84}
85
86
87static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
88{
89 return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
90}
91
92static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
93{
94 return map && (map->flags & flag);
95}
96
97extern char *ceph_osdmap_state_str(char *str, int len, int state);
98
99static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
100 int osd)
101{
102 if (osd >= map->max_osd)
103 return NULL;
104 return &map->osd_addr[osd];
105}
106
107extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
108extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
109 struct ceph_osdmap *map,
110 struct ceph_messenger *msgr);
111extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
112
113/* calculate mapping of a file extent to an object */
114extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
115 u64 off, u64 *plen,
116 u64 *bno, u64 *oxoff, u64 *oxlen);
117
118/* calculate mapping of object to a placement group */
119extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
120 const char *oid,
121 struct ceph_file_layout *fl,
122 struct ceph_osdmap *osdmap);
123extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
124 int *acting);
125extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
126 struct ceph_pg pgid);
127
128extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
129
130#endif
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
new file mode 100644
index 000000000000..9660d6b0a35d
--- /dev/null
+++ b/include/linux/ceph/pagelist.h
@@ -0,0 +1,75 @@
1#ifndef __FS_CEPH_PAGELIST_H
2#define __FS_CEPH_PAGELIST_H
3
4#include <linux/list.h>
5
6struct ceph_pagelist {
7 struct list_head head;
8 void *mapped_tail;
9 size_t length;
10 size_t room;
11 struct list_head free_list;
12 size_t num_pages_free;
13};
14
15struct ceph_pagelist_cursor {
16 struct ceph_pagelist *pl; /* pagelist, for error checking */
17 struct list_head *page_lru; /* page in list */
18 size_t room; /* room remaining to reset to */
19};
20
21static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
22{
23 INIT_LIST_HEAD(&pl->head);
24 pl->mapped_tail = NULL;
25 pl->length = 0;
26 pl->room = 0;
27 INIT_LIST_HEAD(&pl->free_list);
28 pl->num_pages_free = 0;
29}
30
31extern int ceph_pagelist_release(struct ceph_pagelist *pl);
32
33extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
34
35extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
36
37extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
38
39extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
40 struct ceph_pagelist_cursor *c);
41
42extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
43 struct ceph_pagelist_cursor *c);
44
45static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
46{
47 __le64 ev = cpu_to_le64(v);
48 return ceph_pagelist_append(pl, &ev, sizeof(ev));
49}
50static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v)
51{
52 __le32 ev = cpu_to_le32(v);
53 return ceph_pagelist_append(pl, &ev, sizeof(ev));
54}
55static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v)
56{
57 __le16 ev = cpu_to_le16(v);
58 return ceph_pagelist_append(pl, &ev, sizeof(ev));
59}
60static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
61{
62 return ceph_pagelist_append(pl, &v, 1);
63}
/*
 * Append a length-prefixed string to the pagelist: first len as a
 * little-endian 32-bit value, then len bytes taken from s.  When len
 * is 0 only the length prefix is written.
 *
 * s is only read (it is forwarded to ceph_pagelist_append(), which
 * takes a const void *), so it is declared const; existing callers
 * passing a plain char * are unaffected.
 *
 * Returns 0 on success, or the nonzero result of whichever append
 * failed.
 *
 * NOTE(review): len is a size_t but is passed to
 * ceph_pagelist_encode_32(), whose parameter is a u32 -- presumably
 * callers never pass lengths above 32 bits; confirm.
 */
static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
					      const char *s, size_t len)
{
	int ret = ceph_pagelist_encode_32(pl, len);

	/* prefix failed, or nothing follows the prefix */
	if (ret || !len)
		return ret;
	return ceph_pagelist_append(pl, s, len);
}
74
75#endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
new file mode 100644
index 000000000000..0a99099801a4
--- /dev/null
+++ b/include/linux/ceph/rados.h
@@ -0,0 +1,426 @@
1#ifndef CEPH_RADOS_H
2#define CEPH_RADOS_H
3
4/*
5 * Data types for the Ceph distributed object storage layer RADOS
6 * (Reliable Autonomic Distributed Object Store).
7 */
8
9#include "msgr.h"
10
11/*
12 * osdmap encoding versions
13 */
14#define CEPH_OSDMAP_INC_VERSION 5
15#define CEPH_OSDMAP_INC_VERSION_EXT 6
16#define CEPH_OSDMAP_VERSION 5
17#define CEPH_OSDMAP_VERSION_EXT 6
18
19/*
20 * fs id
21 */
22struct ceph_fsid {
23 unsigned char fsid[16];
24};
25
26static inline int ceph_fsid_compare(const struct ceph_fsid *a,
27 const struct ceph_fsid *b)
28{
29 return memcmp(a, b, sizeof(*a));
30}
31
32/*
33 * ino, object, etc.
34 */
35typedef __le64 ceph_snapid_t;
36#define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */
37#define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */
38#define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */
39
40struct ceph_timespec {
41 __le32 tv_sec;
42 __le32 tv_nsec;
43} __attribute__ ((packed));
44
45
46/*
47 * object layout - how objects are mapped into PGs
48 */
49#define CEPH_OBJECT_LAYOUT_HASH 1
50#define CEPH_OBJECT_LAYOUT_LINEAR 2
51#define CEPH_OBJECT_LAYOUT_HASHINO 3
52
53/*
54 * pg layout -- how PGs are mapped onto (sets of) OSDs
55 */
56#define CEPH_PG_LAYOUT_CRUSH 0
57#define CEPH_PG_LAYOUT_HASH 1
58#define CEPH_PG_LAYOUT_LINEAR 2
59#define CEPH_PG_LAYOUT_HYBRID 3
60
61#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
62
63/*
64 * placement group.
65 * we encode this into one __le64.
66 */
67struct ceph_pg {
68 __le16 preferred; /* preferred primary osd */
69 __le16 ps; /* placement seed */
70 __le32 pool; /* object pool */
71} __attribute__ ((packed));
72
73/*
74 * pg_pool is a set of pgs storing a pool of objects
75 *
76 * pg_num -- base number of pseudorandomly placed pgs
77 *
78 * pgp_num -- effective number when calculating pg placement. this
79 * is used for pg_num increases. new pgs result in data being "split"
80 * into new pgs. for this to proceed smoothly, new pgs are intiially
81 * colocated with their parents; that is, pgp_num doesn't increase
82 * until the new pgs have successfully split. only _then_ are the new
83 * pgs placed independently.
84 *
85 * lpg_num -- localized pg count (per device). replicas are randomly
86 * selected.
87 *
88 * lpgp_num -- as above.
89 */
90#define CEPH_PG_TYPE_REP 1
91#define CEPH_PG_TYPE_RAID4 2
92#define CEPH_PG_POOL_VERSION 2
93struct ceph_pg_pool {
94 __u8 type; /* CEPH_PG_TYPE_* */
95 __u8 size; /* number of osds in each pg */
96 __u8 crush_ruleset; /* crush placement rule */
97 __u8 object_hash; /* hash mapping object name to ps */
98 __le32 pg_num, pgp_num; /* number of pg's */
99 __le32 lpg_num, lpgp_num; /* number of localized pg's */
100 __le32 last_change; /* most recent epoch changed */
101 __le64 snap_seq; /* seq for per-pool snapshot */
102 __le32 snap_epoch; /* epoch of last snap */
103 __le32 num_snaps;
104 __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */
105 __le64 auid; /* who owns the pg */
106} __attribute__ ((packed));
107
/*
 * ceph_stable_mod - map x onto one of b bins (used to control the
 * number of placement groups).
 *
 * Behaves like a plain modulo, except that the mapping stays stable as
 * b grows over time.  b is the number of bins; bmask is the containing
 * power of 2 minus 1:
 *
 *   b <= bmask and bmask = (2**n) - 1
 *   e.g., b=12 -> bmask=15, b=123 -> bmask=127
 *
 * x is first masked with bmask; if that lands at or beyond b, x is
 * masked with the next smaller mask (bmask >> 1) instead.
 */
static inline int ceph_stable_mod(int x, int b, int bmask)
{
	int bin = x & bmask;

	if (bin >= b)
		bin = x & (bmask >> 1);
	return bin;
}
124
125/*
126 * object layout - how a given object should be stored.
127 */
128struct ceph_object_layout {
129 struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */
130 __le32 ol_stripe_unit; /* for per-object parity, if any */
131} __attribute__ ((packed));
132
133/*
134 * compound epoch+version, used by storage layer to serialize mutations
135 */
136struct ceph_eversion {
137 __le32 epoch;
138 __le64 version;
139} __attribute__ ((packed));
140
141/*
142 * osd map bits
143 */
144
145/* status bits */
146#define CEPH_OSD_EXISTS 1
147#define CEPH_OSD_UP 2
148
149/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
150#define CEPH_OSD_IN 0x10000
151#define CEPH_OSD_OUT 0
152
153
154/*
155 * osd map flag bits
156 */
157#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
158#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
159#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
160#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
161#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
162
163/*
164 * osd ops
165 */
166#define CEPH_OSD_OP_MODE 0xf000
167#define CEPH_OSD_OP_MODE_RD 0x1000
168#define CEPH_OSD_OP_MODE_WR 0x2000
169#define CEPH_OSD_OP_MODE_RMW 0x3000
170#define CEPH_OSD_OP_MODE_SUB 0x4000
171
172#define CEPH_OSD_OP_TYPE 0x0f00
173#define CEPH_OSD_OP_TYPE_LOCK 0x0100
174#define CEPH_OSD_OP_TYPE_DATA 0x0200
175#define CEPH_OSD_OP_TYPE_ATTR 0x0300
176#define CEPH_OSD_OP_TYPE_EXEC 0x0400
177#define CEPH_OSD_OP_TYPE_PG 0x0500
178
179enum {
180 /** data **/
181 /* read */
182 CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1,
183 CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2,
184 CEPH_OSD_OP_MAPEXT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 3,
185
186 /* fancy read */
187 CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4,
188 CEPH_OSD_OP_SPARSE_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 5,
189
190 CEPH_OSD_OP_NOTIFY = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 6,
191 CEPH_OSD_OP_NOTIFY_ACK = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 7,
192
193 /* versioning */
194 CEPH_OSD_OP_ASSERT_VER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 8,
195
196 /* write */
197 CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
198 CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
199 CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3,
200 CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4,
201 CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5,
202
203 /* fancy write */
204 CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6,
205 CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7,
206 CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8,
207 CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9,
208
209 CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10,
210 CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11,
211 CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,
212
213 CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,
214 CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14,
215
216 CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
217
218 /** attrs **/
219 /* read */
220 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
221 CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,
222 CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3,
223
224 /* write */
225 CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
226 CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2,
227 CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3,
228 CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4,
229
230 /** subop **/
231 CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1,
232 CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2,
233 CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3,
234 CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4,
235 CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5,
236 CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6,
237 CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
238 CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8,
239
240 /** lock **/
241 CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
242 CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2,
243 CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3,
244 CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4,
245 CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5,
246 CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
247
248 /** exec **/
249 CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
250
251 /** pg **/
252 CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
253};
254
255static inline int ceph_osd_op_type_lock(int op)
256{
257 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK;
258}
259static inline int ceph_osd_op_type_data(int op)
260{
261 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA;
262}
263static inline int ceph_osd_op_type_attr(int op)
264{
265 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR;
266}
267static inline int ceph_osd_op_type_exec(int op)
268{
269 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC;
270}
271static inline int ceph_osd_op_type_pg(int op)
272{
273 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
274}
275
276static inline int ceph_osd_op_mode_subop(int op)
277{
278 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB;
279}
280static inline int ceph_osd_op_mode_read(int op)
281{
282 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD;
283}
284static inline int ceph_osd_op_mode_modify(int op)
285{
286 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR;
287}
288
289/*
290 * note that the following tmap stuff is also defined in the ceph librados.h
291 * any modification here needs to be updated there
292 */
293#define CEPH_OSD_TMAP_HDR 'h'
294#define CEPH_OSD_TMAP_SET 's'
295#define CEPH_OSD_TMAP_RM 'r'
296
297extern const char *ceph_osd_op_name(int op);
298
299
300/*
301 * osd op flags
302 *
303 * An op may be READ, WRITE, or READ|WRITE.
304 */
305enum {
306 CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */
307 CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */
308 CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */
309 CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */
310 CEPH_OSD_FLAG_READ = 16, /* op may read */
311 CEPH_OSD_FLAG_WRITE = 32, /* op may write */
312 CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAPC if snapc is out of order */
313 CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */
314 CEPH_OSD_FLAG_BALANCE_READS = 256,
315 CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */
316 CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */
317 CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */
318 CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */
319};
320
321enum {
322 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
323};
324
325#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
326#define EBLACKLISTED ESHUTDOWN /* blacklisted */
327
328/* xattr comparison */
329enum {
330 CEPH_OSD_CMPXATTR_OP_NOP = 0,
331 CEPH_OSD_CMPXATTR_OP_EQ = 1,
332 CEPH_OSD_CMPXATTR_OP_NE = 2,
333 CEPH_OSD_CMPXATTR_OP_GT = 3,
334 CEPH_OSD_CMPXATTR_OP_GTE = 4,
335 CEPH_OSD_CMPXATTR_OP_LT = 5,
336 CEPH_OSD_CMPXATTR_OP_LTE = 6
337};
338
339enum {
340 CEPH_OSD_CMPXATTR_MODE_STRING = 1,
341 CEPH_OSD_CMPXATTR_MODE_U64 = 2
342};
343
344#define RADOS_NOTIFY_VER 1
345
346/*
347 * an individual object operation. each may be accompanied by some data
348 * payload
349 */
350struct ceph_osd_op {
351 __le16 op; /* CEPH_OSD_OP_* */
352 __le32 flags; /* CEPH_OSD_FLAG_* */
353 union {
354 struct {
355 __le64 offset, length;
356 __le64 truncate_size;
357 __le32 truncate_seq;
358 } __attribute__ ((packed)) extent;
359 struct {
360 __le32 name_len;
361 __le32 value_len;
362 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
363 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
364 } __attribute__ ((packed)) xattr;
365 struct {
366 __u8 class_len;
367 __u8 method_len;
368 __u8 argc;
369 __le32 indata_len;
370 } __attribute__ ((packed)) cls;
371 struct {
372 __le64 cookie, count;
373 } __attribute__ ((packed)) pgls;
374 struct {
375 __le64 snapid;
376 } __attribute__ ((packed)) snap;
377 struct {
378 __le64 cookie;
379 __le64 ver;
380 __u8 flag; /* 0 = unwatch, 1 = watch */
381 } __attribute__ ((packed)) watch;
382};
383 __le32 payload_len;
384} __attribute__ ((packed));
385
386/*
387 * osd request message header. each request may include multiple
388 * ceph_osd_op object operations.
389 */
390struct ceph_osd_request_head {
391 __le32 client_inc; /* client incarnation */
392 struct ceph_object_layout layout; /* pgid */
393 __le32 osdmap_epoch; /* client's osdmap epoch */
394
395 __le32 flags;
396
397 struct ceph_timespec mtime; /* for mutations only */
398 struct ceph_eversion reassert_version; /* if we are replaying op */
399
400 __le32 object_len; /* length of object name */
401
402 __le64 snapid; /* snapid to read */
403 __le64 snap_seq; /* writer's snap context */
404 __le32 num_snaps;
405
406 __le16 num_ops;
407 struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */
408} __attribute__ ((packed));
409
410struct ceph_osd_reply_head {
411 __le32 client_inc; /* client incarnation */
412 __le32 flags;
413 struct ceph_object_layout layout;
414 __le32 osdmap_epoch;
415 struct ceph_eversion reassert_version; /* for replaying uncommitted */
416
417 __le32 result; /* result code */
418
419 __le32 object_len; /* length of object name */
420 __le32 num_ops;
421 struct ceph_osd_op ops[0]; /* ops[], object */
422} __attribute__ ((packed));
423
424
425
426#endif
diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h
new file mode 100644
index 000000000000..28b35a005ec2
--- /dev/null
+++ b/include/linux/ceph/types.h
@@ -0,0 +1,29 @@
1#ifndef _FS_CEPH_TYPES_H
2#define _FS_CEPH_TYPES_H
3
4/* needed before including ceph_fs.h */
5#include <linux/in.h>
6#include <linux/types.h>
7#include <linux/fcntl.h>
8#include <linux/string.h>
9
10#include "ceph_fs.h"
11#include "ceph_frag.h"
12#include "ceph_hash.h"
13
14/*
15 * Identify inodes by both their ino AND snapshot id (a u64).
16 */
17struct ceph_vino {
18 u64 ino;
19 u64 snap;
20};
21
22
23/* context for the caps reservation mechanism */
24struct ceph_cap_reservation {
25 int count;
26};
27
28
29#endif