diff options
-rw-r--r-- | fs/ceph/ceph_fs.c | 80 | ||||
-rw-r--r-- | fs/ceph/ceph_fs.h | 629 | ||||
-rw-r--r-- | fs/ceph/ceph_strings.c | 163 | ||||
-rw-r--r-- | fs/ceph/msgr.h | 157 | ||||
-rw-r--r-- | fs/ceph/rados.h | 372 |
5 files changed, 1401 insertions, 0 deletions
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c new file mode 100644 index 000000000000..9371ff1c0002 --- /dev/null +++ b/fs/ceph/ceph_fs.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * Some non-inline ceph helpers | ||
3 | */ | ||
4 | #include "types.h" | ||
5 | |||
6 | int ceph_flags_to_mode(int flags) | ||
7 | { | ||
8 | #ifdef O_DIRECTORY /* fixme */ | ||
9 | if ((flags & O_DIRECTORY) == O_DIRECTORY) | ||
10 | return CEPH_FILE_MODE_PIN; | ||
11 | #endif | ||
12 | #ifdef O_LAZY | ||
13 | if (flags & O_LAZY) | ||
14 | return CEPH_FILE_MODE_LAZY; | ||
15 | #endif | ||
16 | if ((flags & O_APPEND) == O_APPEND) | ||
17 | flags |= O_WRONLY; | ||
18 | |||
19 | flags &= O_ACCMODE; | ||
20 | if ((flags & O_RDWR) == O_RDWR) | ||
21 | return CEPH_FILE_MODE_RDWR; | ||
22 | if ((flags & O_WRONLY) == O_WRONLY) | ||
23 | return CEPH_FILE_MODE_WR; | ||
24 | return CEPH_FILE_MODE_RD; | ||
25 | } | ||
26 | |||
27 | int ceph_caps_for_mode(int mode) | ||
28 | { | ||
29 | switch (mode) { | ||
30 | case CEPH_FILE_MODE_PIN: | ||
31 | return CEPH_CAP_PIN; | ||
32 | case CEPH_FILE_MODE_RD: | ||
33 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
34 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; | ||
35 | case CEPH_FILE_MODE_RDWR: | ||
36 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
37 | CEPH_CAP_FILE_EXCL | | ||
38 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | | ||
39 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
40 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
41 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
42 | case CEPH_FILE_MODE_WR: | ||
43 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
44 | CEPH_CAP_FILE_EXCL | | ||
45 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
46 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
47 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
48 | } | ||
49 | return 0; | ||
50 | } | ||
51 | |||
/*
 * Name hashing routines.
 *
 * Hash courtesy of the R5 hash in reiserfs modulo sign bits.
 */

/* Initial hash value */
#define ceph_init_name_hash()		0

/*
 * Partial hash update: fold one character @c into @prevhash.
 * Assumes roughly 4 bits per character.
 */
static unsigned long ceph_partial_name_hash(unsigned long c,
					    unsigned long prevhash)
{
	return (prevhash + (c << 4) + (c >> 4)) * 11;
}

/*
 * Finally: cut down the number of bits to a 32-bit value (and try to
 * avoid losing bits).
 */
static unsigned long ceph_end_name_hash(unsigned long hash)
{
	return hash & 0xffffffff;
}

/*
 * Compute the hash of the first @len bytes of @name.
 *
 * Bytes are read as unsigned char.  Reading them through plain char
 * would sign-extend bytes >= 0x80 wherever char is signed, making the
 * result depend on the target's char signedness and on the width of
 * unsigned long.  With unsigned bytes only the low 32 bits of the
 * accumulator matter, so every platform produces the same value.
 */
unsigned int ceph_full_name_hash(const char *name, unsigned int len)
{
	const unsigned char *p = (const unsigned char *)name;
	unsigned long hash = ceph_init_name_hash();

	while (len--)
		hash = ceph_partial_name_hash(*p++, hash);
	return ceph_end_name_hash(hash);
}
80 | |||
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h new file mode 100644 index 000000000000..21ed51b127f2 --- /dev/null +++ b/fs/ceph/ceph_fs.h | |||
@@ -0,0 +1,629 @@ | |||
1 | /* | ||
2 | * ceph_fs.h - Ceph constants and data types to share between kernel and | ||
3 | * user space. | ||
4 | * | ||
5 | * Most types in this file are defined as little-endian, and are | ||
6 | * primarily intended to describe data structures that pass over the | ||
7 | * wire or that are stored on disk. | ||
8 | * | ||
9 | * LGPL2 | ||
10 | */ | ||
11 | |||
12 | #ifndef _FS_CEPH_CEPH_FS_H | ||
13 | #define _FS_CEPH_CEPH_FS_H | ||
14 | |||
15 | #include "msgr.h" | ||
16 | #include "rados.h" | ||
17 | |||
/*
 * Ceph release version, as three numeric components and as the
 * dotted string CEPH_VERSION ("major.minor.patch").
 */
#define CEPH_VERSION_MAJOR 0
#define CEPH_VERSION_MINOR 16
#define CEPH_VERSION_PATCH 1

/* two-level stringification so the arguments are macro-expanded first */
#define _CEPH_STRINGIFY(x) #x
#define CEPH_STRINGIFY(x) _CEPH_STRINGIFY(x)
#define CEPH_MAKE_VERSION(x, y, z) \
	CEPH_STRINGIFY(x) "." CEPH_STRINGIFY(y) "." CEPH_STRINGIFY(z)
#define CEPH_VERSION \
	CEPH_MAKE_VERSION(CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, \
			  CEPH_VERSION_PATCH)
31 | |||
/*
 * Subprotocol versions.  When specific message types or high-level
 * protocols change, bump the affected components.  Internal cluster
 * protocols are versioned separately from the public, client-facing
 * protocols.
 */

/* cluster internal */
#define CEPH_OSD_PROTOCOL     7
#define CEPH_MDS_PROTOCOL     9
#define CEPH_MON_PROTOCOL     4

/* server/client */
#define CEPH_OSDC_PROTOCOL   20
#define CEPH_MDSC_PROTOCOL   29
#define CEPH_MONC_PROTOCOL   14


#define CEPH_INO_ROOT  1

/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
#define CEPH_MAX_MON   31
50 | |||
51 | |||
/* Hash the first @len bytes of @name. */
unsigned int ceph_full_name_hash(const char *name, unsigned int len);
53 | |||
54 | |||
55 | /* | ||
56 | * ceph_file_layout - describe data layout for a file/inode | ||
57 | */ | ||
58 | struct ceph_file_layout { | ||
59 | /* file -> object mapping */ | ||
60 | __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple | ||
61 | of page size. */ | ||
62 | __le32 fl_stripe_count; /* over this many objects */ | ||
63 | __le32 fl_object_size; /* until objects are this big, then move to | ||
64 | new objects */ | ||
65 | __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ | ||
66 | |||
67 | /* pg -> disk layout */ | ||
68 | __le32 fl_object_stripe_unit; /* for per-object parity, if any */ | ||
69 | |||
70 | /* object -> pg layout */ | ||
71 | __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ | ||
72 | __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ | ||
73 | } __attribute__ ((packed)); | ||
74 | |||
75 | |||
76 | |||
77 | |||
78 | /********************************************* | ||
79 | * message layer | ||
80 | */ | ||
81 | |||
/*
 * message types (CEPH_MSG_*), grouped by the peers that exchange them
 */

/* misc */
#define CEPH_MSG_SHUTDOWN               1
#define CEPH_MSG_PING                   2

/* client <-> monitor */
#define CEPH_MSG_MON_MAP                4
#define CEPH_MSG_MON_GET_MAP            5
#define CEPH_MSG_CLIENT_MOUNT           10
#define CEPH_MSG_CLIENT_MOUNT_ACK       11
#define CEPH_MSG_STATFS                 13
#define CEPH_MSG_STATFS_REPLY           14
#define CEPH_MSG_MON_SUBSCRIBE          15
#define CEPH_MSG_MON_SUBSCRIBE_ACK      16

/* client <-> mds: maps */
#define CEPH_MSG_MDS_GETMAP             20
#define CEPH_MSG_MDS_MAP                21

/* client <-> mds: sessions */
#define CEPH_MSG_CLIENT_SESSION         22
#define CEPH_MSG_CLIENT_RECONNECT       23

/* client <-> mds: requests, replies, caps, leases, snaps */
#define CEPH_MSG_CLIENT_REQUEST         24
#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
#define CEPH_MSG_CLIENT_REPLY           26
#define CEPH_MSG_CLIENT_CAPS            0x310
#define CEPH_MSG_CLIENT_LEASE           0x311
#define CEPH_MSG_CLIENT_SNAP            0x312
#define CEPH_MSG_CLIENT_CAPRELEASE      0x313

/* osd */
#define CEPH_MSG_OSD_GETMAP             40
#define CEPH_MSG_OSD_MAP                41
#define CEPH_MSG_OSD_OP                 42
#define CEPH_MSG_OSD_OPREPLY            43
120 | |||
121 | |||
122 | struct ceph_mon_statfs { | ||
123 | __le64 have_version; | ||
124 | struct ceph_fsid fsid; | ||
125 | __le64 tid; | ||
126 | } __attribute__ ((packed)); | ||
127 | |||
128 | struct ceph_statfs { | ||
129 | __le64 kb, kb_used, kb_avail; | ||
130 | __le64 num_objects; | ||
131 | } __attribute__ ((packed)); | ||
132 | |||
133 | struct ceph_mon_statfs_reply { | ||
134 | struct ceph_fsid fsid; | ||
135 | __le64 tid; | ||
136 | __le64 version; | ||
137 | struct ceph_statfs st; | ||
138 | } __attribute__ ((packed)); | ||
139 | |||
140 | struct ceph_osd_getmap { | ||
141 | __le64 have_version; | ||
142 | struct ceph_fsid fsid; | ||
143 | __le32 start; | ||
144 | } __attribute__ ((packed)); | ||
145 | |||
146 | struct ceph_mds_getmap { | ||
147 | __le64 have_version; | ||
148 | struct ceph_fsid fsid; | ||
149 | } __attribute__ ((packed)); | ||
150 | |||
151 | struct ceph_client_mount { | ||
152 | __le64 have_version; | ||
153 | } __attribute__ ((packed)); | ||
154 | |||
155 | struct ceph_mon_subscribe_item { | ||
156 | __le64 have; | ||
157 | __u8 onetime; | ||
158 | } __attribute__ ((packed)); | ||
159 | |||
/*
 * mds states
 *   > 0 -> in
 *  <= 0 -> out
 */

/* down, does not exist. */
#define CEPH_MDS_STATE_DNE            0
/* down, once existed, but no subtrees.  empty log. */
#define CEPH_MDS_STATE_STOPPED        (-1)
/* up, boot announcement. */
#define CEPH_MDS_STATE_BOOT           (-4)
/* up, idle.  waiting for assignment. */
#define CEPH_MDS_STATE_STANDBY        (-5)
/* up, creating MDS instance. */
#define CEPH_MDS_STATE_CREATING       (-6)
/* up, starting previously stopped mds */
#define CEPH_MDS_STATE_STARTING       (-7)
/* up, tailing active node's journal */
#define CEPH_MDS_STATE_STANDBY_REPLAY (-8)

/* up, replaying journal. */
#define CEPH_MDS_STATE_REPLAY         8
/* up, disambiguating distributed operations (import, rename, etc.) */
#define CEPH_MDS_STATE_RESOLVE        9
/* up, reconnect to clients */
#define CEPH_MDS_STATE_RECONNECT      10
/* up, rejoining distributed cache */
#define CEPH_MDS_STATE_REJOIN         11
/* up, replaying client operations */
#define CEPH_MDS_STATE_CLIENTREPLAY   12
/* up, active */
#define CEPH_MDS_STATE_ACTIVE         13
/* up, but exporting metadata */
#define CEPH_MDS_STATE_STOPPING       14

/* printable name for a CEPH_MDS_STATE_* value */
extern const char *ceph_mds_state_name(int s);
184 | |||
185 | |||
/*
 * metadata lock types.
 *  - these are bitmasks.. we can compose them
 *  - they also define the lock ordering by the MDS
 *  - a few of these are internal to the mds
 */
#define CEPH_LOCK_DN          (1 << 0)
#define CEPH_LOCK_ISNAP       (1 << 1)
#define CEPH_LOCK_IVERSION    (1 << 2)    /* mds internal */
#define CEPH_LOCK_IFILE       (1 << 3)    /* mds internal */
#define CEPH_LOCK_IAUTH       (1 << 5)
#define CEPH_LOCK_ILINK       (1 << 6)
#define CEPH_LOCK_IDFT        (1 << 7)    /* dir frag tree */
#define CEPH_LOCK_INEST       (1 << 8)    /* mds internal */
#define CEPH_LOCK_IXATTR      (1 << 9)
#define CEPH_LOCK_INO         (1 << 11)   /* immutable inode bits; not a lock */
202 | |||
/* client_session ops (values are consecutive, starting at 0) */
enum {
	CEPH_SESSION_REQUEST_OPEN      = 0,
	CEPH_SESSION_OPEN              = 1,
	CEPH_SESSION_REQUEST_CLOSE     = 2,
	CEPH_SESSION_CLOSE             = 3,
	CEPH_SESSION_REQUEST_RENEWCAPS = 4,
	CEPH_SESSION_RENEWCAPS         = 5,
	CEPH_SESSION_STALE             = 6,
	CEPH_SESSION_RECALL_STATE      = 7,
};

/* printable name for a CEPH_SESSION_* op */
extern const char *ceph_session_op_name(int op);
216 | |||
217 | struct ceph_mds_session_head { | ||
218 | __le32 op; | ||
219 | __le64 seq; | ||
220 | struct ceph_timespec stamp; | ||
221 | __le32 max_caps, max_leases; | ||
222 | } __attribute__ ((packed)); | ||
223 | |||
/* client_request */
/*
 * metadata ops.
 *  & 0x001000 -> write op
 *  & 0x010000 -> follow symlink (e.g. stat(), not lstat()).
 *  & 0x100000 -> use weird ino/path trace
 */
#define CEPH_MDS_OP_WRITE        0x001000
enum {
	/* lookups and attribute reads */
	CEPH_MDS_OP_LOOKUP       = 0x00100,
	CEPH_MDS_OP_GETATTR      = 0x00101,
	CEPH_MDS_OP_LOOKUPHASH   = 0x00102,
	CEPH_MDS_OP_LOOKUPPARENT = 0x00103,

	/* attribute updates */
	CEPH_MDS_OP_SETXATTR     = 0x01105,
	CEPH_MDS_OP_RMXATTR      = 0x01106,
	CEPH_MDS_OP_SETLAYOUT    = 0x01107,
	CEPH_MDS_OP_SETATTR      = 0x01108,

	/* namespace updates */
	CEPH_MDS_OP_MKNOD        = 0x01201,
	CEPH_MDS_OP_LINK         = 0x01202,
	CEPH_MDS_OP_UNLINK       = 0x01203,
	CEPH_MDS_OP_RENAME       = 0x01204,
	CEPH_MDS_OP_MKDIR        = 0x01220,
	CEPH_MDS_OP_RMDIR        = 0x01221,
	CEPH_MDS_OP_SYMLINK      = 0x01222,

	/* open, create, readdir */
	CEPH_MDS_OP_CREATE       = 0x00301,
	CEPH_MDS_OP_OPEN         = 0x00302,
	CEPH_MDS_OP_READDIR      = 0x00305,

	/* snapshots */
	CEPH_MDS_OP_LOOKUPSNAP   = 0x00400,
	CEPH_MDS_OP_MKSNAP       = 0x01400,
	CEPH_MDS_OP_RMSNAP       = 0x01401,
	CEPH_MDS_OP_LSSNAP       = 0x00402,
};

/* printable name for a CEPH_MDS_OP_* value */
extern const char *ceph_mds_op_name(int op);
262 | |||
263 | |||
/* bits for the setattr request's mask field; one bit per attribute */
#define CEPH_SETATTR_MODE   (1 << 0)
#define CEPH_SETATTR_UID    (1 << 1)
#define CEPH_SETATTR_GID    (1 << 2)
#define CEPH_SETATTR_MTIME  (1 << 3)
#define CEPH_SETATTR_ATIME  (1 << 4)
#define CEPH_SETATTR_SIZE   (1 << 5)
#define CEPH_SETATTR_CTIME  (1 << 6)
271 | |||
272 | union ceph_mds_request_args { | ||
273 | struct { | ||
274 | __le32 mask; /* CEPH_CAP_* */ | ||
275 | } __attribute__ ((packed)) getattr; | ||
276 | struct { | ||
277 | __le32 mode; | ||
278 | __le32 uid; | ||
279 | __le32 gid; | ||
280 | struct ceph_timespec mtime; | ||
281 | struct ceph_timespec atime; | ||
282 | __le64 size, old_size; /* old_size needed by truncate */ | ||
283 | __le32 mask; /* CEPH_SETATTR_* */ | ||
284 | } __attribute__ ((packed)) setattr; | ||
285 | struct { | ||
286 | __le32 frag; /* which dir fragment */ | ||
287 | __le32 max_entries; /* how many dentries to grab */ | ||
288 | } __attribute__ ((packed)) readdir; | ||
289 | struct { | ||
290 | __le32 mode; | ||
291 | __le32 rdev; | ||
292 | } __attribute__ ((packed)) mknod; | ||
293 | struct { | ||
294 | __le32 mode; | ||
295 | } __attribute__ ((packed)) mkdir; | ||
296 | struct { | ||
297 | __le32 flags; | ||
298 | __le32 mode; | ||
299 | __le32 stripe_unit; /* layout for newly created file */ | ||
300 | __le32 stripe_count; /* ... */ | ||
301 | __le32 object_size; | ||
302 | __le32 file_replication; | ||
303 | __le32 preferred; | ||
304 | } __attribute__ ((packed)) open; | ||
305 | struct { | ||
306 | __le32 flags; | ||
307 | } __attribute__ ((packed)) setxattr; | ||
308 | struct { | ||
309 | struct ceph_file_layout layout; | ||
310 | } __attribute__ ((packed)) setlayout; | ||
311 | } __attribute__ ((packed)); | ||
312 | |||
313 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ | ||
314 | #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ | ||
315 | |||
316 | struct ceph_mds_request_head { | ||
317 | __le64 tid, oldest_client_tid; | ||
318 | __le32 mdsmap_epoch; /* on client */ | ||
319 | __le32 flags; /* CEPH_MDS_FLAG_* */ | ||
320 | __u8 num_retry, num_fwd; /* count retry, fwd attempts */ | ||
321 | __le16 num_releases; /* # include cap/lease release records */ | ||
322 | __le32 op; /* mds op code */ | ||
323 | __le32 caller_uid, caller_gid; | ||
324 | __le64 ino; /* use this ino for openc, mkdir, mknod, | ||
325 | etc. (if replaying) */ | ||
326 | union ceph_mds_request_args args; | ||
327 | } __attribute__ ((packed)); | ||
328 | |||
329 | /* cap/lease release record */ | ||
330 | struct ceph_mds_request_release { | ||
331 | __le64 ino, cap_id; /* ino and unique cap id */ | ||
332 | __le32 caps, wanted; /* new issued, wanted */ | ||
333 | __le32 seq, issue_seq, mseq; | ||
334 | __le32 dname_seq; /* if releasing a dentry lease, a */ | ||
335 | __le32 dname_len; /* string follows. */ | ||
336 | } __attribute__ ((packed)); | ||
337 | |||
338 | /* client reply */ | ||
339 | struct ceph_mds_reply_head { | ||
340 | __le64 tid; | ||
341 | __le32 op; | ||
342 | __le32 result; | ||
343 | __le32 mdsmap_epoch; | ||
344 | __u8 safe; /* true if committed to disk */ | ||
345 | __u8 is_dentry, is_target; /* true if dentry, target inode records | ||
346 | are included with reply */ | ||
347 | } __attribute__ ((packed)); | ||
348 | |||
349 | /* one for each node split */ | ||
350 | struct ceph_frag_tree_split { | ||
351 | __le32 frag; /* this frag splits... */ | ||
352 | __le32 by; /* ...by this many bits */ | ||
353 | } __attribute__ ((packed)); | ||
354 | |||
355 | struct ceph_frag_tree_head { | ||
356 | __le32 nsplits; /* num ceph_frag_tree_split records */ | ||
357 | struct ceph_frag_tree_split splits[]; | ||
358 | } __attribute__ ((packed)); | ||
359 | |||
360 | /* capability issue, for bundling with mds reply */ | ||
361 | struct ceph_mds_reply_cap { | ||
362 | __le32 caps, wanted; /* caps issued, wanted */ | ||
363 | __le64 cap_id; | ||
364 | __le32 seq, mseq; | ||
365 | __le64 realm; /* snap realm */ | ||
366 | __u8 flags; /* CEPH_CAP_FLAG_* */ | ||
367 | } __attribute__ ((packed)); | ||
368 | |||
369 | #define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */ | ||
370 | |||
371 | /* inode record, for bundling with mds reply */ | ||
372 | struct ceph_mds_reply_inode { | ||
373 | __le64 ino; | ||
374 | __le64 snapid; | ||
375 | __le32 rdev; | ||
376 | __le64 version; /* inode version */ | ||
377 | __le64 xattr_version; /* version for xattr blob */ | ||
378 | struct ceph_mds_reply_cap cap; /* caps issued for this inode */ | ||
379 | struct ceph_file_layout layout; | ||
380 | struct ceph_timespec ctime, mtime, atime; | ||
381 | __le32 time_warp_seq; | ||
382 | __le64 size, max_size, truncate_size; | ||
383 | __le32 truncate_seq; | ||
384 | __le32 mode, uid, gid; | ||
385 | __le32 nlink; | ||
386 | __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */ | ||
387 | struct ceph_timespec rctime; | ||
388 | struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ | ||
389 | } __attribute__ ((packed)); | ||
390 | /* followed by frag array, then symlink string, then xattr blob */ | ||
391 | |||
392 | /* reply_lease follows dname, and reply_inode */ | ||
393 | struct ceph_mds_reply_lease { | ||
394 | __le16 mask; /* lease type(s) */ | ||
395 | __le32 duration_ms; /* lease duration */ | ||
396 | __le32 seq; | ||
397 | } __attribute__ ((packed)); | ||
398 | |||
399 | struct ceph_mds_reply_dirfrag { | ||
400 | __le32 frag; /* fragment */ | ||
401 | __le32 auth; /* auth mds, if this is a delegation point */ | ||
402 | __le32 ndist; /* number of mds' this is replicated on */ | ||
403 | __le32 dist[]; | ||
404 | } __attribute__ ((packed)); | ||
405 | |||
/* file access modes */
#define CEPH_FILE_MODE_PIN   0
#define CEPH_FILE_MODE_RD    1
#define CEPH_FILE_MODE_WR    2
#define CEPH_FILE_MODE_RDWR  3	/* RD | WR */
#define CEPH_FILE_MODE_LAZY  4	/* lazy io */
#define CEPH_FILE_MODE_NUM   8	/* bc these are bit fields.. mostly */

/* map open flags to one of the CEPH_FILE_MODE_* values */
int ceph_flags_to_mode(int flags);
415 | |||
416 | |||
/*
 * capability bits
 *
 * Bit 0 is the pin.  Each lock then owns a run of bits starting at its
 * per-lock shift; a generic cap bit is moved into a lock's run by
 * shifting it left by that lock's CEPH_CAP_S* value.
 */
#define CEPH_CAP_PIN         1  /* no specific capabilities beyond the pin */

/* generic cap bits */
#define CEPH_CAP_GSHARED     1  /* client can read */
#define CEPH_CAP_GEXCL       2  /* client can read and update */
#define CEPH_CAP_GCACHE      4  /* (file) client can cache reads */
#define CEPH_CAP_GRD         8  /* (file) client can read */
#define CEPH_CAP_GWR        16  /* (file) client can write */
#define CEPH_CAP_GBUFFER    32  /* (file) client can buffer writes */
#define CEPH_CAP_GWREXTEND  64  /* (file) client can extend EOF */
#define CEPH_CAP_GLAZYIO   128  /* (file) client can perform lazy io */

/* per-lock shift */
#define CEPH_CAP_SAUTH      2
#define CEPH_CAP_SLINK      4
#define CEPH_CAP_SXATTR     6
#define CEPH_CAP_SFILE      8   /* goes at the end (uses >2 cap bits) */

#define CEPH_CAP_BITS       16

/* composed values */
#define CEPH_CAP_AUTH_SHARED  (CEPH_CAP_GSHARED  << CEPH_CAP_SAUTH)
#define CEPH_CAP_AUTH_EXCL     (CEPH_CAP_GEXCL     << CEPH_CAP_SAUTH)
#define CEPH_CAP_LINK_SHARED  (CEPH_CAP_GSHARED  << CEPH_CAP_SLINK)
#define CEPH_CAP_LINK_EXCL     (CEPH_CAP_GEXCL     << CEPH_CAP_SLINK)
#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED  << CEPH_CAP_SXATTR)
#define CEPH_CAP_XATTR_EXCL    (CEPH_CAP_GEXCL     << CEPH_CAP_SXATTR)
/*
 * Shift generic cap bits into the file lock's range.  The argument is
 * parenthesized so that a compound expression such as
 * CEPH_CAP_FILE(CEPH_CAP_GRD | CEPH_CAP_GWR) shifts every bit, not just
 * the last operand.
 */
#define CEPH_CAP_FILE(x)    ((x) << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_SHARED   (CEPH_CAP_GSHARED   << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_EXCL     (CEPH_CAP_GEXCL     << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_CACHE    (CEPH_CAP_GCACHE    << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_RD       (CEPH_CAP_GRD       << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_WR       (CEPH_CAP_GWR       << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_BUFFER   (CEPH_CAP_GBUFFER   << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_LAZYIO   (CEPH_CAP_GLAZYIO   << CEPH_CAP_SFILE)
454 | |||
/* cap masks (for getattr): which cap covers which inode attribute */
#define CEPH_STAT_CAP_INODE    CEPH_CAP_PIN
#define CEPH_STAT_CAP_TYPE     CEPH_CAP_PIN  /* mode >> 12 */
#define CEPH_STAT_CAP_SYMLINK  CEPH_CAP_PIN
#define CEPH_STAT_CAP_UID      CEPH_CAP_AUTH_SHARED
#define CEPH_STAT_CAP_GID      CEPH_CAP_AUTH_SHARED
#define CEPH_STAT_CAP_MODE     CEPH_CAP_AUTH_SHARED
#define CEPH_STAT_CAP_NLINK    CEPH_CAP_LINK_SHARED
#define CEPH_STAT_CAP_LAYOUT   CEPH_CAP_FILE_SHARED
#define CEPH_STAT_CAP_MTIME    CEPH_CAP_FILE_SHARED
#define CEPH_STAT_CAP_SIZE     CEPH_CAP_FILE_SHARED
#define CEPH_STAT_CAP_ATIME    CEPH_CAP_FILE_SHARED  /* fixme */
#define CEPH_STAT_CAP_XATTR    CEPH_CAP_XATTR_SHARED
#define CEPH_STAT_CAP_INODE_ALL \
	(CEPH_CAP_PIN | \
	 CEPH_CAP_AUTH_SHARED | \
	 CEPH_CAP_LINK_SHARED | \
	 CEPH_CAP_FILE_SHARED | \
	 CEPH_CAP_XATTR_SHARED)

/* every *_SHARED cap */
#define CEPH_CAP_ANY_SHARED \
	(CEPH_CAP_AUTH_SHARED | \
	 CEPH_CAP_LINK_SHARED | \
	 CEPH_CAP_XATTR_SHARED | \
	 CEPH_CAP_FILE_SHARED)
#define CEPH_CAP_ANY_RD \
	(CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE)

/* every *_EXCL cap */
#define CEPH_CAP_ANY_EXCL \
	(CEPH_CAP_AUTH_EXCL | \
	 CEPH_CAP_LINK_EXCL | \
	 CEPH_CAP_XATTR_EXCL | \
	 CEPH_CAP_FILE_EXCL)
#define CEPH_CAP_ANY_FILE_WR \
	(CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_EXCL)
#define CEPH_CAP_ANY_WR   (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
#define CEPH_CAP_ANY \
	(CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
	 CEPH_CAP_ANY_FILE_WR | CEPH_CAP_PIN)

#define CEPH_CAP_LOCKS \
	(CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
	 CEPH_LOCK_IXATTR)

/* capability mask for a CEPH_FILE_MODE_* value */
int ceph_caps_for_mode(int mode);
495 | |||
/* cap message ops (values are consecutive, starting at 0) */
enum {
	CEPH_CAP_OP_GRANT         = 0,	/* mds->client grant */
	CEPH_CAP_OP_REVOKE        = 1,	/* mds->client revoke */
	CEPH_CAP_OP_TRUNC         = 2,	/* mds->client trunc notify */
	CEPH_CAP_OP_EXPORT        = 3,	/* mds has exported the cap */
	CEPH_CAP_OP_IMPORT        = 4,	/* mds has imported the cap */
	CEPH_CAP_OP_UPDATE        = 5,	/* client->mds update */
	CEPH_CAP_OP_DROP          = 6,	/* client->mds drop cap bits */
	CEPH_CAP_OP_FLUSH         = 7,	/* client->mds cap writeback */
	CEPH_CAP_OP_FLUSH_ACK     = 8,	/* mds->client flushed */
	CEPH_CAP_OP_FLUSHSNAP     = 9,	/* client->mds flush snapped metadata */
	CEPH_CAP_OP_FLUSHSNAP_ACK = 10,	/* mds->client flushed snapped metadata */
	CEPH_CAP_OP_RELEASE       = 11,	/* client->mds release (clean) cap */
	CEPH_CAP_OP_RENEW         = 12,	/* client->mds renewal request */
};

/* printable name for a CEPH_CAP_OP_* value */
extern const char *ceph_cap_op_name(int op);
513 | |||
514 | /* | ||
515 | * caps message, used for capability callbacks, acks, requests, etc. | ||
516 | */ | ||
517 | struct ceph_mds_caps { | ||
518 | __le32 op; /* CEPH_CAP_OP_* */ | ||
519 | __le64 ino, realm; | ||
520 | __le64 cap_id; | ||
521 | __le32 seq, issue_seq; | ||
522 | __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */ | ||
523 | __le32 migrate_seq; | ||
524 | __le64 snap_follows; | ||
525 | __le32 snap_trace_len; | ||
526 | __le64 client_tid; /* for FLUSH(SNAP) -> FLUSH(SNAP)_ACK */ | ||
527 | |||
528 | /* authlock */ | ||
529 | __le32 uid, gid, mode; | ||
530 | |||
531 | /* linklock */ | ||
532 | __le32 nlink; | ||
533 | |||
534 | /* xattrlock */ | ||
535 | __le32 xattr_len; | ||
536 | __le64 xattr_version; | ||
537 | |||
538 | /* filelock */ | ||
539 | __le64 size, max_size, truncate_size; | ||
540 | __le32 truncate_seq; | ||
541 | struct ceph_timespec mtime, atime, ctime; | ||
542 | struct ceph_file_layout layout; | ||
543 | __le32 time_warp_seq; | ||
544 | } __attribute__ ((packed)); | ||
545 | |||
546 | /* cap release msg head */ | ||
547 | struct ceph_mds_cap_release { | ||
548 | __le32 num; /* number of cap_items that follow */ | ||
549 | } __attribute__ ((packed)); | ||
550 | |||
551 | struct ceph_mds_cap_item { | ||
552 | __le64 ino; | ||
553 | __le64 cap_id; | ||
554 | __le32 migrate_seq, seq; | ||
555 | } __attribute__ ((packed)); | ||
556 | |||
/* lease actions, with the direction each one travels */
#define CEPH_MDS_LEASE_REVOKE      1  /*    mds -> client */
#define CEPH_MDS_LEASE_RELEASE     2  /* client -> mds    */
#define CEPH_MDS_LEASE_RENEW       3  /* client <-> mds   */
#define CEPH_MDS_LEASE_REVOKE_ACK  4  /* client -> mds    */

/* printable name for a CEPH_MDS_LEASE_* action */
extern const char *ceph_lease_op_name(int o);
563 | |||
564 | /* lease msg header */ | ||
565 | struct ceph_mds_lease { | ||
566 | __u8 action; /* CEPH_MDS_LEASE_* */ | ||
567 | __le16 mask; /* which lease */ | ||
568 | __le64 ino; | ||
569 | __le64 first, last; /* snap range */ | ||
570 | __le32 seq; | ||
571 | __le32 duration_ms; /* duration of renewal */ | ||
572 | } __attribute__ ((packed)); | ||
573 | /* followed by a __le32+string for dname */ | ||
574 | |||
575 | /* client reconnect */ | ||
576 | struct ceph_mds_cap_reconnect { | ||
577 | __le64 cap_id; | ||
578 | __le32 wanted; | ||
579 | __le32 issued; | ||
580 | __le64 size; | ||
581 | struct ceph_timespec mtime, atime; | ||
582 | __le64 snaprealm; | ||
583 | __le64 pathbase; /* base ino for our path to this ino */ | ||
584 | } __attribute__ ((packed)); | ||
585 | /* followed by encoded string */ | ||
586 | |||
587 | struct ceph_mds_snaprealm_reconnect { | ||
588 | __le64 ino; /* snap realm base */ | ||
589 | __le64 seq; /* snap seq for this snap realm */ | ||
590 | __le64 parent; /* parent realm */ | ||
591 | } __attribute__ ((packed)); | ||
592 | |||
/*
 * snaps
 */

/* snap message ops (values are consecutive, starting at 0) */
enum {
	CEPH_SNAP_OP_UPDATE  = 0,	/* CREATE or DESTROY */
	CEPH_SNAP_OP_CREATE  = 1,
	CEPH_SNAP_OP_DESTROY = 2,
	CEPH_SNAP_OP_SPLIT   = 3,
};

/* printable name for a CEPH_SNAP_OP_* value */
extern const char *ceph_snap_op_name(int o);
604 | |||
605 | /* snap msg header */ | ||
606 | struct ceph_mds_snap_head { | ||
607 | __le32 op; /* CEPH_SNAP_OP_* */ | ||
608 | __le64 split; /* ino to split off, if any */ | ||
609 | __le32 num_split_inos; /* # inos belonging to new child realm */ | ||
610 | __le32 num_split_realms; /* # child realms udner new child realm */ | ||
611 | __le32 trace_len; /* size of snap trace blob */ | ||
612 | } __attribute__ ((packed)); | ||
613 | /* followed by split ino list, then split realms, then the trace blob */ | ||
614 | |||
615 | /* | ||
616 | * encode info about a snaprealm, as viewed by a client | ||
617 | */ | ||
618 | struct ceph_mds_snap_realm { | ||
619 | __le64 ino; /* ino */ | ||
620 | __le64 created; /* snap: when created */ | ||
621 | __le64 parent; /* ino: parent realm */ | ||
622 | __le64 parent_since; /* snap: same parent since */ | ||
623 | __le64 seq; /* snap: version */ | ||
624 | __le32 num_snaps; | ||
625 | __le32 num_prior_parent_snaps; | ||
626 | } __attribute__ ((packed)); | ||
627 | /* followed by my snap list, then prior parent snap list */ | ||
628 | |||
629 | #endif | ||
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c new file mode 100644 index 000000000000..90d19d9d8d8f --- /dev/null +++ b/fs/ceph/ceph_strings.c | |||
@@ -0,0 +1,163 @@ | |||
1 | /* | ||
2 | * Ceph string constants | ||
3 | */ | ||
4 | #include "types.h" | ||
5 | |||
6 | const char *ceph_osd_op_name(int op) | ||
7 | { | ||
8 | switch (op) { | ||
9 | case CEPH_OSD_OP_READ: return "read"; | ||
10 | case CEPH_OSD_OP_STAT: return "stat"; | ||
11 | |||
12 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
13 | |||
14 | case CEPH_OSD_OP_WRITE: return "write"; | ||
15 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
16 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
17 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
18 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
19 | |||
20 | case CEPH_OSD_OP_APPEND: return "append"; | ||
21 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
22 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
23 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
24 | |||
25 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
26 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
27 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
28 | |||
29 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
30 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
31 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
32 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
33 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
34 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
35 | |||
36 | case CEPH_OSD_OP_PULL: return "pull"; | ||
37 | case CEPH_OSD_OP_PUSH: return "push"; | ||
38 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
39 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
40 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
41 | |||
42 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
43 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
44 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
45 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
46 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
47 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
48 | |||
49 | case CEPH_OSD_OP_CALL: return "call"; | ||
50 | |||
51 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
52 | } | ||
53 | return "???"; | ||
54 | } | ||
55 | |||
56 | const char *ceph_mds_state_name(int s) | ||
57 | { | ||
58 | switch (s) { | ||
59 | /* down and out */ | ||
60 | case CEPH_MDS_STATE_DNE: return "down:dne"; | ||
61 | case CEPH_MDS_STATE_STOPPED: return "down:stopped"; | ||
62 | /* up and out */ | ||
63 | case CEPH_MDS_STATE_BOOT: return "up:boot"; | ||
64 | case CEPH_MDS_STATE_STANDBY: return "up:standby"; | ||
65 | case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; | ||
66 | case CEPH_MDS_STATE_CREATING: return "up:creating"; | ||
67 | case CEPH_MDS_STATE_STARTING: return "up:starting"; | ||
68 | /* up and in */ | ||
69 | case CEPH_MDS_STATE_REPLAY: return "up:replay"; | ||
70 | case CEPH_MDS_STATE_RESOLVE: return "up:resolve"; | ||
71 | case CEPH_MDS_STATE_RECONNECT: return "up:reconnect"; | ||
72 | case CEPH_MDS_STATE_REJOIN: return "up:rejoin"; | ||
73 | case CEPH_MDS_STATE_CLIENTREPLAY: return "up:clientreplay"; | ||
74 | case CEPH_MDS_STATE_ACTIVE: return "up:active"; | ||
75 | case CEPH_MDS_STATE_STOPPING: return "up:stopping"; | ||
76 | } | ||
77 | return "???"; | ||
78 | } | ||
79 | |||
80 | const char *ceph_session_op_name(int op) | ||
81 | { | ||
82 | switch (op) { | ||
83 | case CEPH_SESSION_REQUEST_OPEN: return "request_open"; | ||
84 | case CEPH_SESSION_OPEN: return "open"; | ||
85 | case CEPH_SESSION_REQUEST_CLOSE: return "request_close"; | ||
86 | case CEPH_SESSION_CLOSE: return "close"; | ||
87 | case CEPH_SESSION_REQUEST_RENEWCAPS: return "request_renewcaps"; | ||
88 | case CEPH_SESSION_RENEWCAPS: return "renewcaps"; | ||
89 | case CEPH_SESSION_STALE: return "stale"; | ||
90 | case CEPH_SESSION_RECALL_STATE: return "recall_state"; | ||
91 | } | ||
92 | return "???"; | ||
93 | } | ||
94 | |||
95 | const char *ceph_mds_op_name(int op) | ||
96 | { | ||
97 | switch (op) { | ||
98 | case CEPH_MDS_OP_LOOKUP: return "lookup"; | ||
99 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; | ||
100 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; | ||
101 | case CEPH_MDS_OP_GETATTR: return "getattr"; | ||
102 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; | ||
103 | case CEPH_MDS_OP_SETATTR: return "setattr"; | ||
104 | case CEPH_MDS_OP_RMXATTR: return "rmxattr"; | ||
105 | case CEPH_MDS_OP_READDIR: return "readdir"; | ||
106 | case CEPH_MDS_OP_MKNOD: return "mknod"; | ||
107 | case CEPH_MDS_OP_LINK: return "link"; | ||
108 | case CEPH_MDS_OP_UNLINK: return "unlink"; | ||
109 | case CEPH_MDS_OP_RENAME: return "rename"; | ||
110 | case CEPH_MDS_OP_MKDIR: return "mkdir"; | ||
111 | case CEPH_MDS_OP_RMDIR: return "rmdir"; | ||
112 | case CEPH_MDS_OP_SYMLINK: return "symlink"; | ||
113 | case CEPH_MDS_OP_CREATE: return "create"; | ||
114 | case CEPH_MDS_OP_OPEN: return "open"; | ||
115 | case CEPH_MDS_OP_LOOKUPSNAP: return "lookupsnap"; | ||
116 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | ||
117 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | ||
118 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | ||
119 | } | ||
120 | return "???"; | ||
121 | } | ||
122 | |||
123 | const char *ceph_cap_op_name(int op) | ||
124 | { | ||
125 | switch (op) { | ||
126 | case CEPH_CAP_OP_GRANT: return "grant"; | ||
127 | case CEPH_CAP_OP_REVOKE: return "revoke"; | ||
128 | case CEPH_CAP_OP_TRUNC: return "trunc"; | ||
129 | case CEPH_CAP_OP_EXPORT: return "export"; | ||
130 | case CEPH_CAP_OP_IMPORT: return "import"; | ||
131 | case CEPH_CAP_OP_UPDATE: return "update"; | ||
132 | case CEPH_CAP_OP_DROP: return "drop"; | ||
133 | case CEPH_CAP_OP_FLUSH: return "flush"; | ||
134 | case CEPH_CAP_OP_FLUSH_ACK: return "flush_ack"; | ||
135 | case CEPH_CAP_OP_FLUSHSNAP: return "flushsnap"; | ||
136 | case CEPH_CAP_OP_FLUSHSNAP_ACK: return "flushsnap_ack"; | ||
137 | case CEPH_CAP_OP_RELEASE: return "release"; | ||
138 | case CEPH_CAP_OP_RENEW: return "renew"; | ||
139 | } | ||
140 | return "???"; | ||
141 | } | ||
142 | |||
143 | const char *ceph_lease_op_name(int o) | ||
144 | { | ||
145 | switch (o) { | ||
146 | case CEPH_MDS_LEASE_REVOKE: return "revoke"; | ||
147 | case CEPH_MDS_LEASE_RELEASE: return "release"; | ||
148 | case CEPH_MDS_LEASE_RENEW: return "renew"; | ||
149 | case CEPH_MDS_LEASE_REVOKE_ACK: return "revoke_ack"; | ||
150 | } | ||
151 | return "???"; | ||
152 | } | ||
153 | |||
154 | const char *ceph_snap_op_name(int o) | ||
155 | { | ||
156 | switch (o) { | ||
157 | case CEPH_SNAP_OP_UPDATE: return "update"; | ||
158 | case CEPH_SNAP_OP_CREATE: return "create"; | ||
159 | case CEPH_SNAP_OP_DESTROY: return "destroy"; | ||
160 | case CEPH_SNAP_OP_SPLIT: return "split"; | ||
161 | } | ||
162 | return "???"; | ||
163 | } | ||
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h new file mode 100644 index 000000000000..73921ae43faa --- /dev/null +++ b/fs/ceph/msgr.h | |||
@@ -0,0 +1,157 @@ | |||
1 | #ifndef __MSGR_H | ||
2 | #define __MSGR_H | ||
3 | |||
4 | /* | ||
5 | * Data types for message passing layer used by Ceph. | ||
6 | */ | ||
7 | |||
8 | #define CEPH_MON_PORT 6789 /* default monitor port */ | ||
9 | |||
10 | /* | ||
11 | * client-side processes will try to bind to ports in this | ||
12 | * range, simply for the benefit of tools like nmap or wireshark | ||
13 | * that would like to identify the protocol. | ||
14 | */ | ||
15 | #define CEPH_PORT_FIRST 6789 | ||
16 | #define CEPH_PORT_START 6800 /* non-monitors start here */ | ||
17 | #define CEPH_PORT_LAST 6900 | ||
18 | |||
19 | /* | ||
20 | * tcp connection banner. include a protocol version. and adjust | ||
21 | * whenever the wire protocol changes. try to keep this string length | ||
22 | * constant. | ||
23 | */ | ||
24 | #define CEPH_BANNER "ceph v021" | ||
25 | #define CEPH_BANNER_MAX_LEN 30 | ||
26 | |||
27 | |||
/*
 * Rollover-safe type and comparator for 32-bit sequence numbers.
 *
 * The comparator returns the signed distance from b to a, taken
 * modulo 2^32: negative when a is behind b, 0 when they are equal,
 * positive when a is ahead of b.  Callers should test only the sign;
 * the result is not normalized to -1/0/1.
 */
typedef __u32 ceph_seq_t;

static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
{
	/*
	 * Subtract in unsigned arithmetic, where wraparound is well
	 * defined, and reinterpret the distance as signed afterwards.
	 * Subtracting the operands as signed values can overflow, which
	 * is undefined behavior in C.
	 */
	return (__s32)(a - b);
}
38 | |||
39 | |||
40 | /* | ||
41 | * entity_name -- logical name for a process participating in the | ||
42 | * network, e.g. 'mds0' or 'osd3'. | ||
43 | */ | ||
44 | struct ceph_entity_name { | ||
45 | __u8 type; /* CEPH_ENTITY_TYPE_* */ | ||
46 | __le64 num; | ||
47 | } __attribute__ ((packed)); | ||
48 | |||
49 | #define CEPH_ENTITY_TYPE_MON 1 | ||
50 | #define CEPH_ENTITY_TYPE_MDS 2 | ||
51 | #define CEPH_ENTITY_TYPE_OSD 3 | ||
52 | #define CEPH_ENTITY_TYPE_CLIENT 4 | ||
53 | #define CEPH_ENTITY_TYPE_ADMIN 5 | ||
54 | |||
55 | /* | ||
56 | * entity_addr -- network address | ||
57 | */ | ||
58 | struct ceph_entity_addr { | ||
59 | __le32 erank; /* entity's rank in process */ | ||
60 | __le32 nonce; /* unique id for process (e.g. pid) */ | ||
61 | struct sockaddr_storage in_addr; | ||
62 | } __attribute__ ((packed)); | ||
63 | |||
64 | static inline bool ceph_entity_addr_is_local(const struct ceph_entity_addr *a, | ||
65 | const struct ceph_entity_addr *b) | ||
66 | { | ||
67 | return a->nonce == b->nonce && | ||
68 | memcmp(&a->in_addr, &b->in_addr, sizeof(a->in_addr)) == 0; | ||
69 | } | ||
70 | |||
71 | static inline bool ceph_entity_addr_equal(const struct ceph_entity_addr *a, | ||
72 | const struct ceph_entity_addr *b) | ||
73 | { | ||
74 | return memcmp(a, b, sizeof(*a)) == 0; | ||
75 | } | ||
76 | |||
77 | struct ceph_entity_inst { | ||
78 | struct ceph_entity_name name; | ||
79 | struct ceph_entity_addr addr; | ||
80 | } __attribute__ ((packed)); | ||
81 | |||
82 | |||
83 | /* used by message exchange protocol */ | ||
84 | #define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */ | ||
85 | #define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */ | ||
86 | #define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing | ||
87 | incoming connection */ | ||
88 | #define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again | ||
89 | with higher cseq */ | ||
90 | #define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again | ||
91 | with higher gseq */ | ||
92 | #define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */ | ||
93 | #define CEPH_MSGR_TAG_MSG 7 /* message */ | ||
94 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ | ||
95 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ | ||
96 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ | ||
97 | |||
98 | |||
99 | /* | ||
100 | * connection negotiation | ||
101 | */ | ||
102 | struct ceph_msg_connect { | ||
103 | __le32 host_type; /* CEPH_ENTITY_TYPE_* */ | ||
104 | __le32 global_seq; /* count connections initiated by this host */ | ||
105 | __le32 connect_seq; /* count connections initiated in this session */ | ||
106 | __le32 protocol_version; | ||
107 | __u8 flags; /* CEPH_MSG_CONNECT_* */ | ||
108 | } __attribute__ ((packed)); | ||
109 | |||
110 | struct ceph_msg_connect_reply { | ||
111 | __u8 tag; | ||
112 | __le32 global_seq; | ||
113 | __le32 connect_seq; | ||
114 | __le32 protocol_version; | ||
115 | __u8 flags; | ||
116 | } __attribute__ ((packed)); | ||
117 | |||
118 | #define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */ | ||
119 | |||
120 | |||
121 | /* | ||
122 | * message header | ||
123 | */ | ||
124 | struct ceph_msg_header { | ||
125 | __le64 seq; /* message seq# for this session */ | ||
126 | __le16 type; /* message type */ | ||
127 | __le16 priority; /* priority. higher value == higher priority */ | ||
128 | |||
129 | __le32 front_len; /* bytes in main payload */ | ||
130 | __le32 middle_len;/* bytes in middle payload */ | ||
131 | __le32 data_len; /* bytes of data payload */ | ||
132 | __le16 data_off; /* sender: include full offset; | ||
133 | receiver: mask against ~PAGE_MASK */ | ||
134 | |||
135 | struct ceph_entity_inst src, orig_src; | ||
136 | __le32 dst_erank; | ||
137 | __le32 crc; /* header crc32c */ | ||
138 | } __attribute__ ((packed)); | ||
139 | |||
140 | #define CEPH_MSG_PRIO_LOW 64 | ||
141 | #define CEPH_MSG_PRIO_DEFAULT 127 | ||
142 | #define CEPH_MSG_PRIO_HIGH 196 | ||
143 | #define CEPH_MSG_PRIO_HIGHEST 255 | ||
144 | |||
145 | /* | ||
146 | * follows data payload | ||
147 | */ | ||
148 | struct ceph_msg_footer { | ||
149 | __le32 front_crc, middle_crc, data_crc; | ||
150 | __u8 flags; | ||
151 | } __attribute__ ((packed)); | ||
152 | |||
153 | #define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */ | ||
154 | #define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ | ||
155 | |||
156 | |||
157 | #endif | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h new file mode 100644 index 000000000000..a48cf4ae391e --- /dev/null +++ b/fs/ceph/rados.h | |||
@@ -0,0 +1,372 @@ | |||
1 | #ifndef __RADOS_H | ||
2 | #define __RADOS_H | ||
3 | |||
4 | /* | ||
5 | * Data types for the Ceph distributed object storage layer RADOS | ||
6 | * (Reliable Autonomic Distributed Object Store). | ||
7 | */ | ||
8 | |||
9 | #include "msgr.h" | ||
10 | |||
11 | /* | ||
12 | * fs id | ||
13 | */ | ||
struct ceph_fsid {
	unsigned char fsid[16];	/* 16 raw id bytes */
};

/*
 * Bytewise comparison of two fsids.  Returns the memcmp() result:
 * 0 when the ids are identical, otherwise a negative or positive value
 * according to the first differing byte.
 */
static inline int ceph_fsid_compare(const struct ceph_fsid *a,
				    const struct ceph_fsid *b)
{
	const size_t len = sizeof(struct ceph_fsid);

	return memcmp(a, b, len);
}
23 | |||
24 | /* | ||
25 | * ino, object, etc. | ||
26 | */ | ||
27 | typedef __le64 ceph_snapid_t; | ||
28 | #define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */ | ||
29 | #define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */ | ||
30 | #define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */ | ||
31 | |||
32 | struct ceph_timespec { | ||
33 | __le32 tv_sec; | ||
34 | __le32 tv_nsec; | ||
35 | } __attribute__ ((packed)); | ||
36 | |||
37 | |||
38 | /* | ||
39 | * object layout - how objects are mapped into PGs | ||
40 | */ | ||
41 | #define CEPH_OBJECT_LAYOUT_HASH 1 | ||
42 | #define CEPH_OBJECT_LAYOUT_LINEAR 2 | ||
43 | #define CEPH_OBJECT_LAYOUT_HASHINO 3 | ||
44 | |||
45 | /* | ||
46 | * pg layout -- how PGs are mapped onto (sets of) OSDs | ||
47 | */ | ||
48 | #define CEPH_PG_LAYOUT_CRUSH 0 | ||
49 | #define CEPH_PG_LAYOUT_HASH 1 | ||
50 | #define CEPH_PG_LAYOUT_LINEAR 2 | ||
51 | #define CEPH_PG_LAYOUT_HYBRID 3 | ||
52 | |||
53 | |||
54 | /* | ||
55 | * placement group. | ||
56 | * we encode this into one __le64. | ||
57 | */ | ||
58 | union ceph_pg { | ||
59 | __u64 pg64; | ||
60 | struct { | ||
61 | __s16 preferred; /* preferred primary osd */ | ||
62 | __u16 ps; /* placement seed */ | ||
63 | __u32 pool; /* object pool */ | ||
64 | } __attribute__ ((packed)) pg; | ||
65 | } __attribute__ ((packed)); | ||
66 | |||
67 | /* | ||
68 | * pg_pool is a set of pgs storing a pool of objects | ||
69 | * | ||
70 | * pg_num -- base number of pseudorandomly placed pgs | ||
71 | * | ||
72 | * pgp_num -- effective number when calculating pg placement. this | ||
73 | * is used for pg_num increases. new pgs result in data being "split" | ||
74 | * into new pgs. for this to proceed smoothly, new pgs are initially | ||
75 | * colocated with their parents; that is, pgp_num doesn't increase | ||
76 | * until the new pgs have successfully split. only _then_ are the new | ||
77 | * pgs placed independently. | ||
78 | * | ||
79 | * lpg_num -- localized pg count (per device). replicas are randomly | ||
80 | * selected. | ||
81 | * | ||
82 | * lpgp_num -- as above. | ||
83 | */ | ||
84 | #define CEPH_PG_TYPE_REP 1 | ||
85 | #define CEPH_PG_TYPE_RAID4 2 | ||
86 | struct ceph_pg_pool { | ||
87 | __u8 type; /* CEPH_PG_TYPE_* */ | ||
88 | __u8 size; /* number of osds in each pg */ | ||
89 | __u8 crush_ruleset; /* crush placement rule */ | ||
90 | __le32 pg_num, pgp_num; /* number of pg's */ | ||
91 | __le32 lpg_num, lpgp_num; /* number of localized pg's */ | ||
92 | __le32 last_change; /* most recent epoch changed */ | ||
93 | __le64 snap_seq; /* seq for per-pool snapshot */ | ||
94 | __le32 snap_epoch; /* epoch of last snap */ | ||
95 | __le32 num_snaps; | ||
96 | __le32 num_removed_snap_intervals; | ||
97 | } __attribute__ ((packed)); | ||
98 | |||
99 | /* | ||
100 | * stable_mod func is used to control number of placement groups. | ||
101 | * similar to straight-up modulo, but produces a stable mapping as b | ||
102 | * increases over time. b is the number of bins, and bmask is the | ||
103 | * containing power of 2 minus 1. | ||
104 | * | ||
105 | * b <= bmask and bmask=(2**n)-1 | ||
106 | * e.g., b=12 -> bmask=15, b=123 -> bmask=127 | ||
107 | */ | ||
static inline int ceph_stable_mod(int x, int b, int bmask)
{
	const int bin = x & bmask;

	/*
	 * A masked value at or beyond b is re-masked with the next
	 * smaller mask (bmask >> 1) instead.
	 */
	return bin < b ? bin : (x & (bmask >> 1));
}
115 | |||
116 | /* | ||
117 | * object layout - how a given object should be stored. | ||
118 | */ | ||
119 | struct ceph_object_layout { | ||
120 | __le64 ol_pgid; /* raw pg, with _full_ ps precision. */ | ||
121 | __le32 ol_stripe_unit; /* for per-object parity, if any */ | ||
122 | } __attribute__ ((packed)); | ||
123 | |||
124 | /* | ||
125 | * compound epoch+version, used by storage layer to serialize mutations | ||
126 | */ | ||
127 | struct ceph_eversion { | ||
128 | __le32 epoch; | ||
129 | __le64 version; | ||
130 | } __attribute__ ((packed)); | ||
131 | |||
132 | /* | ||
133 | * osd map bits | ||
134 | */ | ||
135 | |||
136 | /* status bits */ | ||
137 | #define CEPH_OSD_EXISTS 1 | ||
138 | #define CEPH_OSD_UP 2 | ||
139 | |||
140 | /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ | ||
141 | #define CEPH_OSD_IN 0x10000 | ||
142 | #define CEPH_OSD_OUT 0 | ||
143 | |||
144 | |||
145 | /* | ||
146 | * osd map flag bits | ||
147 | */ | ||
148 | #define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ | ||
149 | #define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ | ||
150 | #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ | ||
151 | #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ | ||
152 | #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ | ||
153 | |||
154 | /* | ||
155 | * osd ops | ||
156 | */ | ||
157 | #define CEPH_OSD_OP_MODE 0xf000 | ||
158 | #define CEPH_OSD_OP_MODE_RD 0x1000 | ||
159 | #define CEPH_OSD_OP_MODE_WR 0x2000 | ||
160 | #define CEPH_OSD_OP_MODE_RMW 0x3000 | ||
161 | #define CEPH_OSD_OP_MODE_SUB 0x4000 | ||
162 | #define CEPH_OSD_OP_MODE_EXEC 0x8000 | ||
163 | |||
164 | #define CEPH_OSD_OP_TYPE 0x0f00 | ||
165 | #define CEPH_OSD_OP_TYPE_LOCK 0x0100 | ||
166 | #define CEPH_OSD_OP_TYPE_DATA 0x0200 | ||
167 | #define CEPH_OSD_OP_TYPE_ATTR 0x0300 | ||
168 | #define CEPH_OSD_OP_TYPE_EXEC 0x0400 | ||
169 | #define CEPH_OSD_OP_TYPE_PG 0x0500 | ||
170 | |||
171 | enum { | ||
172 | /** data **/ | ||
173 | /* read */ | ||
174 | CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1, | ||
175 | CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2, | ||
176 | |||
177 | /* fancy read */ | ||
178 | CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4, | ||
179 | |||
180 | /* write */ | ||
181 | CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1, | ||
182 | CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2, | ||
183 | CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3, | ||
184 | CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4, | ||
185 | CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5, | ||
186 | |||
187 | /* fancy write */ | ||
188 | CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6, | ||
189 | CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7, | ||
190 | CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8, | ||
191 | CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9, | ||
192 | |||
193 | CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10, | ||
194 | CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11, | ||
195 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, | ||
196 | |||
197 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, | ||
198 | |||
199 | /** attrs **/ | ||
200 | /* read */ | ||
201 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | ||
202 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, | ||
203 | |||
204 | /* write */ | ||
205 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, | ||
206 | CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2, | ||
207 | CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3, | ||
208 | CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4, | ||
209 | |||
210 | /** subop **/ | ||
211 | CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1, | ||
212 | CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2, | ||
213 | CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3, | ||
214 | CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4, | ||
215 | CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5, | ||
216 | |||
217 | /** lock **/ | ||
218 | CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, | ||
219 | CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2, | ||
220 | CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3, | ||
221 | CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4, | ||
222 | CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5, | ||
223 | CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, | ||
224 | |||
225 | /** exec **/ | ||
226 | CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, | ||
227 | |||
228 | /** pg **/ | ||
229 | CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, | ||
230 | }; | ||
231 | |||
232 | static inline int ceph_osd_op_type_lock(int op) | ||
233 | { | ||
234 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK; | ||
235 | } | ||
236 | static inline int ceph_osd_op_type_data(int op) | ||
237 | { | ||
238 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA; | ||
239 | } | ||
240 | static inline int ceph_osd_op_type_attr(int op) | ||
241 | { | ||
242 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR; | ||
243 | } | ||
244 | static inline int ceph_osd_op_type_exec(int op) | ||
245 | { | ||
246 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC; | ||
247 | } | ||
248 | static inline int ceph_osd_op_type_pg(int op) | ||
249 | { | ||
250 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; | ||
251 | } | ||
252 | |||
253 | static inline int ceph_osd_op_mode_subop(int op) | ||
254 | { | ||
255 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB; | ||
256 | } | ||
257 | static inline int ceph_osd_op_mode_read(int op) | ||
258 | { | ||
259 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; | ||
260 | } | ||
261 | static inline int ceph_osd_op_mode_modify(int op) | ||
262 | { | ||
263 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | ||
264 | } | ||
265 | |||
266 | #define CEPH_OSD_TMAP_HDR 'h' | ||
267 | #define CEPH_OSD_TMAP_SET 's' | ||
268 | #define CEPH_OSD_TMAP_RM 'r' | ||
269 | |||
270 | extern const char *ceph_osd_op_name(int op); | ||
271 | |||
272 | |||
273 | /* | ||
274 | * osd op flags | ||
275 | * | ||
276 | * An op may be READ, WRITE, or READ|WRITE. | ||
277 | */ | ||
278 | enum { | ||
279 | CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ | ||
280 | CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ | ||
281 | CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ | ||
282 | CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ | ||
283 | CEPH_OSD_FLAG_READ = 16, /* op may read */ | ||
284 | CEPH_OSD_FLAG_WRITE = 32, /* op may write */ | ||
285 | CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ | ||
286 | CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ | ||
287 | CEPH_OSD_FLAG_BALANCE_READS = 256, | ||
288 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | ||
289 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | ||
290 | }; | ||
291 | |||
292 | enum { | ||
293 | CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ | ||
294 | }; | ||
295 | |||
296 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | ||
297 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ | ||
298 | |||
299 | /* | ||
300 | * an individual object operation. each may be accompanied by some data | ||
301 | * payload | ||
302 | */ | ||
303 | struct ceph_osd_op { | ||
304 | __le16 op; /* CEPH_OSD_OP_* */ | ||
305 | __le32 flags; /* CEPH_OSD_FLAG_* */ | ||
306 | union { | ||
307 | struct { | ||
308 | __le64 offset, length; | ||
309 | } __attribute__ ((packed)) extent; | ||
310 | struct { | ||
311 | __le32 name_len; | ||
312 | __le32 value_len; | ||
313 | } __attribute__ ((packed)) xattr; | ||
314 | struct { | ||
315 | __le64 truncate_size; | ||
316 | __le32 truncate_seq; | ||
317 | } __attribute__ ((packed)) trunc; | ||
318 | struct { | ||
319 | __u8 class_len; | ||
320 | __u8 method_len; | ||
321 | __u8 argc; | ||
322 | __le32 indata_len; | ||
323 | } __attribute__ ((packed)) cls; | ||
324 | struct { | ||
325 | __le64 cookie, count; | ||
326 | } __attribute__ ((packed)) pgls; | ||
327 | }; | ||
328 | __le32 payload_len; | ||
329 | } __attribute__ ((packed)); | ||
330 | |||
331 | /* | ||
332 | * osd request message header. each request may include multiple | ||
333 | * ceph_osd_op object operations. | ||
334 | */ | ||
335 | struct ceph_osd_request_head { | ||
336 | __le64 tid; /* transaction id */ | ||
337 | __le32 client_inc; /* client incarnation */ | ||
338 | struct ceph_object_layout layout; /* pgid */ | ||
339 | __le32 osdmap_epoch; /* client's osdmap epoch */ | ||
340 | |||
341 | __le32 flags; | ||
342 | |||
343 | struct ceph_timespec mtime; /* for mutations only */ | ||
344 | struct ceph_eversion reassert_version; /* if we are replaying op */ | ||
345 | |||
346 | __le32 object_len; /* length of object name */ | ||
347 | |||
348 | __le64 snapid; /* snapid to read */ | ||
349 | __le64 snap_seq; /* writer's snap context */ | ||
350 | __le32 num_snaps; | ||
351 | |||
352 | __le16 num_ops; | ||
353 | struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ | ||
354 | } __attribute__ ((packed)); | ||
355 | |||
356 | struct ceph_osd_reply_head { | ||
357 | __le64 tid; /* transaction id */ | ||
358 | __le32 client_inc; /* client incarnation */ | ||
359 | __le32 flags; | ||
360 | struct ceph_object_layout layout; | ||
361 | __le32 osdmap_epoch; | ||
362 | struct ceph_eversion reassert_version; /* for replaying uncommitted */ | ||
363 | |||
364 | __le32 result; /* result code */ | ||
365 | |||
366 | __le32 object_len; /* length of object name */ | ||
367 | __le32 num_ops; | ||
368 | struct ceph_osd_op ops[0]; /* ops[], object */ | ||
369 | } __attribute__ ((packed)); | ||
370 | |||
371 | |||
372 | #endif | ||