diff options
| author | Sage Weil <sage@newdream.net> | 2009-10-06 14:31:06 -0400 |
|---|---|---|
| committer | Sage Weil <sage@newdream.net> | 2009-10-06 14:31:06 -0400 |
| commit | 0dee3c28af2fbe22ca62739a7f57da5435d35793 (patch) | |
| tree | dd5992a4abc86c5931ce36258b972dbf48ab355d | |
| parent | 7ad920b504a980adcab4d3f6b85695526e6fd7bb (diff) | |
ceph: on-wire types
These headers describe the types used to exchange messages between the
Ceph client and various servers. All types are little-endian and
packed. These headers are shared between the kernel and userspace, so
all types are in terms of e.g. __u32.
Additionally, we define a few magic values to identify the current
version of the protocol(s) in use, so that discrepancies can be
detected on mount.
Signed-off-by: Sage Weil <sage@newdream.net>
| -rw-r--r-- | fs/ceph/ceph_fs.c | 80 | ||||
| -rw-r--r-- | fs/ceph/ceph_fs.h | 629 | ||||
| -rw-r--r-- | fs/ceph/ceph_strings.c | 163 | ||||
| -rw-r--r-- | fs/ceph/msgr.h | 157 | ||||
| -rw-r--r-- | fs/ceph/rados.h | 372 |
5 files changed, 1401 insertions, 0 deletions
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c new file mode 100644 index 00000000000..9371ff1c000 --- /dev/null +++ b/fs/ceph/ceph_fs.c | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | /* | ||
| 2 | * Some non-inline ceph helpers | ||
| 3 | */ | ||
| 4 | #include "types.h" | ||
| 5 | |||
| 6 | int ceph_flags_to_mode(int flags) | ||
| 7 | { | ||
| 8 | #ifdef O_DIRECTORY /* fixme */ | ||
| 9 | if ((flags & O_DIRECTORY) == O_DIRECTORY) | ||
| 10 | return CEPH_FILE_MODE_PIN; | ||
| 11 | #endif | ||
| 12 | #ifdef O_LAZY | ||
| 13 | if (flags & O_LAZY) | ||
| 14 | return CEPH_FILE_MODE_LAZY; | ||
| 15 | #endif | ||
| 16 | if ((flags & O_APPEND) == O_APPEND) | ||
| 17 | flags |= O_WRONLY; | ||
| 18 | |||
| 19 | flags &= O_ACCMODE; | ||
| 20 | if ((flags & O_RDWR) == O_RDWR) | ||
| 21 | return CEPH_FILE_MODE_RDWR; | ||
| 22 | if ((flags & O_WRONLY) == O_WRONLY) | ||
| 23 | return CEPH_FILE_MODE_WR; | ||
| 24 | return CEPH_FILE_MODE_RD; | ||
| 25 | } | ||
| 26 | |||
| 27 | int ceph_caps_for_mode(int mode) | ||
| 28 | { | ||
| 29 | switch (mode) { | ||
| 30 | case CEPH_FILE_MODE_PIN: | ||
| 31 | return CEPH_CAP_PIN; | ||
| 32 | case CEPH_FILE_MODE_RD: | ||
| 33 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
| 34 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; | ||
| 35 | case CEPH_FILE_MODE_RDWR: | ||
| 36 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
| 37 | CEPH_CAP_FILE_EXCL | | ||
| 38 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | | ||
| 39 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
| 40 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
| 41 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
| 42 | case CEPH_FILE_MODE_WR: | ||
| 43 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
| 44 | CEPH_CAP_FILE_EXCL | | ||
| 45 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
| 46 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
| 47 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
| 48 | } | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
/* Name hashing routines. Initial hash value */
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define ceph_init_name_hash()		0

/*
 * Partial hash update function.  Assume roughly 4 bits per character.
 *
 * c is the byte being mixed in and prevhash the running hash; the updated
 * running hash is returned.
 */
static unsigned long ceph_partial_name_hash(unsigned long c,
					    unsigned long prevhash)
{
	return (prevhash + (c << 4) + (c >> 4)) * 11;
}

/*
 * Finally: cut down the number of bits to an int value (and try to avoid
 * losing bits).
 */
static unsigned long ceph_end_name_hash(unsigned long hash)
{
	return hash & 0xffffffff;
}

/*
 * Compute the hash for a name string.
 *
 * name points to len bytes (no NUL terminator is required); the low
 * 32 bits of the accumulated hash are returned.  An empty name hashes to
 * the initial value, 0.
 *
 * Each byte is converted through unsigned char before it is widened.
 * Plain char may be signed, in which case a byte >= 0x80 would sign-extend
 * to a huge unsigned long and the (c >> 4) term would depend on the width
 * of long, making the hash differ between platforms for the same name.
 */
unsigned int ceph_full_name_hash(const char *name, unsigned int len)
{
	unsigned long hash = ceph_init_name_hash();

	while (len--)
		hash = ceph_partial_name_hash((unsigned char)*name++, hash);
	return ceph_end_name_hash(hash);
}
| 80 | |||
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h new file mode 100644 index 00000000000..21ed51b127f --- /dev/null +++ b/fs/ceph/ceph_fs.h | |||
| @@ -0,0 +1,629 @@ | |||
| 1 | /* | ||
| 2 | * ceph_fs.h - Ceph constants and data types to share between kernel and | ||
| 3 | * user space. | ||
| 4 | * | ||
| 5 | * Most types in this file are defined as little-endian, and are | ||
| 6 | * primarily intended to describe data structures that pass over the | ||
| 7 | * wire or that are stored on disk. | ||
| 8 | * | ||
| 9 | * LGPL2 | ||
| 10 | */ | ||
| 11 | |||
| 12 | #ifndef _FS_CEPH_CEPH_FS_H | ||
| 13 | #define _FS_CEPH_CEPH_FS_H | ||
| 14 | |||
| 15 | #include "msgr.h" | ||
| 16 | #include "rados.h" | ||
| 17 | |||
| 18 | /* | ||
| 19 | * Ceph release version | ||
| 20 | */ | ||
| 21 | #define CEPH_VERSION_MAJOR 0 | ||
| 22 | #define CEPH_VERSION_MINOR 16 | ||
| 23 | #define CEPH_VERSION_PATCH 1 | ||
| 24 | |||
| 25 | #define _CEPH_STRINGIFY(x) #x | ||
| 26 | #define CEPH_STRINGIFY(x) _CEPH_STRINGIFY(x) | ||
| 27 | #define CEPH_MAKE_VERSION(x, y, z) CEPH_STRINGIFY(x) "." CEPH_STRINGIFY(y) \ | ||
| 28 | "." CEPH_STRINGIFY(z) | ||
| 29 | #define CEPH_VERSION CEPH_MAKE_VERSION(CEPH_VERSION_MAJOR, \ | ||
| 30 | CEPH_VERSION_MINOR, CEPH_VERSION_PATCH) | ||
| 31 | |||
| 32 | /* | ||
| 33 | * subprotocol versions. when specific message types or high-level | ||
| 34 | * protocols change, bump the affected components. we rev the | ||
| 35 | * internal cluster protocols separately from the public, | ||
| 36 | * client-facing protocol. | ||
| 37 | */ | ||
| 38 | #define CEPH_OSD_PROTOCOL 7 /* cluster internal */ | ||
| 39 | #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ | ||
| 40 | #define CEPH_MON_PROTOCOL 4 /* cluster internal */ | ||
| 41 | #define CEPH_OSDC_PROTOCOL 20 /* server/client */ | ||
| 42 | #define CEPH_MDSC_PROTOCOL 29 /* server/client */ | ||
| 43 | #define CEPH_MONC_PROTOCOL 14 /* server/client */ | ||
| 44 | |||
| 45 | |||
| 46 | #define CEPH_INO_ROOT 1 | ||
| 47 | |||
| 48 | /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ | ||
| 49 | #define CEPH_MAX_MON 31 | ||
| 50 | |||
| 51 | |||
| 52 | unsigned int ceph_full_name_hash(const char *name, unsigned int len); | ||
| 53 | |||
| 54 | |||
| 55 | /* | ||
| 56 | * ceph_file_layout - describe data layout for a file/inode | ||
| 57 | */ | ||
| 58 | struct ceph_file_layout { | ||
| 59 | /* file -> object mapping */ | ||
| 60 | __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple | ||
| 61 | of page size. */ | ||
| 62 | __le32 fl_stripe_count; /* over this many objects */ | ||
| 63 | __le32 fl_object_size; /* until objects are this big, then move to | ||
| 64 | new objects */ | ||
| 65 | __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ | ||
| 66 | |||
| 67 | /* pg -> disk layout */ | ||
| 68 | __le32 fl_object_stripe_unit; /* for per-object parity, if any */ | ||
| 69 | |||
| 70 | /* object -> pg layout */ | ||
| 71 | __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ | ||
| 72 | __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ | ||
| 73 | } __attribute__ ((packed)); | ||
| 74 | |||
| 75 | |||
| 76 | |||
| 77 | |||
| 78 | /********************************************* | ||
| 79 | * message layer | ||
| 80 | */ | ||
| 81 | |||
| 82 | /* | ||
| 83 | * message types | ||
| 84 | */ | ||
| 85 | |||
| 86 | /* misc */ | ||
| 87 | #define CEPH_MSG_SHUTDOWN 1 | ||
| 88 | #define CEPH_MSG_PING 2 | ||
| 89 | |||
| 90 | /* client <-> monitor */ | ||
| 91 | #define CEPH_MSG_MON_MAP 4 | ||
| 92 | #define CEPH_MSG_MON_GET_MAP 5 | ||
| 93 | #define CEPH_MSG_CLIENT_MOUNT 10 | ||
| 94 | #define CEPH_MSG_CLIENT_MOUNT_ACK 11 | ||
| 95 | #define CEPH_MSG_STATFS 13 | ||
| 96 | #define CEPH_MSG_STATFS_REPLY 14 | ||
| 97 | #define CEPH_MSG_MON_SUBSCRIBE 15 | ||
| 98 | #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 | ||
| 99 | |||
| 100 | /* client <-> mds */ | ||
| 101 | #define CEPH_MSG_MDS_GETMAP 20 | ||
| 102 | #define CEPH_MSG_MDS_MAP 21 | ||
| 103 | |||
| 104 | #define CEPH_MSG_CLIENT_SESSION 22 | ||
| 105 | #define CEPH_MSG_CLIENT_RECONNECT 23 | ||
| 106 | |||
| 107 | #define CEPH_MSG_CLIENT_REQUEST 24 | ||
| 108 | #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 | ||
| 109 | #define CEPH_MSG_CLIENT_REPLY 26 | ||
| 110 | #define CEPH_MSG_CLIENT_CAPS 0x310 | ||
| 111 | #define CEPH_MSG_CLIENT_LEASE 0x311 | ||
| 112 | #define CEPH_MSG_CLIENT_SNAP 0x312 | ||
| 113 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 | ||
| 114 | |||
| 115 | /* osd */ | ||
| 116 | #define CEPH_MSG_OSD_GETMAP 40 | ||
| 117 | #define CEPH_MSG_OSD_MAP 41 | ||
| 118 | #define CEPH_MSG_OSD_OP 42 | ||
| 119 | #define CEPH_MSG_OSD_OPREPLY 43 | ||
| 120 | |||
| 121 | |||
| 122 | struct ceph_mon_statfs { | ||
| 123 | __le64 have_version; | ||
| 124 | struct ceph_fsid fsid; | ||
| 125 | __le64 tid; | ||
| 126 | } __attribute__ ((packed)); | ||
| 127 | |||
| 128 | struct ceph_statfs { | ||
| 129 | __le64 kb, kb_used, kb_avail; | ||
| 130 | __le64 num_objects; | ||
| 131 | } __attribute__ ((packed)); | ||
| 132 | |||
| 133 | struct ceph_mon_statfs_reply { | ||
| 134 | struct ceph_fsid fsid; | ||
| 135 | __le64 tid; | ||
| 136 | __le64 version; | ||
| 137 | struct ceph_statfs st; | ||
| 138 | } __attribute__ ((packed)); | ||
| 139 | |||
| 140 | struct ceph_osd_getmap { | ||
| 141 | __le64 have_version; | ||
| 142 | struct ceph_fsid fsid; | ||
| 143 | __le32 start; | ||
| 144 | } __attribute__ ((packed)); | ||
| 145 | |||
| 146 | struct ceph_mds_getmap { | ||
| 147 | __le64 have_version; | ||
| 148 | struct ceph_fsid fsid; | ||
| 149 | } __attribute__ ((packed)); | ||
| 150 | |||
| 151 | struct ceph_client_mount { | ||
| 152 | __le64 have_version; | ||
| 153 | } __attribute__ ((packed)); | ||
| 154 | |||
| 155 | struct ceph_mon_subscribe_item { | ||
| 156 | __le64 have; | ||
| 157 | __u8 onetime; | ||
| 158 | } __attribute__ ((packed)); | ||
| 159 | |||
| 160 | /* | ||
| 161 | * mds states | ||
| 162 | * > 0 -> in | ||
| 163 | * <= 0 -> out | ||
| 164 | */ | ||
| 165 | #define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */ | ||
| 166 | #define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. | ||
| 167 | empty log. */ | ||
| 168 | #define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */ | ||
| 169 | #define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */ | ||
| 170 | #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ | ||
| 171 | #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ | ||
| 172 | #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ | ||
| 173 | |||
| 174 | #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ | ||
| 175 | #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed | ||
| 176 | operations (import, rename, etc.) */ | ||
| 177 | #define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */ | ||
| 178 | #define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */ | ||
| 179 | #define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */ | ||
| 180 | #define CEPH_MDS_STATE_ACTIVE 13 /* up, active */ | ||
| 181 | #define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */ | ||
| 182 | |||
| 183 | extern const char *ceph_mds_state_name(int s); | ||
| 184 | |||
| 185 | |||
| 186 | /* | ||
| 187 | * metadata lock types. | ||
| 188 | * - these are bitmasks.. we can compose them | ||
| 189 | * - they also define the lock ordering by the MDS | ||
| 190 | * - a few of these are internal to the mds | ||
| 191 | */ | ||
| 192 | #define CEPH_LOCK_DN 1 | ||
| 193 | #define CEPH_LOCK_ISNAP 2 | ||
| 194 | #define CEPH_LOCK_IVERSION 4 /* mds internal */ | ||
| 195 | #define CEPH_LOCK_IFILE 8 /* mds internal */ | ||
| 196 | #define CEPH_LOCK_IAUTH 32 | ||
| 197 | #define CEPH_LOCK_ILINK 64 | ||
| 198 | #define CEPH_LOCK_IDFT 128 /* dir frag tree */ | ||
| 199 | #define CEPH_LOCK_INEST 256 /* mds internal */ | ||
| 200 | #define CEPH_LOCK_IXATTR 512 | ||
| 201 | #define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ | ||
| 202 | |||
| 203 | /* client_session ops */ | ||
| 204 | enum { | ||
| 205 | CEPH_SESSION_REQUEST_OPEN, | ||
| 206 | CEPH_SESSION_OPEN, | ||
| 207 | CEPH_SESSION_REQUEST_CLOSE, | ||
| 208 | CEPH_SESSION_CLOSE, | ||
| 209 | CEPH_SESSION_REQUEST_RENEWCAPS, | ||
| 210 | CEPH_SESSION_RENEWCAPS, | ||
| 211 | CEPH_SESSION_STALE, | ||
| 212 | CEPH_SESSION_RECALL_STATE, | ||
| 213 | }; | ||
| 214 | |||
| 215 | extern const char *ceph_session_op_name(int op); | ||
| 216 | |||
| 217 | struct ceph_mds_session_head { | ||
| 218 | __le32 op; | ||
| 219 | __le64 seq; | ||
| 220 | struct ceph_timespec stamp; | ||
| 221 | __le32 max_caps, max_leases; | ||
| 222 | } __attribute__ ((packed)); | ||
| 223 | |||
| 224 | /* client_request */ | ||
| 225 | /* | ||
| 226 | * metadata ops. | ||
| 227 | * & 0x001000 -> write op | ||
| 228 | * & 0x010000 -> follow symlink (e.g. stat(), not lstat()). | ||
| 229 | * & 0x100000 -> use weird ino/path trace | ||
| 230 | */ | ||
| 231 | #define CEPH_MDS_OP_WRITE 0x001000 | ||
| 232 | enum { | ||
| 233 | CEPH_MDS_OP_LOOKUP = 0x00100, | ||
| 234 | CEPH_MDS_OP_GETATTR = 0x00101, | ||
| 235 | CEPH_MDS_OP_LOOKUPHASH = 0x00102, | ||
| 236 | CEPH_MDS_OP_LOOKUPPARENT = 0x00103, | ||
| 237 | |||
| 238 | CEPH_MDS_OP_SETXATTR = 0x01105, | ||
| 239 | CEPH_MDS_OP_RMXATTR = 0x01106, | ||
| 240 | CEPH_MDS_OP_SETLAYOUT = 0x01107, | ||
| 241 | CEPH_MDS_OP_SETATTR = 0x01108, | ||
| 242 | |||
| 243 | CEPH_MDS_OP_MKNOD = 0x01201, | ||
| 244 | CEPH_MDS_OP_LINK = 0x01202, | ||
| 245 | CEPH_MDS_OP_UNLINK = 0x01203, | ||
| 246 | CEPH_MDS_OP_RENAME = 0x01204, | ||
| 247 | CEPH_MDS_OP_MKDIR = 0x01220, | ||
| 248 | CEPH_MDS_OP_RMDIR = 0x01221, | ||
| 249 | CEPH_MDS_OP_SYMLINK = 0x01222, | ||
| 250 | |||
| 251 | CEPH_MDS_OP_CREATE = 0x00301, | ||
| 252 | CEPH_MDS_OP_OPEN = 0x00302, | ||
| 253 | CEPH_MDS_OP_READDIR = 0x00305, | ||
| 254 | |||
| 255 | CEPH_MDS_OP_LOOKUPSNAP = 0x00400, | ||
| 256 | CEPH_MDS_OP_MKSNAP = 0x01400, | ||
| 257 | CEPH_MDS_OP_RMSNAP = 0x01401, | ||
| 258 | CEPH_MDS_OP_LSSNAP = 0x00402, | ||
| 259 | }; | ||
| 260 | |||
| 261 | extern const char *ceph_mds_op_name(int op); | ||
| 262 | |||
| 263 | |||
| 264 | #define CEPH_SETATTR_MODE 1 | ||
| 265 | #define CEPH_SETATTR_UID 2 | ||
| 266 | #define CEPH_SETATTR_GID 4 | ||
| 267 | #define CEPH_SETATTR_MTIME 8 | ||
| 268 | #define CEPH_SETATTR_ATIME 16 | ||
| 269 | #define CEPH_SETATTR_SIZE 32 | ||
| 270 | #define CEPH_SETATTR_CTIME 64 | ||
| 271 | |||
| 272 | union ceph_mds_request_args { | ||
| 273 | struct { | ||
| 274 | __le32 mask; /* CEPH_CAP_* */ | ||
| 275 | } __attribute__ ((packed)) getattr; | ||
| 276 | struct { | ||
| 277 | __le32 mode; | ||
| 278 | __le32 uid; | ||
| 279 | __le32 gid; | ||
| 280 | struct ceph_timespec mtime; | ||
| 281 | struct ceph_timespec atime; | ||
| 282 | __le64 size, old_size; /* old_size needed by truncate */ | ||
| 283 | __le32 mask; /* CEPH_SETATTR_* */ | ||
| 284 | } __attribute__ ((packed)) setattr; | ||
| 285 | struct { | ||
| 286 | __le32 frag; /* which dir fragment */ | ||
| 287 | __le32 max_entries; /* how many dentries to grab */ | ||
| 288 | } __attribute__ ((packed)) readdir; | ||
| 289 | struct { | ||
| 290 | __le32 mode; | ||
| 291 | __le32 rdev; | ||
| 292 | } __attribute__ ((packed)) mknod; | ||
| 293 | struct { | ||
| 294 | __le32 mode; | ||
| 295 | } __attribute__ ((packed)) mkdir; | ||
| 296 | struct { | ||
| 297 | __le32 flags; | ||
| 298 | __le32 mode; | ||
| 299 | __le32 stripe_unit; /* layout for newly created file */ | ||
| 300 | __le32 stripe_count; /* ... */ | ||
| 301 | __le32 object_size; | ||
| 302 | __le32 file_replication; | ||
| 303 | __le32 preferred; | ||
| 304 | } __attribute__ ((packed)) open; | ||
| 305 | struct { | ||
| 306 | __le32 flags; | ||
| 307 | } __attribute__ ((packed)) setxattr; | ||
| 308 | struct { | ||
| 309 | struct ceph_file_layout layout; | ||
| 310 | } __attribute__ ((packed)) setlayout; | ||
| 311 | } __attribute__ ((packed)); | ||
| 312 | |||
| 313 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ | ||
| 314 | #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ | ||
| 315 | |||
| 316 | struct ceph_mds_request_head { | ||
| 317 | __le64 tid, oldest_client_tid; | ||
| 318 | __le32 mdsmap_epoch; /* on client */ | ||
| 319 | __le32 flags; /* CEPH_MDS_FLAG_* */ | ||
| 320 | __u8 num_retry, num_fwd; /* count retry, fwd attempts */ | ||
| 321 | __le16 num_releases; /* # include cap/lease release records */ | ||
| 322 | __le32 op; /* mds op code */ | ||
| 323 | __le32 caller_uid, caller_gid; | ||
| 324 | __le64 ino; /* use this ino for openc, mkdir, mknod, | ||
| 325 | etc. (if replaying) */ | ||
| 326 | union ceph_mds_request_args args; | ||
| 327 | } __attribute__ ((packed)); | ||
| 328 | |||
| 329 | /* cap/lease release record */ | ||
| 330 | struct ceph_mds_request_release { | ||
| 331 | __le64 ino, cap_id; /* ino and unique cap id */ | ||
| 332 | __le32 caps, wanted; /* new issued, wanted */ | ||
| 333 | __le32 seq, issue_seq, mseq; | ||
| 334 | __le32 dname_seq; /* if releasing a dentry lease, a */ | ||
| 335 | __le32 dname_len; /* string follows. */ | ||
| 336 | } __attribute__ ((packed)); | ||
| 337 | |||
| 338 | /* client reply */ | ||
| 339 | struct ceph_mds_reply_head { | ||
| 340 | __le64 tid; | ||
| 341 | __le32 op; | ||
| 342 | __le32 result; | ||
| 343 | __le32 mdsmap_epoch; | ||
| 344 | __u8 safe; /* true if committed to disk */ | ||
| 345 | __u8 is_dentry, is_target; /* true if dentry, target inode records | ||
| 346 | are included with reply */ | ||
| 347 | } __attribute__ ((packed)); | ||
| 348 | |||
| 349 | /* one for each node split */ | ||
| 350 | struct ceph_frag_tree_split { | ||
| 351 | __le32 frag; /* this frag splits... */ | ||
| 352 | __le32 by; /* ...by this many bits */ | ||
| 353 | } __attribute__ ((packed)); | ||
| 354 | |||
| 355 | struct ceph_frag_tree_head { | ||
| 356 | __le32 nsplits; /* num ceph_frag_tree_split records */ | ||
| 357 | struct ceph_frag_tree_split splits[]; | ||
| 358 | } __attribute__ ((packed)); | ||
| 359 | |||
| 360 | /* capability issue, for bundling with mds reply */ | ||
| 361 | struct ceph_mds_reply_cap { | ||
| 362 | __le32 caps, wanted; /* caps issued, wanted */ | ||
| 363 | __le64 cap_id; | ||
| 364 | __le32 seq, mseq; | ||
| 365 | __le64 realm; /* snap realm */ | ||
| 366 | __u8 flags; /* CEPH_CAP_FLAG_* */ | ||
| 367 | } __attribute__ ((packed)); | ||
| 368 | |||
| 369 | #define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */ | ||
| 370 | |||
| 371 | /* inode record, for bundling with mds reply */ | ||
| 372 | struct ceph_mds_reply_inode { | ||
| 373 | __le64 ino; | ||
| 374 | __le64 snapid; | ||
| 375 | __le32 rdev; | ||
| 376 | __le64 version; /* inode version */ | ||
| 377 | __le64 xattr_version; /* version for xattr blob */ | ||
| 378 | struct ceph_mds_reply_cap cap; /* caps issued for this inode */ | ||
| 379 | struct ceph_file_layout layout; | ||
| 380 | struct ceph_timespec ctime, mtime, atime; | ||
| 381 | __le32 time_warp_seq; | ||
| 382 | __le64 size, max_size, truncate_size; | ||
| 383 | __le32 truncate_seq; | ||
| 384 | __le32 mode, uid, gid; | ||
| 385 | __le32 nlink; | ||
| 386 | __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */ | ||
| 387 | struct ceph_timespec rctime; | ||
| 388 | struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ | ||
| 389 | } __attribute__ ((packed)); | ||
| 390 | /* followed by frag array, then symlink string, then xattr blob */ | ||
| 391 | |||
| 392 | /* reply_lease follows dname, and reply_inode */ | ||
| 393 | struct ceph_mds_reply_lease { | ||
| 394 | __le16 mask; /* lease type(s) */ | ||
| 395 | __le32 duration_ms; /* lease duration */ | ||
| 396 | __le32 seq; | ||
| 397 | } __attribute__ ((packed)); | ||
| 398 | |||
| 399 | struct ceph_mds_reply_dirfrag { | ||
| 400 | __le32 frag; /* fragment */ | ||
| 401 | __le32 auth; /* auth mds, if this is a delegation point */ | ||
| 402 | __le32 ndist; /* number of mds' this is replicated on */ | ||
| 403 | __le32 dist[]; | ||
| 404 | } __attribute__ ((packed)); | ||
| 405 | |||
| 406 | /* file access modes */ | ||
| 407 | #define CEPH_FILE_MODE_PIN 0 | ||
| 408 | #define CEPH_FILE_MODE_RD 1 | ||
| 409 | #define CEPH_FILE_MODE_WR 2 | ||
| 410 | #define CEPH_FILE_MODE_RDWR 3 /* RD | WR */ | ||
| 411 | #define CEPH_FILE_MODE_LAZY 4 /* lazy io */ | ||
| 412 | #define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */ | ||
| 413 | |||
| 414 | int ceph_flags_to_mode(int flags); | ||
| 415 | |||
| 416 | |||
/* capability bits */
#define CEPH_CAP_PIN         1  /* no specific capabilities beyond the pin */

/* generic cap bits */
#define CEPH_CAP_GSHARED     1  /* client can read */
#define CEPH_CAP_GEXCL       2  /* client can read and update */
#define CEPH_CAP_GCACHE      4  /* (file) client can cache reads */
#define CEPH_CAP_GRD         8  /* (file) client can read */
#define CEPH_CAP_GWR        16  /* (file) client can write */
#define CEPH_CAP_GBUFFER    32  /* (file) client can buffer writes */
#define CEPH_CAP_GWREXTEND  64  /* (file) client can extend EOF */
#define CEPH_CAP_GLAZYIO   128  /* (file) client can perform lazy io */

/* per-lock shift */
#define CEPH_CAP_SAUTH      2
#define CEPH_CAP_SLINK      4
#define CEPH_CAP_SXATTR     6
#define CEPH_CAP_SFILE      8   /* goes at the end (uses >2 cap bits) */

#define CEPH_CAP_BITS       16

/* composed values */
#define CEPH_CAP_AUTH_SHARED  (CEPH_CAP_GSHARED  << CEPH_CAP_SAUTH)
#define CEPH_CAP_AUTH_EXCL     (CEPH_CAP_GEXCL     << CEPH_CAP_SAUTH)
#define CEPH_CAP_LINK_SHARED  (CEPH_CAP_GSHARED  << CEPH_CAP_SLINK)
#define CEPH_CAP_LINK_EXCL     (CEPH_CAP_GEXCL     << CEPH_CAP_SLINK)
#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED  << CEPH_CAP_SXATTR)
#define CEPH_CAP_XATTR_EXCL    (CEPH_CAP_GEXCL     << CEPH_CAP_SXATTR)
/*
 * Shift generic cap bits into the file-lock field.  The argument is
 * parenthesized so that a compound expression such as
 * CEPH_CAP_FILE(CEPH_CAP_GRD | CEPH_CAP_GWR) is shifted as a whole.
 */
#define CEPH_CAP_FILE(x)    ((x) << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_SHARED   (CEPH_CAP_GSHARED   << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_EXCL     (CEPH_CAP_GEXCL     << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_CACHE    (CEPH_CAP_GCACHE    << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_RD       (CEPH_CAP_GRD       << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_WR       (CEPH_CAP_GWR       << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_BUFFER   (CEPH_CAP_GBUFFER   << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_LAZYIO   (CEPH_CAP_GLAZYIO   << CEPH_CAP_SFILE)
| 454 | |||
| 455 | /* cap masks (for getattr) */ | ||
| 456 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN | ||
| 457 | #define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */ | ||
| 458 | #define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN | ||
| 459 | #define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED | ||
| 460 | #define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED | ||
| 461 | #define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED | ||
| 462 | #define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED | ||
| 463 | #define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED | ||
| 464 | #define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED | ||
| 465 | #define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED | ||
| 466 | #define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */ | ||
| 467 | #define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED | ||
| 468 | #define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \ | ||
| 469 | CEPH_CAP_AUTH_SHARED | \ | ||
| 470 | CEPH_CAP_LINK_SHARED | \ | ||
| 471 | CEPH_CAP_FILE_SHARED | \ | ||
| 472 | CEPH_CAP_XATTR_SHARED) | ||
| 473 | |||
| 474 | #define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ | ||
| 475 | CEPH_CAP_LINK_SHARED | \ | ||
| 476 | CEPH_CAP_XATTR_SHARED | \ | ||
| 477 | CEPH_CAP_FILE_SHARED) | ||
| 478 | #define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \ | ||
| 479 | CEPH_CAP_FILE_CACHE) | ||
| 480 | |||
| 481 | #define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \ | ||
| 482 | CEPH_CAP_LINK_EXCL | \ | ||
| 483 | CEPH_CAP_XATTR_EXCL | \ | ||
| 484 | CEPH_CAP_FILE_EXCL) | ||
| 485 | #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \ | ||
| 486 | CEPH_CAP_FILE_EXCL) | ||
| 487 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) | ||
| 488 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ | ||
| 489 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_PIN) | ||
| 490 | |||
| 491 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ | ||
| 492 | CEPH_LOCK_IXATTR) | ||
| 493 | |||
| 494 | int ceph_caps_for_mode(int mode); | ||
| 495 | |||
| 496 | enum { | ||
| 497 | CEPH_CAP_OP_GRANT, /* mds->client grant */ | ||
| 498 | CEPH_CAP_OP_REVOKE, /* mds->client revoke */ | ||
| 499 | CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */ | ||
| 500 | CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ | ||
| 501 | CEPH_CAP_OP_IMPORT, /* mds has imported the cap */ | ||
| 502 | CEPH_CAP_OP_UPDATE, /* client->mds update */ | ||
| 503 | CEPH_CAP_OP_DROP, /* client->mds drop cap bits */ | ||
| 504 | CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */ | ||
| 505 | CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */ | ||
| 506 | CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */ | ||
| 507 | CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */ | ||
| 508 | CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */ | ||
| 509 | CEPH_CAP_OP_RENEW, /* client->mds renewal request */ | ||
| 510 | }; | ||
| 511 | |||
| 512 | extern const char *ceph_cap_op_name(int op); | ||
| 513 | |||
| 514 | /* | ||
| 515 | * caps message, used for capability callbacks, acks, requests, etc. | ||
| 516 | */ | ||
| 517 | struct ceph_mds_caps { | ||
| 518 | __le32 op; /* CEPH_CAP_OP_* */ | ||
| 519 | __le64 ino, realm; | ||
| 520 | __le64 cap_id; | ||
| 521 | __le32 seq, issue_seq; | ||
| 522 | __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */ | ||
| 523 | __le32 migrate_seq; | ||
| 524 | __le64 snap_follows; | ||
| 525 | __le32 snap_trace_len; | ||
| 526 | __le64 client_tid; /* for FLUSH(SNAP) -> FLUSH(SNAP)_ACK */ | ||
| 527 | |||
| 528 | /* authlock */ | ||
| 529 | __le32 uid, gid, mode; | ||
| 530 | |||
| 531 | /* linklock */ | ||
| 532 | __le32 nlink; | ||
| 533 | |||
| 534 | /* xattrlock */ | ||
| 535 | __le32 xattr_len; | ||
| 536 | __le64 xattr_version; | ||
| 537 | |||
| 538 | /* filelock */ | ||
| 539 | __le64 size, max_size, truncate_size; | ||
| 540 | __le32 truncate_seq; | ||
| 541 | struct ceph_timespec mtime, atime, ctime; | ||
| 542 | struct ceph_file_layout layout; | ||
| 543 | __le32 time_warp_seq; | ||
| 544 | } __attribute__ ((packed)); | ||
| 545 | |||
| 546 | /* cap release msg head */ | ||
| 547 | struct ceph_mds_cap_release { | ||
| 548 | __le32 num; /* number of cap_items that follow */ | ||
| 549 | } __attribute__ ((packed)); | ||
| 550 | |||
| 551 | struct ceph_mds_cap_item { | ||
| 552 | __le64 ino; | ||
| 553 | __le64 cap_id; | ||
| 554 | __le32 migrate_seq, seq; | ||
| 555 | } __attribute__ ((packed)); | ||
| 556 | |||
| 557 | #define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ | ||
| 558 | #define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */ | ||
| 559 | #define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */ | ||
| 560 | #define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */ | ||
| 561 | |||
| 562 | extern const char *ceph_lease_op_name(int o); | ||
| 563 | |||
| 564 | /* lease msg header */ | ||
| 565 | struct ceph_mds_lease { | ||
| 566 | __u8 action; /* CEPH_MDS_LEASE_* */ | ||
| 567 | __le16 mask; /* which lease */ | ||
| 568 | __le64 ino; | ||
| 569 | __le64 first, last; /* snap range */ | ||
| 570 | __le32 seq; | ||
| 571 | __le32 duration_ms; /* duration of renewal */ | ||
| 572 | } __attribute__ ((packed)); | ||
| 573 | /* followed by a __le32+string for dname */ | ||
| 574 | |||
| 575 | /* client reconnect */ | ||
| 576 | struct ceph_mds_cap_reconnect { | ||
| 577 | __le64 cap_id; | ||
| 578 | __le32 wanted; | ||
| 579 | __le32 issued; | ||
| 580 | __le64 size; | ||
| 581 | struct ceph_timespec mtime, atime; | ||
| 582 | __le64 snaprealm; | ||
| 583 | __le64 pathbase; /* base ino for our path to this ino */ | ||
| 584 | } __attribute__ ((packed)); | ||
| 585 | /* followed by encoded string */ | ||
| 586 | |||
| 587 | struct ceph_mds_snaprealm_reconnect { | ||
| 588 | __le64 ino; /* snap realm base */ | ||
| 589 | __le64 seq; /* snap seq for this snap realm */ | ||
| 590 | __le64 parent; /* parent realm */ | ||
| 591 | } __attribute__ ((packed)); | ||
| 592 | |||
| 593 | /* | ||
| 594 | * snaps | ||
| 595 | */ | ||
| 596 | enum { | ||
| 597 | CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */ | ||
| 598 | CEPH_SNAP_OP_CREATE, | ||
| 599 | CEPH_SNAP_OP_DESTROY, | ||
| 600 | CEPH_SNAP_OP_SPLIT, | ||
| 601 | }; | ||
| 602 | |||
| 603 | extern const char *ceph_snap_op_name(int o); | ||
| 604 | |||
| 605 | /* snap msg header */ | ||
| 606 | struct ceph_mds_snap_head { | ||
| 607 | __le32 op; /* CEPH_SNAP_OP_* */ | ||
| 608 | __le64 split; /* ino to split off, if any */ | ||
| 609 | __le32 num_split_inos; /* # inos belonging to new child realm */ | ||
| 610 | __le32 num_split_realms; /* # child realms under new child realm */ | ||
| 611 | __le32 trace_len; /* size of snap trace blob */ | ||
| 612 | } __attribute__ ((packed)); | ||
| 613 | /* followed by split ino list, then split realms, then the trace blob */ | ||
| 614 | |||
| 615 | /* | ||
| 616 | * encode info about a snaprealm, as viewed by a client | ||
| 617 | */ | ||
| 618 | struct ceph_mds_snap_realm { | ||
| 619 | __le64 ino; /* ino */ | ||
| 620 | __le64 created; /* snap: when created */ | ||
| 621 | __le64 parent; /* ino: parent realm */ | ||
| 622 | __le64 parent_since; /* snap: same parent since */ | ||
| 623 | __le64 seq; /* snap: version */ | ||
| 624 | __le32 num_snaps; | ||
| 625 | __le32 num_prior_parent_snaps; | ||
| 626 | } __attribute__ ((packed)); | ||
| 627 | /* followed by my snap list, then prior parent snap list */ | ||
| 628 | |||
| 629 | #endif | ||
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c new file mode 100644 index 00000000000..90d19d9d8d8 --- /dev/null +++ b/fs/ceph/ceph_strings.c | |||
| @@ -0,0 +1,163 @@ | |||
| 1 | /* | ||
| 2 | * Ceph string constants | ||
| 3 | */ | ||
| 4 | #include "types.h" | ||
| 5 | |||
| 6 | const char *ceph_osd_op_name(int op) | ||
| 7 | { | ||
| 8 | switch (op) { | ||
| 9 | case CEPH_OSD_OP_READ: return "read"; | ||
| 10 | case CEPH_OSD_OP_STAT: return "stat"; | ||
| 11 | |||
| 12 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
| 13 | |||
| 14 | case CEPH_OSD_OP_WRITE: return "write"; | ||
| 15 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
| 16 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
| 17 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
| 18 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
| 19 | |||
| 20 | case CEPH_OSD_OP_APPEND: return "append"; | ||
| 21 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
| 22 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
| 23 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
| 24 | |||
| 25 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
| 26 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
| 27 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
| 28 | |||
| 29 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
| 30 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
| 31 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
| 32 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
| 33 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
| 34 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
| 35 | |||
| 36 | case CEPH_OSD_OP_PULL: return "pull"; | ||
| 37 | case CEPH_OSD_OP_PUSH: return "push"; | ||
| 38 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
| 39 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
| 40 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
| 41 | |||
| 42 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
| 43 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
| 44 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
| 45 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
| 46 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
| 47 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
| 48 | |||
| 49 | case CEPH_OSD_OP_CALL: return "call"; | ||
| 50 | |||
| 51 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
| 52 | } | ||
| 53 | return "???"; | ||
| 54 | } | ||
| 55 | |||
| 56 | const char *ceph_mds_state_name(int s) | ||
| 57 | { | ||
| 58 | switch (s) { | ||
| 59 | /* down and out */ | ||
| 60 | case CEPH_MDS_STATE_DNE: return "down:dne"; | ||
| 61 | case CEPH_MDS_STATE_STOPPED: return "down:stopped"; | ||
| 62 | /* up and out */ | ||
| 63 | case CEPH_MDS_STATE_BOOT: return "up:boot"; | ||
| 64 | case CEPH_MDS_STATE_STANDBY: return "up:standby"; | ||
| 65 | case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; | ||
| 66 | case CEPH_MDS_STATE_CREATING: return "up:creating"; | ||
| 67 | case CEPH_MDS_STATE_STARTING: return "up:starting"; | ||
| 68 | /* up and in */ | ||
| 69 | case CEPH_MDS_STATE_REPLAY: return "up:replay"; | ||
| 70 | case CEPH_MDS_STATE_RESOLVE: return "up:resolve"; | ||
| 71 | case CEPH_MDS_STATE_RECONNECT: return "up:reconnect"; | ||
| 72 | case CEPH_MDS_STATE_REJOIN: return "up:rejoin"; | ||
| 73 | case CEPH_MDS_STATE_CLIENTREPLAY: return "up:clientreplay"; | ||
| 74 | case CEPH_MDS_STATE_ACTIVE: return "up:active"; | ||
| 75 | case CEPH_MDS_STATE_STOPPING: return "up:stopping"; | ||
| 76 | } | ||
| 77 | return "???"; | ||
| 78 | } | ||
| 79 | |||
| 80 | const char *ceph_session_op_name(int op) | ||
| 81 | { | ||
| 82 | switch (op) { | ||
| 83 | case CEPH_SESSION_REQUEST_OPEN: return "request_open"; | ||
| 84 | case CEPH_SESSION_OPEN: return "open"; | ||
| 85 | case CEPH_SESSION_REQUEST_CLOSE: return "request_close"; | ||
| 86 | case CEPH_SESSION_CLOSE: return "close"; | ||
| 87 | case CEPH_SESSION_REQUEST_RENEWCAPS: return "request_renewcaps"; | ||
| 88 | case CEPH_SESSION_RENEWCAPS: return "renewcaps"; | ||
| 89 | case CEPH_SESSION_STALE: return "stale"; | ||
| 90 | case CEPH_SESSION_RECALL_STATE: return "recall_state"; | ||
| 91 | } | ||
| 92 | return "???"; | ||
| 93 | } | ||
| 94 | |||
| 95 | const char *ceph_mds_op_name(int op) | ||
| 96 | { | ||
| 97 | switch (op) { | ||
| 98 | case CEPH_MDS_OP_LOOKUP: return "lookup"; | ||
| 99 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; | ||
| 100 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; | ||
| 101 | case CEPH_MDS_OP_GETATTR: return "getattr"; | ||
| 102 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; | ||
| 103 | case CEPH_MDS_OP_SETATTR: return "setattr"; | ||
| 104 | case CEPH_MDS_OP_RMXATTR: return "rmxattr"; | ||
| 105 | case CEPH_MDS_OP_READDIR: return "readdir"; | ||
| 106 | case CEPH_MDS_OP_MKNOD: return "mknod"; | ||
| 107 | case CEPH_MDS_OP_LINK: return "link"; | ||
| 108 | case CEPH_MDS_OP_UNLINK: return "unlink"; | ||
| 109 | case CEPH_MDS_OP_RENAME: return "rename"; | ||
| 110 | case CEPH_MDS_OP_MKDIR: return "mkdir"; | ||
| 111 | case CEPH_MDS_OP_RMDIR: return "rmdir"; | ||
| 112 | case CEPH_MDS_OP_SYMLINK: return "symlink"; | ||
| 113 | case CEPH_MDS_OP_CREATE: return "create"; | ||
| 114 | case CEPH_MDS_OP_OPEN: return "open"; | ||
| 115 | case CEPH_MDS_OP_LOOKUPSNAP: return "lookupsnap"; | ||
| 116 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | ||
| 117 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | ||
| 118 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | ||
| 119 | } | ||
| 120 | return "???"; | ||
| 121 | } | ||
| 122 | |||
| 123 | const char *ceph_cap_op_name(int op) | ||
| 124 | { | ||
| 125 | switch (op) { | ||
| 126 | case CEPH_CAP_OP_GRANT: return "grant"; | ||
| 127 | case CEPH_CAP_OP_REVOKE: return "revoke"; | ||
| 128 | case CEPH_CAP_OP_TRUNC: return "trunc"; | ||
| 129 | case CEPH_CAP_OP_EXPORT: return "export"; | ||
| 130 | case CEPH_CAP_OP_IMPORT: return "import"; | ||
| 131 | case CEPH_CAP_OP_UPDATE: return "update"; | ||
| 132 | case CEPH_CAP_OP_DROP: return "drop"; | ||
| 133 | case CEPH_CAP_OP_FLUSH: return "flush"; | ||
| 134 | case CEPH_CAP_OP_FLUSH_ACK: return "flush_ack"; | ||
| 135 | case CEPH_CAP_OP_FLUSHSNAP: return "flushsnap"; | ||
| 136 | case CEPH_CAP_OP_FLUSHSNAP_ACK: return "flushsnap_ack"; | ||
| 137 | case CEPH_CAP_OP_RELEASE: return "release"; | ||
| 138 | case CEPH_CAP_OP_RENEW: return "renew"; | ||
| 139 | } | ||
| 140 | return "???"; | ||
| 141 | } | ||
| 142 | |||
| 143 | const char *ceph_lease_op_name(int o) | ||
| 144 | { | ||
| 145 | switch (o) { | ||
| 146 | case CEPH_MDS_LEASE_REVOKE: return "revoke"; | ||
| 147 | case CEPH_MDS_LEASE_RELEASE: return "release"; | ||
| 148 | case CEPH_MDS_LEASE_RENEW: return "renew"; | ||
| 149 | case CEPH_MDS_LEASE_REVOKE_ACK: return "revoke_ack"; | ||
| 150 | } | ||
| 151 | return "???"; | ||
| 152 | } | ||
| 153 | |||
| 154 | const char *ceph_snap_op_name(int o) | ||
| 155 | { | ||
| 156 | switch (o) { | ||
| 157 | case CEPH_SNAP_OP_UPDATE: return "update"; | ||
| 158 | case CEPH_SNAP_OP_CREATE: return "create"; | ||
| 159 | case CEPH_SNAP_OP_DESTROY: return "destroy"; | ||
| 160 | case CEPH_SNAP_OP_SPLIT: return "split"; | ||
| 161 | } | ||
| 162 | return "???"; | ||
| 163 | } | ||
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h new file mode 100644 index 00000000000..73921ae43fa --- /dev/null +++ b/fs/ceph/msgr.h | |||
| @@ -0,0 +1,157 @@ | |||
| 1 | #ifndef __MSGR_H | ||
| 2 | #define __MSGR_H | ||
| 3 | |||
| 4 | /* | ||
| 5 | * Data types for message passing layer used by Ceph. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #define CEPH_MON_PORT 6789 /* default monitor port */ | ||
| 9 | |||
| 10 | /* | ||
| 11 | * client-side processes will try to bind to ports in this | ||
| 12 | * range, simply for the benefit of tools like nmap or wireshark | ||
| 13 | * that would like to identify the protocol. | ||
| 14 | */ | ||
| 15 | #define CEPH_PORT_FIRST 6789 | ||
| 16 | #define CEPH_PORT_START 6800 /* non-monitors start here */ | ||
| 17 | #define CEPH_PORT_LAST 6900 | ||
| 18 | |||
| 19 | /* | ||
| 20 | * tcp connection banner. include a protocol version, and adjust it | ||
| 21 | * whenever the wire protocol changes. try to keep this string length | ||
| 22 | * constant. | ||
| 23 | */ | ||
| 24 | #define CEPH_BANNER "ceph v021" | ||
| 25 | #define CEPH_BANNER_MAX_LEN 30 | ||
| 26 | |||
| 27 | |||
| 28 | /* | ||
| 29 | * Rollover-safe type and comparator for 32-bit sequence numbers. | ||
| 30 | * Comparator returns a negative, zero, or positive value. | ||
| 31 | */ | ||
| 32 | typedef __u32 ceph_seq_t; | ||
| 33 | |||
/*
 * Compare two 32-bit sequence numbers in a rollover-safe way.
 *
 * Returns a negative value if a precedes b, zero if they are equal, and
 * a positive value if a follows b (it is the signed distance, not just
 * -1/0/1).
 *
 * The subtraction is performed on the unsigned operands, where
 * wraparound is well defined, and only the result is reinterpreted as
 * signed.  Casting each operand to __s32 first and then subtracting can
 * overflow, which is undefined behaviour for signed integers.
 */
static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
{
	return (__s32)(a - b);
}
| 38 | |||
| 39 | |||
| 40 | /* | ||
| 41 | * entity_name -- logical name for a process participating in the | ||
| 42 | * network, e.g. 'mds0' or 'osd3'. | ||
| 43 | */ | ||
/* Packed: num follows type directly, with no alignment padding. */
struct ceph_entity_name {
	__u8 type;      /* CEPH_ENTITY_TYPE_* */
	__le64 num;     /* little-endian number paired with the type */
} __attribute__ ((packed));
| 48 | |||
| 49 | #define CEPH_ENTITY_TYPE_MON 1 | ||
| 50 | #define CEPH_ENTITY_TYPE_MDS 2 | ||
| 51 | #define CEPH_ENTITY_TYPE_OSD 3 | ||
| 52 | #define CEPH_ENTITY_TYPE_CLIENT 4 | ||
| 53 | #define CEPH_ENTITY_TYPE_ADMIN 5 | ||
| 54 | |||
| 55 | /* | ||
| 56 | * entity_addr -- network address | ||
| 57 | */ | ||
/*
 * Packed: two little-endian 32-bit ids followed by a full
 * struct sockaddr_storage.
 */
struct ceph_entity_addr {
	__le32 erank;  /* entity's rank in process */
	__le32 nonce;  /* unique id for process (e.g. pid) */
	struct sockaddr_storage in_addr;  /* socket address of the entity */
} __attribute__ ((packed));
| 63 | |||
| 64 | static inline bool ceph_entity_addr_is_local(const struct ceph_entity_addr *a, | ||
| 65 | const struct ceph_entity_addr *b) | ||
| 66 | { | ||
| 67 | return a->nonce == b->nonce && | ||
| 68 | memcmp(&a->in_addr, &b->in_addr, sizeof(a->in_addr)) == 0; | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline bool ceph_entity_addr_equal(const struct ceph_entity_addr *a, | ||
| 72 | const struct ceph_entity_addr *b) | ||
| 73 | { | ||
| 74 | return memcmp(a, b, sizeof(*a)) == 0; | ||
| 75 | } | ||
| 76 | |||
/* An entity's name together with its address, packed back to back. */
struct ceph_entity_inst {
	struct ceph_entity_name name;
	struct ceph_entity_addr addr;
} __attribute__ ((packed));
| 81 | |||
| 82 | |||
| 83 | /* used by message exchange protocol */ | ||
| 84 | #define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */ | ||
| 85 | #define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */ | ||
| 86 | #define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing | ||
| 87 | incoming connection */ | ||
| 88 | #define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again | ||
| 89 | with higher cseq */ | ||
| 90 | #define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again | ||
| 91 | with higher gseq */ | ||
| 92 | #define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */ | ||
| 93 | #define CEPH_MSGR_TAG_MSG 7 /* message */ | ||
| 94 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ | ||
| 95 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ | ||
| 96 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ | ||
| 97 | |||
| 98 | |||
| 99 | /* | ||
| 100 | * connection negotiation | ||
| 101 | */ | ||
/* Packed connect record: four little-endian 32-bit fields and a flag byte. */
struct ceph_msg_connect {
	__le32 host_type;  /* CEPH_ENTITY_TYPE_* */
	__le32 global_seq; /* count connections initiated by this host */
	__le32 connect_seq; /* count connections initiated in this session */
	__le32 protocol_version;
	__u8 flags;        /* CEPH_MSG_CONNECT_* */
} __attribute__ ((packed));
| 109 | |||
/*
 * Packed reply record.  The sequence and version fields have the same
 * names and types as in struct ceph_msg_connect.
 */
struct ceph_msg_connect_reply {
	__u8 tag;  /* presumably one of CEPH_MSGR_TAG_* -- confirm against users */
	__le32 global_seq;
	__le32 connect_seq;
	__le32 protocol_version;
	__u8 flags;
} __attribute__ ((packed));
| 117 | |||
| 118 | #define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */ | ||
| 119 | |||
| 120 | |||
| 121 | /* | ||
| 122 | * message header | ||
| 123 | */ | ||
/*
 * Packed header giving the sequence number, type, priority, the lengths
 * of the front/middle/data payload sections, source identities and a
 * crc32c of the header.
 */
struct ceph_msg_header {
	__le64 seq;       /* message seq# for this session */
	__le16 type;      /* message type */
	__le16 priority;  /* priority.  higher value == higher priority */

	__le32 front_len; /* bytes in main payload */
	__le32 middle_len;/* bytes in middle payload */
	__le32 data_len;  /* bytes of data payload */
	__le16 data_off;  /* sender: include full offset;
			     receiver: mask against ~PAGE_MASK */

	struct ceph_entity_inst src, orig_src;
	__le32 dst_erank;
	__le32 crc;       /* header crc32c */
} __attribute__ ((packed));
| 139 | |||
| 140 | #define CEPH_MSG_PRIO_LOW 64 | ||
| 141 | #define CEPH_MSG_PRIO_DEFAULT 127 | ||
| 142 | #define CEPH_MSG_PRIO_HIGH 196 | ||
| 143 | #define CEPH_MSG_PRIO_HIGHEST 255 | ||
| 144 | |||
| 145 | /* | ||
| 146 | * follows data payload | ||
| 147 | */ | ||
/* Packed footer: one crc per payload section plus a flag byte. */
struct ceph_msg_footer {
	__le32 front_crc, middle_crc, data_crc;
	__u8 flags;  /* CEPH_MSG_FOOTER_* bits */
} __attribute__ ((packed));
| 152 | |||
| 153 | #define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */ | ||
| 154 | #define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ | ||
| 155 | |||
| 156 | |||
| 157 | #endif | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h new file mode 100644 index 00000000000..a48cf4ae391 --- /dev/null +++ b/fs/ceph/rados.h | |||
| @@ -0,0 +1,372 @@ | |||
| 1 | #ifndef __RADOS_H | ||
| 2 | #define __RADOS_H | ||
| 3 | |||
| 4 | /* | ||
| 5 | * Data types for the Ceph distributed object storage layer RADOS | ||
| 6 | * (Reliable Autonomic Distributed Object Store). | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "msgr.h" | ||
| 10 | |||
| 11 | /* | ||
| 12 | * fs id | ||
| 13 | */ | ||
/* 16 opaque bytes identifying a file system. */
struct ceph_fsid {
	unsigned char fsid[16];
};

/*
 * Order two fsids bytewise.
 *
 * Returns the memcmp() result over the 16 id bytes: zero when the ids
 * are identical, otherwise negative or positive according to the first
 * differing byte.
 */
static inline int ceph_fsid_compare(const struct ceph_fsid *a,
				    const struct ceph_fsid *b)
{
	return memcmp(a->fsid, b->fsid, sizeof(a->fsid));
}
| 23 | |||
| 24 | /* | ||
| 25 | * ino, object, etc. | ||
| 26 | */ | ||
| 27 | typedef __le64 ceph_snapid_t; | ||
| 28 | #define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */ | ||
| 29 | #define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */ | ||
| 30 | #define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */ | ||
| 31 | |||
/* Packed time value made of two little-endian 32-bit fields. */
struct ceph_timespec {
	__le32 tv_sec;
	__le32 tv_nsec;
} __attribute__ ((packed));
| 36 | |||
| 37 | |||
| 38 | /* | ||
| 39 | * object layout - how objects are mapped into PGs | ||
| 40 | */ | ||
| 41 | #define CEPH_OBJECT_LAYOUT_HASH 1 | ||
| 42 | #define CEPH_OBJECT_LAYOUT_LINEAR 2 | ||
| 43 | #define CEPH_OBJECT_LAYOUT_HASHINO 3 | ||
| 44 | |||
| 45 | /* | ||
| 46 | * pg layout -- how PGs are mapped onto (sets of) OSDs | ||
| 47 | */ | ||
| 48 | #define CEPH_PG_LAYOUT_CRUSH 0 | ||
| 49 | #define CEPH_PG_LAYOUT_HASH 1 | ||
| 50 | #define CEPH_PG_LAYOUT_LINEAR 2 | ||
| 51 | #define CEPH_PG_LAYOUT_HYBRID 3 | ||
| 52 | |||
| 53 | |||
| 54 | /* | ||
| 55 | * placement group. | ||
| 56 | * we encode this into one __le64. | ||
| 57 | */ | ||
/*
 * The same 64 bits viewed either as one integer (pg64) or as the three
 * packed fields in pg (16 + 16 + 32 bits).
 */
union ceph_pg {
	__u64 pg64;
	struct {
		__s16 preferred; /* preferred primary osd */
		__u16 ps;        /* placement seed */
		__u32 pool;      /* object pool */
	} __attribute__ ((packed)) pg;
} __attribute__ ((packed));
| 66 | |||
| 67 | /* | ||
| 68 | * pg_pool is a set of pgs storing a pool of objects | ||
| 69 | * | ||
| 70 | * pg_num -- base number of pseudorandomly placed pgs | ||
| 71 | * | ||
| 72 | * pgp_num -- effective number when calculating pg placement. this | ||
| 73 | * is used for pg_num increases. new pgs result in data being "split" | ||
| 74 | * into new pgs. for this to proceed smoothly, new pgs are initially | ||
| 75 | * colocated with their parents; that is, pgp_num doesn't increase | ||
| 76 | * until the new pgs have successfully split. only _then_ are the new | ||
| 77 | * pgs placed independently. | ||
| 78 | * | ||
| 79 | * lpg_num -- localized pg count (per device). replicas are randomly | ||
| 80 | * selected. | ||
| 81 | * | ||
| 82 | * lpgp_num -- as above. | ||
| 83 | */ | ||
| 84 | #define CEPH_PG_TYPE_REP 1 | ||
| 85 | #define CEPH_PG_TYPE_RAID4 2 | ||
/*
 * Packed pool description.  num_snaps and num_removed_snap_intervals are
 * counts; the data they count is not part of this struct.
 */
struct ceph_pg_pool {
	__u8 type;                /* CEPH_PG_TYPE_* */
	__u8 size;                /* number of osds in each pg */
	__u8 crush_ruleset;       /* crush placement rule */
	__le32 pg_num, pgp_num;   /* number of pg's */
	__le32 lpg_num, lpgp_num; /* number of localized pg's */
	__le32 last_change;       /* most recent epoch changed */
	__le64 snap_seq;          /* seq for per-pool snapshot */
	__le32 snap_epoch;        /* epoch of last snap */
	__le32 num_snaps;
	__le32 num_removed_snap_intervals;
} __attribute__ ((packed));
| 98 | |||
| 99 | /* | ||
| 100 | * stable_mod func is used to control number of placement groups. | ||
| 101 | * similar to straight-up modulo, but produces a stable mapping as b | ||
| 102 | * increases over time. b is the number of bins, and bmask is the | ||
| 103 | * containing power of 2 minus 1. | ||
| 104 | * | ||
| 105 | * b <= bmask and bmask=(2**n)-1 | ||
| 106 | * e.g., b=12 -> bmask=15, b=123 -> bmask=127 | ||
| 107 | */ | ||
/*
 * Reduce x into [0, b) using bmask, falling back to the next smaller
 * power-of-two mask when the first masking lands at or beyond b.
 */
static inline int ceph_stable_mod(int x, int b, int bmask)
{
	const int masked = x & bmask;

	return masked < b ? masked : (x & (bmask >> 1));
}
| 115 | |||
| 116 | /* | ||
| 117 | * object layout - how a given object should be stored. | ||
| 118 | */ | ||
/* Packed: a 64-bit pg id followed by a 32-bit stripe unit. */
struct ceph_object_layout {
	__le64 ol_pgid;        /* raw pg, with _full_ ps precision. */
	__le32 ol_stripe_unit; /* for per-object parity, if any */
} __attribute__ ((packed));
| 123 | |||
| 124 | /* | ||
| 125 | * compound epoch+version, used by storage layer to serialize mutations | ||
| 126 | */ | ||
/* Packed: a 32-bit epoch followed directly by a 64-bit version. */
struct ceph_eversion {
	__le32 epoch;
	__le64 version;
} __attribute__ ((packed));
| 131 | |||
| 132 | /* | ||
| 133 | * osd map bits | ||
| 134 | */ | ||
| 135 | |||
| 136 | /* status bits */ | ||
| 137 | #define CEPH_OSD_EXISTS 1 | ||
| 138 | #define CEPH_OSD_UP 2 | ||
| 139 | |||
| 140 | /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ | ||
| 141 | #define CEPH_OSD_IN 0x10000 | ||
| 142 | #define CEPH_OSD_OUT 0 | ||
| 143 | |||
| 144 | |||
| 145 | /* | ||
| 146 | * osd map flag bits | ||
| 147 | */ | ||
| 148 | #define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ | ||
| 149 | #define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ | ||
| 150 | #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ | ||
| 151 | #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ | ||
| 152 | #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ | ||
| 153 | |||
| 154 | /* | ||
| 155 | * osd ops | ||
| 156 | */ | ||
| 157 | #define CEPH_OSD_OP_MODE 0xf000 | ||
| 158 | #define CEPH_OSD_OP_MODE_RD 0x1000 | ||
| 159 | #define CEPH_OSD_OP_MODE_WR 0x2000 | ||
| 160 | #define CEPH_OSD_OP_MODE_RMW 0x3000 | ||
| 161 | #define CEPH_OSD_OP_MODE_SUB 0x4000 | ||
| 162 | #define CEPH_OSD_OP_MODE_EXEC 0x8000 | ||
| 163 | |||
| 164 | #define CEPH_OSD_OP_TYPE 0x0f00 | ||
| 165 | #define CEPH_OSD_OP_TYPE_LOCK 0x0100 | ||
| 166 | #define CEPH_OSD_OP_TYPE_DATA 0x0200 | ||
| 167 | #define CEPH_OSD_OP_TYPE_ATTR 0x0300 | ||
| 168 | #define CEPH_OSD_OP_TYPE_EXEC 0x0400 | ||
| 169 | #define CEPH_OSD_OP_TYPE_PG 0x0500 | ||
| 170 | |||
/*
 * OSD opcodes.  Each value ORs together a CEPH_OSD_OP_MODE_* value, a
 * CEPH_OSD_OP_TYPE_* value (the subops carry none), and a small number
 * that is unique within that mode/type combination.
 */
enum {
	/** data **/
	/* read */
	CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1,
	CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2,

	/* fancy read */
	CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4,

	/* write */
	CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
	CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
	CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3,
	CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4,
	CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5,

	/* fancy write */
	CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6,
	CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7,
	CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8,
	CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9,

	/* TMAPUP is the only opcode here that uses the RMW mode */
	CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10,
	CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11,
	CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,

	CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,

	/** attrs **/
	/* read */
	CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
	CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,

	/* write */
	CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
	CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2,
	CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3,
	CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4,

	/** subop **/
	CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1,
	CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2,
	CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3,
	CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4,
	CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5,

	/** lock **/
	CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
	CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2,
	CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3,
	CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4,
	CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5,
	CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,

	/** exec **/
	CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,

	/** pg **/
	CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
};
| 231 | |||
| 232 | static inline int ceph_osd_op_type_lock(int op) | ||
| 233 | { | ||
| 234 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK; | ||
| 235 | } | ||
| 236 | static inline int ceph_osd_op_type_data(int op) | ||
| 237 | { | ||
| 238 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA; | ||
| 239 | } | ||
| 240 | static inline int ceph_osd_op_type_attr(int op) | ||
| 241 | { | ||
| 242 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR; | ||
| 243 | } | ||
| 244 | static inline int ceph_osd_op_type_exec(int op) | ||
| 245 | { | ||
| 246 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC; | ||
| 247 | } | ||
| 248 | static inline int ceph_osd_op_type_pg(int op) | ||
| 249 | { | ||
| 250 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; | ||
| 251 | } | ||
| 252 | |||
| 253 | static inline int ceph_osd_op_mode_subop(int op) | ||
| 254 | { | ||
| 255 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB; | ||
| 256 | } | ||
| 257 | static inline int ceph_osd_op_mode_read(int op) | ||
| 258 | { | ||
| 259 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; | ||
| 260 | } | ||
| 261 | static inline int ceph_osd_op_mode_modify(int op) | ||
| 262 | { | ||
| 263 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | ||
| 264 | } | ||
| 265 | |||
| 266 | #define CEPH_OSD_TMAP_HDR 'h' | ||
| 267 | #define CEPH_OSD_TMAP_SET 's' | ||
| 268 | #define CEPH_OSD_TMAP_RM 'r' | ||
| 269 | |||
| 270 | extern const char *ceph_osd_op_name(int op); | ||
| 271 | |||
| 272 | |||
| 273 | /* | ||
| 274 | * osd op flags | ||
| 275 | * | ||
| 276 | * An op may be READ, WRITE, or READ|WRITE. | ||
| 277 | */ | ||
/* Request/reply flag bits; each value is a distinct power of two. */
enum {
	CEPH_OSD_FLAG_ACK = 1,            /* want (or is) "ack" ack */
	CEPH_OSD_FLAG_ONNVRAM = 2,        /* want (or is) "onnvram" ack */
	CEPH_OSD_FLAG_ONDISK = 4,         /* want (or is) "ondisk" ack */
	CEPH_OSD_FLAG_RETRY = 8,          /* resend attempt */
	CEPH_OSD_FLAG_READ = 16,          /* op may read */
	CEPH_OSD_FLAG_WRITE = 32,         /* op may write */
	CEPH_OSD_FLAG_ORDERSNAP = 64,     /* EOLDSNAPC if snapc is out of order */
	CEPH_OSD_FLAG_PEERSTAT = 128,     /* msg includes osd_peer_stat */
	CEPH_OSD_FLAG_BALANCE_READS = 256,
	CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */
	CEPH_OSD_FLAG_PGOP = 1024,        /* pg op, no object */
};
| 291 | |||
/* Per-op flag bits (CEPH_OSD_OP_FLAG_*); only one is defined here. */
enum {
	CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
};
| 295 | |||
| 296 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | ||
| 297 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ | ||
| 298 | |||
| 299 | /* | ||
| 300 | * an individual object operation. each may be accompanied by some data | ||
| 301 | * payload | ||
| 302 | */ | ||
/*
 * Packed op record: opcode, flags, an anonymous union of per-op
 * argument layouts, and the length of the accompanying payload.
 * NOTE(review): which union member applies presumably depends on op --
 * confirm against the encoders/decoders.
 */
struct ceph_osd_op {
	__le16 op;     /* CEPH_OSD_OP_* */
	__le32 flags;  /* CEPH_OSD_FLAG_* */
	union {
		/* byte range */
		struct {
			__le64 offset, length;
		} __attribute__ ((packed)) extent;
		/* lengths of an xattr name and value */
		struct {
			__le32 name_len;
			__le32 value_len;
		} __attribute__ ((packed)) xattr;
		/* truncation size and sequence */
		struct {
			__le64 truncate_size;
			__le32 truncate_seq;
		} __attribute__ ((packed)) trunc;
		/* lengths/counts describing a class method call */
		struct {
			__u8 class_len;
			__u8 method_len;
			__u8 argc;
			__le32 indata_len;
		} __attribute__ ((packed)) cls;
		/* pg listing cursor and count */
		struct {
			__le64 cookie, count;
		} __attribute__ ((packed)) pgls;
	};
	__le32 payload_len;
} __attribute__ ((packed));
| 330 | |||
| 331 | /* | ||
| 332 | * osd request message header. each request may include multiple | ||
| 333 | * ceph_osd_op object operations. | ||
| 334 | */ | ||
/*
 * Packed, fixed-size front of an OSD request.  ops[] is a flexible
 * array member, so sizeof() covers only the fields before it; num_ops
 * gives the number of ops that follow.
 */
struct ceph_osd_request_head {
	__le64 tid;                        /* transaction id */
	__le32 client_inc;                 /* client incarnation */
	struct ceph_object_layout layout;  /* pgid */
	__le32 osdmap_epoch;               /* client's osdmap epoch */

	__le32 flags;

	struct ceph_timespec mtime;        /* for mutations only */
	struct ceph_eversion reassert_version; /* if we are replaying op */

	__le32 object_len;     /* length of object name */

	__le64 snapid;         /* snapid to read */
	__le64 snap_seq;       /* writer's snap context */
	__le32 num_snaps;

	__le16 num_ops;
	struct ceph_osd_op ops[];  /* followed by ops[], obj, ticket, snaps */
} __attribute__ ((packed));
| 355 | |||
| 356 | struct ceph_osd_reply_head { | ||
| 357 | __le64 tid; /* transaction id */ | ||
| 358 | __le32 client_inc; /* client incarnation */ | ||
| 359 | __le32 flags; | ||
| 360 | struct ceph_object_layout layout; | ||
| 361 | __le32 osdmap_epoch; | ||
| 362 | struct ceph_eversion reassert_version; /* for replaying uncommitted */ | ||
| 363 | |||
| 364 | __le32 result; /* result code */ | ||
| 365 | |||
| 366 | __le32 object_len; /* length of object name */ | ||
| 367 | __le32 num_ops; | ||
| 368 | struct ceph_osd_op ops[0]; /* ops[], object */ | ||
| 369 | } __attribute__ ((packed)); | ||
| 370 | |||
| 371 | |||
| 372 | #endif | ||
