diff options
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/ceph/ceph_features.h | 12 | ||||
| -rw-r--r-- | include/linux/ceph/ceph_fs.h | 5 | ||||
| -rw-r--r-- | include/linux/ceph/osd_client.h | 11 | ||||
| -rw-r--r-- | include/linux/ceph/osdmap.h | 50 | ||||
| -rw-r--r-- | include/linux/ceph/rados.h | 18 | ||||
| -rw-r--r-- | include/linux/crush/crush.h | 7 |
6 files changed, 91 insertions, 12 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 138448f766b4..d12659ce550d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h | |||
| @@ -43,6 +43,13 @@ | |||
| 43 | #define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */ | 43 | #define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */ |
| 44 | #define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) | 44 | #define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) |
| 45 | #define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) | 45 | #define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) |
| 46 | #define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */ | ||
| 47 | /* The process supports new-style OSDMap encoding. Monitors also use | ||
| 48 | this bit to determine if peers support NAK messages. */ | ||
| 49 | #define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) | ||
| 50 | #define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) | ||
| 51 | #define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) | ||
| 52 | #define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ | ||
| 46 | 53 | ||
| 47 | /* | 54 | /* |
| 48 | * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature | 55 | * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature |
| @@ -82,7 +89,10 @@ static inline u64 ceph_sanitize_features(u64 features) | |||
| 82 | CEPH_FEATURE_OSDHASHPSPOOL | \ | 89 | CEPH_FEATURE_OSDHASHPSPOOL | \ |
| 83 | CEPH_FEATURE_OSD_CACHEPOOL | \ | 90 | CEPH_FEATURE_OSD_CACHEPOOL | \ |
| 84 | CEPH_FEATURE_CRUSH_V2 | \ | 91 | CEPH_FEATURE_CRUSH_V2 | \ |
| 85 | CEPH_FEATURE_EXPORT_PEER) | 92 | CEPH_FEATURE_EXPORT_PEER | \ |
| 93 | CEPH_FEATURE_OSDMAP_ENC | \ | ||
| 94 | CEPH_FEATURE_CRUSH_TUNABLES3 | \ | ||
| 95 | CEPH_FEATURE_OSD_PRIMARY_AFFINITY) | ||
| 86 | 96 | ||
| 87 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ | 97 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ |
| 88 | (CEPH_FEATURE_NOSRCADDR | \ | 98 | (CEPH_FEATURE_NOSRCADDR | \ |
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 25bfb0eff772..5f6db18d72e8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
| @@ -332,6 +332,7 @@ enum { | |||
| 332 | CEPH_MDS_OP_LOOKUPHASH = 0x00102, | 332 | CEPH_MDS_OP_LOOKUPHASH = 0x00102, |
| 333 | CEPH_MDS_OP_LOOKUPPARENT = 0x00103, | 333 | CEPH_MDS_OP_LOOKUPPARENT = 0x00103, |
| 334 | CEPH_MDS_OP_LOOKUPINO = 0x00104, | 334 | CEPH_MDS_OP_LOOKUPINO = 0x00104, |
| 335 | CEPH_MDS_OP_LOOKUPNAME = 0x00105, | ||
| 335 | 336 | ||
| 336 | CEPH_MDS_OP_SETXATTR = 0x01105, | 337 | CEPH_MDS_OP_SETXATTR = 0x01105, |
| 337 | CEPH_MDS_OP_RMXATTR = 0x01106, | 338 | CEPH_MDS_OP_RMXATTR = 0x01106, |
| @@ -420,8 +421,8 @@ union ceph_mds_request_args { | |||
| 420 | struct { | 421 | struct { |
| 421 | __u8 rule; /* currently fcntl or flock */ | 422 | __u8 rule; /* currently fcntl or flock */ |
| 422 | __u8 type; /* shared, exclusive, remove*/ | 423 | __u8 type; /* shared, exclusive, remove*/ |
| 424 | __le64 owner; /* owner of the lock */ | ||
| 423 | __le64 pid; /* process id requesting the lock */ | 425 | __le64 pid; /* process id requesting the lock */ |
| 424 | __le64 pid_namespace; | ||
| 425 | __le64 start; /* initial location to lock */ | 426 | __le64 start; /* initial location to lock */ |
| 426 | __le64 length; /* num bytes to lock from start */ | 427 | __le64 length; /* num bytes to lock from start */ |
| 427 | __u8 wait; /* will caller wait for lock to become available? */ | 428 | __u8 wait; /* will caller wait for lock to become available? */ |
| @@ -532,8 +533,8 @@ struct ceph_filelock { | |||
| 532 | __le64 start;/* file offset to start lock at */ | 533 | __le64 start;/* file offset to start lock at */ |
| 533 | __le64 length; /* num bytes to lock; 0 for all following start */ | 534 | __le64 length; /* num bytes to lock; 0 for all following start */ |
| 534 | __le64 client; /* which client holds the lock */ | 535 | __le64 client; /* which client holds the lock */ |
| 536 | __le64 owner; /* owner the lock */ | ||
| 535 | __le64 pid; /* process id holding the lock on the client */ | 537 | __le64 pid; /* process id holding the lock on the client */ |
| 536 | __le64 pid_namespace; | ||
| 537 | __u8 type; /* shared lock, exclusive lock, or unlock */ | 538 | __u8 type; /* shared lock, exclusive lock, or unlock */ |
| 538 | } __attribute__ ((packed)); | 539 | } __attribute__ ((packed)); |
| 539 | 540 | ||
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fd47e872ebcc..94ec69672164 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
| @@ -43,7 +43,7 @@ struct ceph_osd { | |||
| 43 | }; | 43 | }; |
| 44 | 44 | ||
| 45 | 45 | ||
| 46 | #define CEPH_OSD_MAX_OP 2 | 46 | #define CEPH_OSD_MAX_OP 3 |
| 47 | 47 | ||
| 48 | enum ceph_osd_data_type { | 48 | enum ceph_osd_data_type { |
| 49 | CEPH_OSD_DATA_TYPE_NONE = 0, | 49 | CEPH_OSD_DATA_TYPE_NONE = 0, |
| @@ -76,6 +76,7 @@ struct ceph_osd_data { | |||
| 76 | 76 | ||
| 77 | struct ceph_osd_req_op { | 77 | struct ceph_osd_req_op { |
| 78 | u16 op; /* CEPH_OSD_OP_* */ | 78 | u16 op; /* CEPH_OSD_OP_* */ |
| 79 | u32 flags; /* CEPH_OSD_OP_FLAG_* */ | ||
| 79 | u32 payload_len; | 80 | u32 payload_len; |
| 80 | union { | 81 | union { |
| 81 | struct ceph_osd_data raw_data_in; | 82 | struct ceph_osd_data raw_data_in; |
| @@ -102,6 +103,10 @@ struct ceph_osd_req_op { | |||
| 102 | u32 timeout; | 103 | u32 timeout; |
| 103 | __u8 flag; | 104 | __u8 flag; |
| 104 | } watch; | 105 | } watch; |
| 106 | struct { | ||
| 107 | u64 expected_object_size; | ||
| 108 | u64 expected_write_size; | ||
| 109 | } alloc_hint; | ||
| 105 | }; | 110 | }; |
| 106 | }; | 111 | }; |
| 107 | 112 | ||
| @@ -293,6 +298,10 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, | |||
| 293 | extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, | 298 | extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, |
| 294 | unsigned int which, u16 opcode, | 299 | unsigned int which, u16 opcode, |
| 295 | u64 cookie, u64 version, int flag); | 300 | u64 cookie, u64 version, int flag); |
| 301 | extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, | ||
| 302 | unsigned int which, | ||
| 303 | u64 expected_object_size, | ||
| 304 | u64 expected_write_size); | ||
| 296 | 305 | ||
| 297 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | 306 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, |
| 298 | struct ceph_snap_context *snapc, | 307 | struct ceph_snap_context *snapc, |
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 49ff69f0746b..561ea896c657 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
| @@ -41,6 +41,18 @@ struct ceph_pg_pool_info { | |||
| 41 | char *name; | 41 | char *name; |
| 42 | }; | 42 | }; |
| 43 | 43 | ||
| 44 | static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) | ||
| 45 | { | ||
| 46 | switch (pool->type) { | ||
| 47 | case CEPH_POOL_TYPE_REP: | ||
| 48 | return true; | ||
| 49 | case CEPH_POOL_TYPE_EC: | ||
| 50 | return false; | ||
| 51 | default: | ||
| 52 | BUG_ON(1); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 44 | struct ceph_object_locator { | 56 | struct ceph_object_locator { |
| 45 | s64 pool; | 57 | s64 pool; |
| 46 | }; | 58 | }; |
| @@ -60,8 +72,16 @@ struct ceph_object_id { | |||
| 60 | struct ceph_pg_mapping { | 72 | struct ceph_pg_mapping { |
| 61 | struct rb_node node; | 73 | struct rb_node node; |
| 62 | struct ceph_pg pgid; | 74 | struct ceph_pg pgid; |
| 63 | int len; | 75 | |
| 64 | int osds[]; | 76 | union { |
| 77 | struct { | ||
| 78 | int len; | ||
| 79 | int osds[]; | ||
| 80 | } pg_temp; | ||
| 81 | struct { | ||
| 82 | int osd; | ||
| 83 | } primary_temp; | ||
| 84 | }; | ||
| 65 | }; | 85 | }; |
| 66 | 86 | ||
| 67 | struct ceph_osdmap { | 87 | struct ceph_osdmap { |
| @@ -78,12 +98,19 @@ struct ceph_osdmap { | |||
| 78 | struct ceph_entity_addr *osd_addr; | 98 | struct ceph_entity_addr *osd_addr; |
| 79 | 99 | ||
| 80 | struct rb_root pg_temp; | 100 | struct rb_root pg_temp; |
| 101 | struct rb_root primary_temp; | ||
| 102 | |||
| 103 | u32 *osd_primary_affinity; | ||
| 104 | |||
| 81 | struct rb_root pg_pools; | 105 | struct rb_root pg_pools; |
| 82 | u32 pool_max; | 106 | u32 pool_max; |
| 83 | 107 | ||
| 84 | /* the CRUSH map specifies the mapping of placement groups to | 108 | /* the CRUSH map specifies the mapping of placement groups to |
| 85 | * the list of osds that store+replicate them. */ | 109 | * the list of osds that store+replicate them. */ |
| 86 | struct crush_map *crush; | 110 | struct crush_map *crush; |
| 111 | |||
| 112 | struct mutex crush_scratch_mutex; | ||
| 113 | int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; | ||
| 87 | }; | 114 | }; |
| 88 | 115 | ||
| 89 | static inline void ceph_oid_set_name(struct ceph_object_id *oid, | 116 | static inline void ceph_oid_set_name(struct ceph_object_id *oid, |
| @@ -110,9 +137,21 @@ static inline void ceph_oid_copy(struct ceph_object_id *dest, | |||
| 110 | dest->name_len = src->name_len; | 137 | dest->name_len = src->name_len; |
| 111 | } | 138 | } |
| 112 | 139 | ||
| 140 | static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) | ||
| 141 | { | ||
| 142 | return osd >= 0 && osd < map->max_osd && | ||
| 143 | (map->osd_state[osd] & CEPH_OSD_EXISTS); | ||
| 144 | } | ||
| 145 | |||
| 113 | static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) | 146 | static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) |
| 114 | { | 147 | { |
| 115 | return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); | 148 | return ceph_osd_exists(map, osd) && |
| 149 | (map->osd_state[osd] & CEPH_OSD_UP); | ||
| 150 | } | ||
| 151 | |||
| 152 | static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd) | ||
| 153 | { | ||
| 154 | return !ceph_osd_is_up(map, osd); | ||
| 116 | } | 155 | } |
| 117 | 156 | ||
| 118 | static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) | 157 | static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) |
| @@ -121,6 +160,7 @@ static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) | |||
| 121 | } | 160 | } |
| 122 | 161 | ||
| 123 | extern char *ceph_osdmap_state_str(char *str, int len, int state); | 162 | extern char *ceph_osdmap_state_str(char *str, int len, int state); |
| 163 | extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); | ||
| 124 | 164 | ||
| 125 | static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, | 165 | static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, |
| 126 | int osd) | 166 | int osd) |
| @@ -153,7 +193,7 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) | |||
| 153 | return 0; | 193 | return 0; |
| 154 | } | 194 | } |
| 155 | 195 | ||
| 156 | extern struct ceph_osdmap *osdmap_decode(void **p, void *end); | 196 | extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); |
| 157 | extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | 197 | extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, |
| 158 | struct ceph_osdmap *map, | 198 | struct ceph_osdmap *map, |
| 159 | struct ceph_messenger *msgr); | 199 | struct ceph_messenger *msgr); |
| @@ -172,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, | |||
| 172 | 212 | ||
| 173 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, | 213 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, |
| 174 | struct ceph_pg pgid, | 214 | struct ceph_pg pgid, |
| 175 | int *acting); | 215 | int *osds, int *primary); |
| 176 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | 216 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
| 177 | struct ceph_pg pgid); | 217 | struct ceph_pg pgid); |
| 178 | 218 | ||
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 96292df4041b..f20e0d8a2155 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h | |||
| @@ -81,8 +81,9 @@ struct ceph_pg_v1 { | |||
| 81 | */ | 81 | */ |
| 82 | #define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ | 82 | #define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ |
| 83 | 83 | ||
| 84 | #define CEPH_PG_TYPE_REP 1 | 84 | #define CEPH_POOL_TYPE_REP 1 |
| 85 | #define CEPH_PG_TYPE_RAID4 2 | 85 | #define CEPH_POOL_TYPE_RAID4 2 /* never implemented */ |
| 86 | #define CEPH_POOL_TYPE_EC 3 | ||
| 86 | 87 | ||
| 87 | /* | 88 | /* |
| 88 | * stable_mod func is used to control number of placement groups. | 89 | * stable_mod func is used to control number of placement groups. |
| @@ -133,6 +134,10 @@ extern const char *ceph_osd_state_name(int s); | |||
| 133 | #define CEPH_OSD_IN 0x10000 | 134 | #define CEPH_OSD_IN 0x10000 |
| 134 | #define CEPH_OSD_OUT 0 | 135 | #define CEPH_OSD_OUT 0 |
| 135 | 136 | ||
| 137 | /* osd primary-affinity. fixed point value: 0x10000 == baseline */ | ||
| 138 | #define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000 | ||
| 139 | #define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000 | ||
| 140 | |||
| 136 | 141 | ||
| 137 | /* | 142 | /* |
| 138 | * osd map flag bits | 143 | * osd map flag bits |
| @@ -227,6 +232,9 @@ enum { | |||
| 227 | CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, | 232 | CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, |
| 228 | CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, | 233 | CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, |
| 229 | 234 | ||
| 235 | /* hints */ | ||
| 236 | CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35, | ||
| 237 | |||
| 230 | /** multi **/ | 238 | /** multi **/ |
| 231 | CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, | 239 | CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, |
| 232 | CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, | 240 | CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, |
| @@ -382,7 +390,7 @@ enum { | |||
| 382 | */ | 390 | */ |
| 383 | struct ceph_osd_op { | 391 | struct ceph_osd_op { |
| 384 | __le16 op; /* CEPH_OSD_OP_* */ | 392 | __le16 op; /* CEPH_OSD_OP_* */ |
| 385 | __le32 flags; /* CEPH_OSD_FLAG_* */ | 393 | __le32 flags; /* CEPH_OSD_OP_FLAG_* */ |
| 386 | union { | 394 | union { |
| 387 | struct { | 395 | struct { |
| 388 | __le64 offset, length; | 396 | __le64 offset, length; |
| @@ -416,6 +424,10 @@ struct ceph_osd_op { | |||
| 416 | __le64 offset, length; | 424 | __le64 offset, length; |
| 417 | __le64 src_offset; | 425 | __le64 src_offset; |
| 418 | } __attribute__ ((packed)) clonerange; | 426 | } __attribute__ ((packed)) clonerange; |
| 427 | struct { | ||
| 428 | __le64 expected_object_size; | ||
| 429 | __le64 expected_write_size; | ||
| 430 | } __attribute__ ((packed)) alloc_hint; | ||
| 419 | }; | 431 | }; |
| 420 | __le32 payload_len; | 432 | __le32 payload_len; |
| 421 | } __attribute__ ((packed)); | 433 | } __attribute__ ((packed)); |
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index acaa5615d634..4fad5f8ee01d 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h | |||
| @@ -51,6 +51,7 @@ enum { | |||
| 51 | CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */ | 51 | CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */ |
| 52 | CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10, | 52 | CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10, |
| 53 | CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11, | 53 | CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11, |
| 54 | CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12 | ||
| 54 | }; | 55 | }; |
| 55 | 56 | ||
| 56 | /* | 57 | /* |
| @@ -173,6 +174,12 @@ struct crush_map { | |||
| 173 | * apply to a collision: in that case we will retry as we used | 174 | * apply to a collision: in that case we will retry as we used |
| 174 | * to. */ | 175 | * to. */ |
| 175 | __u32 chooseleaf_descend_once; | 176 | __u32 chooseleaf_descend_once; |
| 177 | |||
| 178 | /* if non-zero, feed r into chooseleaf, bit-shifted right by (r-1) | ||
| 179 | * bits. a value of 1 is best for new clusters. for legacy clusters | ||
| 180 | * that want to limit reshuffling, a value of 3 or 4 will make the | ||
| 181 | * mappings line up a bit better with previous mappings. */ | ||
| 182 | __u8 chooseleaf_vary_r; | ||
| 176 | }; | 183 | }; |
| 177 | 184 | ||
| 178 | 185 | ||
