diff options
| -rw-r--r-- | fs/ceph/mds_client.c | 19 | ||||
| -rw-r--r-- | fs/ceph/mdsmap.c | 163 | ||||
| -rw-r--r-- | fs/ceph/super.c | 10 | ||||
| -rw-r--r-- | fs/ceph/super.h | 1 | ||||
| -rw-r--r-- | include/linux/ceph/mdsmap.h | 5 |
5 files changed, 187 insertions, 11 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bf4d3d26850c..4f49253387a0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
| 2100 | err = -EIO; | 2100 | err = -EIO; |
| 2101 | goto finish; | 2101 | goto finish; |
| 2102 | } | 2102 | } |
| 2103 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { | ||
| 2104 | if (mdsc->mdsmap_err) { | ||
| 2105 | err = mdsc->mdsmap_err; | ||
| 2106 | dout("do_request mdsmap err %d\n", err); | ||
| 2107 | goto finish; | ||
| 2108 | } | ||
| 2109 | if (!(mdsc->fsc->mount_options->flags & | ||
| 2110 | CEPH_MOUNT_OPT_MOUNTWAIT) && | ||
| 2111 | !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { | ||
| 2112 | err = -ENOENT; | ||
| 2113 | pr_info("probably no mds server is up\n"); | ||
| 2114 | goto finish; | ||
| 2115 | } | ||
| 2116 | } | ||
| 2103 | 2117 | ||
| 2104 | put_request_session(req); | 2118 | put_request_session(req); |
| 2105 | 2119 | ||
| 2106 | mds = __choose_mds(mdsc, req); | 2120 | mds = __choose_mds(mdsc, req); |
| 2107 | if (mds < 0 || | 2121 | if (mds < 0 || |
| 2108 | ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { | 2122 | ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { |
| 2109 | if (mdsc->mdsmap_err) { | ||
| 2110 | err = mdsc->mdsmap_err; | ||
| 2111 | dout("do_request mdsmap err %d\n", err); | ||
| 2112 | goto finish; | ||
| 2113 | } | ||
| 2114 | dout("do_request no mds or not active, waiting for map\n"); | 2123 | dout("do_request no mds or not active, waiting for map\n"); |
| 2115 | list_add(&req->r_wait, &mdsc->waiting_for_map); | 2124 | list_add(&req->r_wait, &mdsc->waiting_for_map); |
| 2116 | goto out; | 2125 | goto out; |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 8c3591a7fbae..5454e2327a5f 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
| @@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) | |||
| 42 | return i; | 42 | return i; |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | #define __decode_and_drop_type(p, end, type, bad) \ | ||
| 46 | do { \ | ||
| 47 | if (*p + sizeof(type) > end) \ | ||
| 48 | goto bad; \ | ||
| 49 | *p += sizeof(type); \ | ||
| 50 | } while (0) | ||
| 51 | |||
| 52 | #define __decode_and_drop_set(p, end, type, bad) \ | ||
| 53 | do { \ | ||
| 54 | u32 n; \ | ||
| 55 | size_t need; \ | ||
| 56 | ceph_decode_32_safe(p, end, n, bad); \ | ||
| 57 | need = sizeof(type) * n; \ | ||
| 58 | ceph_decode_need(p, end, need, bad); \ | ||
| 59 | *p += need; \ | ||
| 60 | } while (0) | ||
| 61 | |||
| 62 | #define __decode_and_drop_map(p, end, ktype, vtype, bad) \ | ||
| 63 | do { \ | ||
| 64 | u32 n; \ | ||
| 65 | size_t need; \ | ||
| 66 | ceph_decode_32_safe(p, end, n, bad); \ | ||
| 67 | need = (sizeof(ktype) + sizeof(vtype)) * n; \ | ||
| 68 | ceph_decode_need(p, end, need, bad); \ | ||
| 69 | *p += need; \ | ||
| 70 | } while (0) | ||
| 71 | |||
| 72 | |||
| 73 | static int __decode_and_drop_compat_set(void **p, void* end) | ||
| 74 | { | ||
| 75 | int i; | ||
| 76 | /* compat, ro_compat, incompat*/ | ||
| 77 | for (i = 0; i < 3; i++) { | ||
| 78 | u32 n; | ||
| 79 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); | ||
| 80 | /* mask */ | ||
| 81 | *p += sizeof(u64); | ||
| 82 | /* names (map<u64, string>) */ | ||
| 83 | n = ceph_decode_32(p); | ||
| 84 | while (n-- > 0) { | ||
| 85 | u32 len; | ||
| 86 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), | ||
| 87 | bad); | ||
| 88 | *p += sizeof(u64); | ||
| 89 | len = ceph_decode_32(p); | ||
| 90 | ceph_decode_need(p, end, len, bad); | ||
| 91 | *p += len; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | return 0; | ||
| 95 | bad: | ||
| 96 | return -1; | ||
| 97 | } | ||
| 98 | |||
| 45 | /* | 99 | /* |
| 46 | * Decode an MDS map | 100 | * Decode an MDS map |
| 47 | * | 101 | * |
| @@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 55 | int i, j, n; | 109 | int i, j, n; |
| 56 | int err = -EINVAL; | 110 | int err = -EINVAL; |
| 57 | u8 mdsmap_v, mdsmap_cv; | 111 | u8 mdsmap_v, mdsmap_cv; |
| 112 | u16 mdsmap_ev; | ||
| 58 | 113 | ||
| 59 | m = kzalloc(sizeof(*m), GFP_NOFS); | 114 | m = kzalloc(sizeof(*m), GFP_NOFS); |
| 60 | if (m == NULL) | 115 | if (m == NULL) |
| @@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 83 | 138 | ||
| 84 | m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); | 139 | m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); |
| 85 | if (m->m_info == NULL) | 140 | if (m->m_info == NULL) |
| 86 | goto badmem; | 141 | goto nomem; |
| 87 | 142 | ||
| 88 | /* pick out active nodes from mds_info (state > 0) */ | 143 | /* pick out active nodes from mds_info (state > 0) */ |
| 89 | n = ceph_decode_32(p); | 144 | n = ceph_decode_32(p); |
| @@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 166 | info->export_targets = kcalloc(num_export_targets, | 221 | info->export_targets = kcalloc(num_export_targets, |
| 167 | sizeof(u32), GFP_NOFS); | 222 | sizeof(u32), GFP_NOFS); |
| 168 | if (info->export_targets == NULL) | 223 | if (info->export_targets == NULL) |
| 169 | goto badmem; | 224 | goto nomem; |
| 170 | for (j = 0; j < num_export_targets; j++) | 225 | for (j = 0; j < num_export_targets; j++) |
| 171 | info->export_targets[j] = | 226 | info->export_targets[j] = |
| 172 | ceph_decode_32(&pexport_targets); | 227 | ceph_decode_32(&pexport_targets); |
| @@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 180 | m->m_num_data_pg_pools = n; | 235 | m->m_num_data_pg_pools = n; |
| 181 | m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); | 236 | m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); |
| 182 | if (!m->m_data_pg_pools) | 237 | if (!m->m_data_pg_pools) |
| 183 | goto badmem; | 238 | goto nomem; |
| 184 | ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); | 239 | ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); |
| 185 | for (i = 0; i < n; i++) | 240 | for (i = 0; i < n; i++) |
| 186 | m->m_data_pg_pools[i] = ceph_decode_64(p); | 241 | m->m_data_pg_pools[i] = ceph_decode_64(p); |
| 187 | m->m_cas_pg_pool = ceph_decode_64(p); | 242 | m->m_cas_pg_pool = ceph_decode_64(p); |
| 243 | m->m_enabled = m->m_epoch > 1; | ||
| 244 | |||
| 245 | mdsmap_ev = 1; | ||
| 246 | if (mdsmap_v >= 2) { | ||
| 247 | ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext); | ||
| 248 | } | ||
| 249 | if (mdsmap_ev >= 3) { | ||
| 250 | if (__decode_and_drop_compat_set(p, end) < 0) | ||
| 251 | goto bad_ext; | ||
| 252 | } | ||
| 253 | /* metadata_pool */ | ||
| 254 | if (mdsmap_ev < 5) { | ||
| 255 | __decode_and_drop_type(p, end, u32, bad_ext); | ||
| 256 | } else { | ||
| 257 | __decode_and_drop_type(p, end, u64, bad_ext); | ||
| 258 | } | ||
| 188 | 259 | ||
| 189 | /* ok, we don't care about the rest. */ | 260 | /* created + modified + tableserver */ |
| 261 | __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); | ||
| 262 | __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); | ||
| 263 | __decode_and_drop_type(p, end, u32, bad_ext); | ||
| 264 | |||
| 265 | /* in */ | ||
| 266 | { | ||
| 267 | int num_laggy = 0; | ||
| 268 | ceph_decode_32_safe(p, end, n, bad_ext); | ||
| 269 | ceph_decode_need(p, end, sizeof(u32) * n, bad_ext); | ||
| 270 | |||
| 271 | for (i = 0; i < n; i++) { | ||
| 272 | s32 mds = ceph_decode_32(p); | ||
| 273 | if (mds >= 0 && mds < m->m_max_mds) { | ||
| 274 | if (m->m_info[mds].laggy) | ||
| 275 | num_laggy++; | ||
| 276 | } | ||
| 277 | } | ||
| 278 | m->m_num_laggy = num_laggy; | ||
| 279 | } | ||
| 280 | |||
| 281 | /* inc */ | ||
| 282 | __decode_and_drop_map(p, end, u32, u32, bad_ext); | ||
| 283 | /* up */ | ||
| 284 | __decode_and_drop_map(p, end, u32, u64, bad_ext); | ||
| 285 | /* failed */ | ||
| 286 | __decode_and_drop_set(p, end, u32, bad_ext); | ||
| 287 | /* stopped */ | ||
| 288 | __decode_and_drop_set(p, end, u32, bad_ext); | ||
| 289 | |||
| 290 | if (mdsmap_ev >= 4) { | ||
| 291 | /* last_failure_osd_epoch */ | ||
| 292 | __decode_and_drop_type(p, end, u32, bad_ext); | ||
| 293 | } | ||
| 294 | if (mdsmap_ev >= 6) { | ||
| 295 | /* ever_allowed_snaps */ | ||
| 296 | __decode_and_drop_type(p, end, u8, bad_ext); | ||
| 297 | /* explicitly_allowed_snaps */ | ||
| 298 | __decode_and_drop_type(p, end, u8, bad_ext); | ||
| 299 | } | ||
| 300 | if (mdsmap_ev >= 7) { | ||
| 301 | /* inline_data_enabled */ | ||
| 302 | __decode_and_drop_type(p, end, u8, bad_ext); | ||
| 303 | } | ||
| 304 | if (mdsmap_ev >= 8) { | ||
| 305 | u32 name_len; | ||
| 306 | /* enabled */ | ||
| 307 | ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); | ||
| 308 | ceph_decode_32_safe(p, end, name_len, bad_ext); | ||
| 309 | ceph_decode_need(p, end, name_len, bad_ext); | ||
| 310 | *p += name_len; | ||
| 311 | } | ||
| 312 | /* damaged */ | ||
| 313 | if (mdsmap_ev >= 9) { | ||
| 314 | size_t need; | ||
| 315 | ceph_decode_32_safe(p, end, n, bad_ext); | ||
| 316 | need = sizeof(u32) * n; | ||
| 317 | ceph_decode_need(p, end, need, bad_ext); | ||
| 318 | *p += need; | ||
| 319 | m->m_damaged = n > 0; | ||
| 320 | } else { | ||
| 321 | m->m_damaged = false; | ||
| 322 | } | ||
| 323 | bad_ext: | ||
| 190 | *p = end; | 324 | *p = end; |
| 191 | dout("mdsmap_decode success epoch %u\n", m->m_epoch); | 325 | dout("mdsmap_decode success epoch %u\n", m->m_epoch); |
| 192 | return m; | 326 | return m; |
| 193 | 327 | nomem: | |
| 194 | badmem: | ||
| 195 | err = -ENOMEM; | 328 | err = -ENOMEM; |
| 329 | goto out_err; | ||
| 196 | bad: | 330 | bad: |
| 197 | pr_err("corrupt mdsmap\n"); | 331 | pr_err("corrupt mdsmap\n"); |
| 198 | print_hex_dump(KERN_DEBUG, "mdsmap: ", | 332 | print_hex_dump(KERN_DEBUG, "mdsmap: ", |
| 199 | DUMP_PREFIX_OFFSET, 16, 1, | 333 | DUMP_PREFIX_OFFSET, 16, 1, |
| 200 | start, end - start, true); | 334 | start, end - start, true); |
| 335 | out_err: | ||
| 201 | ceph_mdsmap_destroy(m); | 336 | ceph_mdsmap_destroy(m); |
| 202 | return ERR_PTR(err); | 337 | return ERR_PTR(err); |
| 203 | } | 338 | } |
| @@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) | |||
| 212 | kfree(m->m_data_pg_pools); | 347 | kfree(m->m_data_pg_pools); |
| 213 | kfree(m); | 348 | kfree(m); |
| 214 | } | 349 | } |
| 350 | |||
| 351 | bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) | ||
| 352 | { | ||
| 353 | int i, nr_active = 0; | ||
| 354 | if (!m->m_enabled) | ||
| 355 | return false; | ||
| 356 | if (m->m_damaged) | ||
| 357 | return false; | ||
| 358 | if (m->m_num_laggy > 0) | ||
| 359 | return false; | ||
| 360 | for (i = 0; i < m->m_max_mds; i++) { | ||
| 361 | if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) | ||
| 362 | nr_active++; | ||
| 363 | } | ||
| 364 | return nr_active > 0; | ||
| 365 | } | ||
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b382e5910eea..537f96631785 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -137,6 +137,8 @@ enum { | |||
| 137 | Opt_nofscache, | 137 | Opt_nofscache, |
| 138 | Opt_poolperm, | 138 | Opt_poolperm, |
| 139 | Opt_nopoolperm, | 139 | Opt_nopoolperm, |
| 140 | Opt_require_active_mds, | ||
| 141 | Opt_norequire_active_mds, | ||
| 140 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 142 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
| 141 | Opt_acl, | 143 | Opt_acl, |
| 142 | #endif | 144 | #endif |
| @@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = { | |||
| 171 | {Opt_nofscache, "nofsc"}, | 173 | {Opt_nofscache, "nofsc"}, |
| 172 | {Opt_poolperm, "poolperm"}, | 174 | {Opt_poolperm, "poolperm"}, |
| 173 | {Opt_nopoolperm, "nopoolperm"}, | 175 | {Opt_nopoolperm, "nopoolperm"}, |
| 176 | {Opt_require_active_mds, "require_active_mds"}, | ||
| 177 | {Opt_norequire_active_mds, "norequire_active_mds"}, | ||
| 174 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 178 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
| 175 | {Opt_acl, "acl"}, | 179 | {Opt_acl, "acl"}, |
| 176 | #endif | 180 | #endif |
| @@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private) | |||
| 287 | case Opt_nopoolperm: | 291 | case Opt_nopoolperm: |
| 288 | fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; | 292 | fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; |
| 289 | break; | 293 | break; |
| 294 | case Opt_require_active_mds: | ||
| 295 | fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; | ||
| 296 | break; | ||
| 297 | case Opt_norequire_active_mds: | ||
| 298 | fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; | ||
| 299 | break; | ||
| 290 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 300 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
| 291 | case Opt_acl: | 301 | case Opt_acl: |
| 292 | fsopt->sb_flags |= MS_POSIXACL; | 302 | fsopt->sb_flags |= MS_POSIXACL; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 622d5dd9f616..b07f55e55f60 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ | 36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ |
| 37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ | 37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ |
| 38 | #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ | 38 | #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ |
| 39 | #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ | ||
| 39 | 40 | ||
| 40 | #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE | 41 | #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE |
| 41 | 42 | ||
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 87ed09f54800..8ed5dc505fbb 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h | |||
| @@ -31,6 +31,10 @@ struct ceph_mdsmap { | |||
| 31 | int m_num_data_pg_pools; | 31 | int m_num_data_pg_pools; |
| 32 | u64 *m_data_pg_pools; | 32 | u64 *m_data_pg_pools; |
| 33 | u64 m_cas_pg_pool; | 33 | u64 m_cas_pg_pool; |
| 34 | |||
| 35 | bool m_enabled; | ||
| 36 | bool m_damaged; | ||
| 37 | int m_num_laggy; | ||
| 34 | }; | 38 | }; |
| 35 | 39 | ||
| 36 | static inline struct ceph_entity_addr * | 40 | static inline struct ceph_entity_addr * |
| @@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) | |||
| 59 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); | 63 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); |
| 60 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); | 64 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); |
| 61 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); | 65 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); |
| 66 | extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); | ||
| 62 | 67 | ||
| 63 | #endif | 68 | #endif |
