diff options
author | Yan, Zheng <zyan@redhat.com> | 2016-11-10 03:02:06 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2016-12-12 17:54:27 -0500 |
commit | e9e427f0a14f7e4773896dd7af357819a56d097a (patch) | |
tree | 0dc4cbb9e97788412760a6d03d86b0967e655db4 | |
parent | 7ce469a53e7106acdaca2e25027941d0f7c12a8e (diff) |
ceph: check availability of mds cluster on mount
Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r-- | fs/ceph/mds_client.c | 19 | ||||
-rw-r--r-- | fs/ceph/mdsmap.c | 163 | ||||
-rw-r--r-- | fs/ceph/super.c | 10 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 | ||||
-rw-r--r-- | include/linux/ceph/mdsmap.h | 5 |
5 files changed, 187 insertions, 11 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c<br>index bf4d3d26850c..4f49253387a0 100644<br>--- a/fs/ceph/mds_client.c<br>+++ b/fs/ceph/mds_client.c | |||
@@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
2100 | err = -EIO; | 2100 | err = -EIO; |
2101 | goto finish; | 2101 | goto finish; |
2102 | } | 2102 | } |
2103 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { | ||
2104 | if (mdsc->mdsmap_err) { | ||
2105 | err = mdsc->mdsmap_err; | ||
2106 | dout("do_request mdsmap err %d\n", err); | ||
2107 | goto finish; | ||
2108 | } | ||
2109 | if (!(mdsc->fsc->mount_options->flags & | ||
2110 | CEPH_MOUNT_OPT_MOUNTWAIT) && | ||
2111 | !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { | ||
2112 | err = -ENOENT; | ||
2113 | pr_info("probably no mds server is up\n"); | ||
2114 | goto finish; | ||
2115 | } | ||
2116 | } | ||
2103 | 2117 | ||
2104 | put_request_session(req); | 2118 | put_request_session(req); |
2105 | 2119 | ||
2106 | mds = __choose_mds(mdsc, req); | 2120 | mds = __choose_mds(mdsc, req); |
2107 | if (mds < 0 || | 2121 | if (mds < 0 || |
2108 | ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { | 2122 | ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { |
2109 | if (mdsc->mdsmap_err) { | ||
2110 | err = mdsc->mdsmap_err; | ||
2111 | dout("do_request mdsmap err %d\n", err); | ||
2112 | goto finish; | ||
2113 | } | ||
2114 | dout("do_request no mds or not active, waiting for map\n"); | 2123 | dout("do_request no mds or not active, waiting for map\n"); |
2115 | list_add(&req->r_wait, &mdsc->waiting_for_map); | 2124 | list_add(&req->r_wait, &mdsc->waiting_for_map); |
2116 | goto out; | 2125 | goto out; |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c<br>index 8c3591a7fbae..5454e2327a5f 100644<br>--- a/fs/ceph/mdsmap.c<br>+++ b/fs/ceph/mdsmap.c | |||
@@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) | |||
42 | return i; | 42 | return i; |
43 | } | 43 | } |
44 | 44 | ||
45 | #define __decode_and_drop_type(p, end, type, bad) \ | ||
46 | do { \ | ||
47 | if (*p + sizeof(type) > end) \ | ||
48 | goto bad; \ | ||
49 | *p += sizeof(type); \ | ||
50 | } while (0) | ||
51 | |||
52 | #define __decode_and_drop_set(p, end, type, bad) \ | ||
53 | do { \ | ||
54 | u32 n; \ | ||
55 | size_t need; \ | ||
56 | ceph_decode_32_safe(p, end, n, bad); \ | ||
57 | need = sizeof(type) * n; \ | ||
58 | ceph_decode_need(p, end, need, bad); \ | ||
59 | *p += need; \ | ||
60 | } while (0) | ||
61 | |||
62 | #define __decode_and_drop_map(p, end, ktype, vtype, bad) \ | ||
63 | do { \ | ||
64 | u32 n; \ | ||
65 | size_t need; \ | ||
66 | ceph_decode_32_safe(p, end, n, bad); \ | ||
67 | need = (sizeof(ktype) + sizeof(vtype)) * n; \ | ||
68 | ceph_decode_need(p, end, need, bad); \ | ||
69 | *p += need; \ | ||
70 | } while (0) | ||
71 | |||
72 | |||
73 | static int __decode_and_drop_compat_set(void **p, void* end) | ||
74 | { | ||
75 | int i; | ||
76 | /* compat, ro_compat, incompat*/ | ||
77 | for (i = 0; i < 3; i++) { | ||
78 | u32 n; | ||
79 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); | ||
80 | /* mask */ | ||
81 | *p += sizeof(u64); | ||
82 | /* names (map<u64, string>) */ | ||
83 | n = ceph_decode_32(p); | ||
84 | while (n-- > 0) { | ||
85 | u32 len; | ||
86 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), | ||
87 | bad); | ||
88 | *p += sizeof(u64); | ||
89 | len = ceph_decode_32(p); | ||
90 | ceph_decode_need(p, end, len, bad); | ||
91 | *p += len; | ||
92 | } | ||
93 | } | ||
94 | return 0; | ||
95 | bad: | ||
96 | return -1; | ||
97 | } | ||
98 | |||
45 | /* | 99 | /* |
46 | * Decode an MDS map | 100 | * Decode an MDS map |
47 | * | 101 | * |
@@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
55 | int i, j, n; | 109 | int i, j, n; |
56 | int err = -EINVAL; | 110 | int err = -EINVAL; |
57 | u8 mdsmap_v, mdsmap_cv; | 111 | u8 mdsmap_v, mdsmap_cv; |
112 | u16 mdsmap_ev; | ||
58 | 113 | ||
59 | m = kzalloc(sizeof(*m), GFP_NOFS); | 114 | m = kzalloc(sizeof(*m), GFP_NOFS); |
60 | if (m == NULL) | 115 | if (m == NULL) |
@@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
83 | 138 | ||
84 | m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); | 139 | m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); |
85 | if (m->m_info == NULL) | 140 | if (m->m_info == NULL) |
86 | goto badmem; | 141 | goto nomem; |
87 | 142 | ||
88 | /* pick out active nodes from mds_info (state > 0) */ | 143 | /* pick out active nodes from mds_info (state > 0) */ |
89 | n = ceph_decode_32(p); | 144 | n = ceph_decode_32(p); |
@@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
166 | info->export_targets = kcalloc(num_export_targets, | 221 | info->export_targets = kcalloc(num_export_targets, |
167 | sizeof(u32), GFP_NOFS); | 222 | sizeof(u32), GFP_NOFS); |
168 | if (info->export_targets == NULL) | 223 | if (info->export_targets == NULL) |
169 | goto badmem; | 224 | goto nomem; |
170 | for (j = 0; j < num_export_targets; j++) | 225 | for (j = 0; j < num_export_targets; j++) |
171 | info->export_targets[j] = | 226 | info->export_targets[j] = |
172 | ceph_decode_32(&pexport_targets); | 227 | ceph_decode_32(&pexport_targets); |
@@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
180 | m->m_num_data_pg_pools = n; | 235 | m->m_num_data_pg_pools = n; |
181 | m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); | 236 | m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); |
182 | if (!m->m_data_pg_pools) | 237 | if (!m->m_data_pg_pools) |
183 | goto badmem; | 238 | goto nomem; |
184 | ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); | 239 | ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); |
185 | for (i = 0; i < n; i++) | 240 | for (i = 0; i < n; i++) |
186 | m->m_data_pg_pools[i] = ceph_decode_64(p); | 241 | m->m_data_pg_pools[i] = ceph_decode_64(p); |
187 | m->m_cas_pg_pool = ceph_decode_64(p); | 242 | m->m_cas_pg_pool = ceph_decode_64(p); |
243 | m->m_enabled = m->m_epoch > 1; | ||
244 | |||
245 | mdsmap_ev = 1; | ||
246 | if (mdsmap_v >= 2) { | ||
247 | ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext); | ||
248 | } | ||
249 | if (mdsmap_ev >= 3) { | ||
250 | if (__decode_and_drop_compat_set(p, end) < 0) | ||
251 | goto bad_ext; | ||
252 | } | ||
253 | /* metadata_pool */ | ||
254 | if (mdsmap_ev < 5) { | ||
255 | __decode_and_drop_type(p, end, u32, bad_ext); | ||
256 | } else { | ||
257 | __decode_and_drop_type(p, end, u64, bad_ext); | ||
258 | } | ||
188 | 259 | ||
189 | /* ok, we don't care about the rest. */ | 260 | /* created + modified + tableserver */ |
261 | __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); | ||
262 | __decode_and_drop_type(p, end, struct ceph_timespec, bad_ext); | ||
263 | __decode_and_drop_type(p, end, u32, bad_ext); | ||
264 | |||
265 | /* in */ | ||
266 | { | ||
267 | int num_laggy = 0; | ||
268 | ceph_decode_32_safe(p, end, n, bad_ext); | ||
269 | ceph_decode_need(p, end, sizeof(u32) * n, bad_ext); | ||
270 | |||
271 | for (i = 0; i < n; i++) { | ||
272 | s32 mds = ceph_decode_32(p); | ||
273 | if (mds >= 0 && mds < m->m_max_mds) { | ||
274 | if (m->m_info[mds].laggy) | ||
275 | num_laggy++; | ||
276 | } | ||
277 | } | ||
278 | m->m_num_laggy = num_laggy; | ||
279 | } | ||
280 | |||
281 | /* inc */ | ||
282 | __decode_and_drop_map(p, end, u32, u32, bad_ext); | ||
283 | /* up */ | ||
284 | __decode_and_drop_map(p, end, u32, u64, bad_ext); | ||
285 | /* failed */ | ||
286 | __decode_and_drop_set(p, end, u32, bad_ext); | ||
287 | /* stopped */ | ||
288 | __decode_and_drop_set(p, end, u32, bad_ext); | ||
289 | |||
290 | if (mdsmap_ev >= 4) { | ||
291 | /* last_failure_osd_epoch */ | ||
292 | __decode_and_drop_type(p, end, u32, bad_ext); | ||
293 | } | ||
294 | if (mdsmap_ev >= 6) { | ||
295 | /* ever_allowed_snaps */ | ||
296 | __decode_and_drop_type(p, end, u8, bad_ext); | ||
297 | /* explicitly_allowed_snaps */ | ||
298 | __decode_and_drop_type(p, end, u8, bad_ext); | ||
299 | } | ||
300 | if (mdsmap_ev >= 7) { | ||
301 | /* inline_data_enabled */ | ||
302 | __decode_and_drop_type(p, end, u8, bad_ext); | ||
303 | } | ||
304 | if (mdsmap_ev >= 8) { | ||
305 | u32 name_len; | ||
306 | /* enabled */ | ||
307 | ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); | ||
308 | ceph_decode_32_safe(p, end, name_len, bad_ext); | ||
309 | ceph_decode_need(p, end, name_len, bad_ext); | ||
310 | *p += name_len; | ||
311 | } | ||
312 | /* damaged */ | ||
313 | if (mdsmap_ev >= 9) { | ||
314 | size_t need; | ||
315 | ceph_decode_32_safe(p, end, n, bad_ext); | ||
316 | need = sizeof(u32) * n; | ||
317 | ceph_decode_need(p, end, need, bad_ext); | ||
318 | *p += need; | ||
319 | m->m_damaged = n > 0; | ||
320 | } else { | ||
321 | m->m_damaged = false; | ||
322 | } | ||
323 | bad_ext: | ||
190 | *p = end; | 324 | *p = end; |
191 | dout("mdsmap_decode success epoch %u\n", m->m_epoch); | 325 | dout("mdsmap_decode success epoch %u\n", m->m_epoch); |
192 | return m; | 326 | return m; |
193 | 327 | nomem: | |
194 | badmem: | ||
195 | err = -ENOMEM; | 328 | err = -ENOMEM; |
329 | goto out_err; | ||
196 | bad: | 330 | bad: |
197 | pr_err("corrupt mdsmap\n"); | 331 | pr_err("corrupt mdsmap\n"); |
198 | print_hex_dump(KERN_DEBUG, "mdsmap: ", | 332 | print_hex_dump(KERN_DEBUG, "mdsmap: ", |
199 | DUMP_PREFIX_OFFSET, 16, 1, | 333 | DUMP_PREFIX_OFFSET, 16, 1, |
200 | start, end - start, true); | 334 | start, end - start, true); |
335 | out_err: | ||
201 | ceph_mdsmap_destroy(m); | 336 | ceph_mdsmap_destroy(m); |
202 | return ERR_PTR(err); | 337 | return ERR_PTR(err); |
203 | } | 338 | } |
@@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) | |||
212 | kfree(m->m_data_pg_pools); | 347 | kfree(m->m_data_pg_pools); |
213 | kfree(m); | 348 | kfree(m); |
214 | } | 349 | } |
350 | |||
351 | bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) | ||
352 | { | ||
353 | int i, nr_active = 0; | ||
354 | if (!m->m_enabled) | ||
355 | return false; | ||
356 | if (m->m_damaged) | ||
357 | return false; | ||
358 | if (m->m_num_laggy > 0) | ||
359 | return false; | ||
360 | for (i = 0; i < m->m_max_mds; i++) { | ||
361 | if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) | ||
362 | nr_active++; | ||
363 | } | ||
364 | return nr_active > 0; | ||
365 | } | ||
diff --git a/fs/ceph/super.c b/fs/ceph/super.c<br>index b382e5910eea..537f96631785 100644<br>--- a/fs/ceph/super.c<br>+++ b/fs/ceph/super.c | |||
@@ -137,6 +137,8 @@ enum { | |||
137 | Opt_nofscache, | 137 | Opt_nofscache, |
138 | Opt_poolperm, | 138 | Opt_poolperm, |
139 | Opt_nopoolperm, | 139 | Opt_nopoolperm, |
140 | Opt_require_active_mds, | ||
141 | Opt_norequire_active_mds, | ||
140 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 142 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
141 | Opt_acl, | 143 | Opt_acl, |
142 | #endif | 144 | #endif |
@@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = { | |||
171 | {Opt_nofscache, "nofsc"}, | 173 | {Opt_nofscache, "nofsc"}, |
172 | {Opt_poolperm, "poolperm"}, | 174 | {Opt_poolperm, "poolperm"}, |
173 | {Opt_nopoolperm, "nopoolperm"}, | 175 | {Opt_nopoolperm, "nopoolperm"}, |
176 | {Opt_require_active_mds, "require_active_mds"}, | ||
177 | {Opt_norequire_active_mds, "norequire_active_mds"}, | ||
174 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 178 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
175 | {Opt_acl, "acl"}, | 179 | {Opt_acl, "acl"}, |
176 | #endif | 180 | #endif |
@@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private) | |||
287 | case Opt_nopoolperm: | 291 | case Opt_nopoolperm: |
288 | fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; | 292 | fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; |
289 | break; | 293 | break; |
294 | case Opt_require_active_mds: | ||
295 | fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; | ||
296 | break; | ||
297 | case Opt_norequire_active_mds: | ||
298 | fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; | ||
299 | break; | ||
290 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 300 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
291 | case Opt_acl: | 301 | case Opt_acl: |
292 | fsopt->sb_flags |= MS_POSIXACL; | 302 | fsopt->sb_flags |= MS_POSIXACL; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h<br>index 622d5dd9f616..b07f55e55f60 100644<br>--- a/fs/ceph/super.h<br>+++ b/fs/ceph/super.h | |||
@@ -36,6 +36,7 @@ | |||
36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ | 36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ |
37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ | 37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ |
38 | #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ | 38 | #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ |
39 | #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ | ||
39 | 40 | ||
40 | #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE | 41 | #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE |
41 | 42 | ||
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h<br>index 87ed09f54800..8ed5dc505fbb 100644<br>--- a/include/linux/ceph/mdsmap.h<br>+++ b/include/linux/ceph/mdsmap.h | |||
@@ -31,6 +31,10 @@ struct ceph_mdsmap { | |||
31 | int m_num_data_pg_pools; | 31 | int m_num_data_pg_pools; |
32 | u64 *m_data_pg_pools; | 32 | u64 *m_data_pg_pools; |
33 | u64 m_cas_pg_pool; | 33 | u64 m_cas_pg_pool; |
34 | |||
35 | bool m_enabled; | ||
36 | bool m_damaged; | ||
37 | int m_num_laggy; | ||
34 | }; | 38 | }; |
35 | 39 | ||
36 | static inline struct ceph_entity_addr * | 40 | static inline struct ceph_entity_addr * |
@@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) | |||
59 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); | 63 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); |
60 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); | 64 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); |
61 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); | 65 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); |
66 | extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); | ||
62 | 67 | ||
63 | #endif | 68 | #endif |