diff options
author | Yehuda Sadeh <yehuda@hq.newdream.net> | 2010-04-06 18:14:15 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-10-20 18:37:28 -0400 |
commit | 3d14c5d2b6e15c21d8e5467dc62d33127c23a644 (patch) | |
tree | 7d123c47847df9d1e865b6b78dc7da3fe739b704 /net/ceph/ceph_common.c | |
parent | ae1533b62b3369e6ae32338f4a77d64d0e88f676 (diff) |
ceph: factor out libceph from Ceph file system
This factors out protocol and low-level storage parts of ceph into a
separate libceph module living in net/ceph and include/linux/ceph. This
is mostly a matter of moving files around. However, a few key pieces
of the interface change as well:
- ceph_client becomes ceph_fs_client and ceph_client, where the latter
captures the mon and osd clients, and the fs_client gets the mds client
and file system specific pieces.
- Mount option parsing and debugfs setup are correspondingly broken into
two pieces.
- The mon client gets a generic handler callback for otherwise unknown
messages (mds map, in this case).
- The basic supported/required feature bits can be expanded (and are by
ceph_fs_client).
No functional change, aside from some subtle error handling cases that got
cleaned up in the refactoring process.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'net/ceph/ceph_common.c')
-rw-r--r-- | net/ceph/ceph_common.c | 529 |
1 files changed, 529 insertions, 0 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c new file mode 100644 index 000000000000..f6f2eebc0767 --- /dev/null +++ b/net/ceph/ceph_common.c | |||
@@ -0,0 +1,529 @@ | |||
1 | |||
2 | #include <linux/ceph/ceph_debug.h> | ||
3 | #include <linux/backing-dev.h> | ||
4 | #include <linux/ctype.h> | ||
5 | #include <linux/fs.h> | ||
6 | #include <linux/inet.h> | ||
7 | #include <linux/in6.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/mount.h> | ||
10 | #include <linux/parser.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/seq_file.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/statfs.h> | ||
15 | #include <linux/string.h> | ||
16 | |||
17 | |||
18 | #include <linux/ceph/libceph.h> | ||
19 | #include <linux/ceph/debugfs.h> | ||
20 | #include <linux/ceph/decode.h> | ||
21 | #include <linux/ceph/mon_client.h> | ||
22 | #include <linux/ceph/auth.h> | ||
23 | |||
24 | |||
25 | |||
/*
 * Locate the last component of a path (/foo/bar/baz -> baz).
 *
 * Only the first @len bytes of @s are examined.  The result points just
 * past the last '/' in that range; it is @s itself when the range holds
 * no '/', and s + len when the range ends in '/'.
 */
const char *ceph_file_part(const char *s, int len)
{
	const char *end;

	/* walk backwards until the preceding byte is a separator */
	for (end = s + len; end != s; end--) {
		if (end[-1] == '/')
			break;
	}
	return end;
}
37 | EXPORT_SYMBOL(ceph_file_part); | ||
38 | |||
39 | const char *ceph_msg_type_name(int type) | ||
40 | { | ||
41 | switch (type) { | ||
42 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
43 | case CEPH_MSG_PING: return "ping"; | ||
44 | case CEPH_MSG_AUTH: return "auth"; | ||
45 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
46 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
47 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
48 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
49 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
50 | case CEPH_MSG_STATFS: return "statfs"; | ||
51 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
52 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
53 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
54 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
55 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
56 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
57 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
58 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
59 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
60 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
61 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
62 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
63 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
64 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
65 | default: return "unknown"; | ||
66 | } | ||
67 | } | ||
68 | EXPORT_SYMBOL(ceph_msg_type_name); | ||
69 | |||
70 | /* | ||
71 | * Initially learn our fsid, or verify an fsid matches. | ||
72 | */ | ||
73 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | ||
74 | { | ||
75 | if (client->have_fsid) { | ||
76 | if (ceph_fsid_compare(&client->fsid, fsid)) { | ||
77 | pr_err("bad fsid, had %pU got %pU", | ||
78 | &client->fsid, fsid); | ||
79 | return -1; | ||
80 | } | ||
81 | } else { | ||
82 | pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); | ||
83 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | ||
84 | ceph_debugfs_client_init(client); | ||
85 | client->have_fsid = true; | ||
86 | } | ||
87 | return 0; | ||
88 | } | ||
89 | EXPORT_SYMBOL(ceph_check_fsid); | ||
90 | |||
/*
 * strcmp() that accepts NULL arguments.  Two NULLs compare equal.  When
 * exactly one argument is NULL the result is -1 if it is @s2 and 1 if it
 * is @s1.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
101 | |||
102 | int ceph_compare_options(struct ceph_options *new_opt, | ||
103 | struct ceph_client *client) | ||
104 | { | ||
105 | struct ceph_options *opt1 = new_opt; | ||
106 | struct ceph_options *opt2 = client->options; | ||
107 | int ofs = offsetof(struct ceph_options, mon_addr); | ||
108 | int i; | ||
109 | int ret; | ||
110 | |||
111 | ret = memcmp(opt1, opt2, ofs); | ||
112 | if (ret) | ||
113 | return ret; | ||
114 | |||
115 | ret = strcmp_null(opt1->name, opt2->name); | ||
116 | if (ret) | ||
117 | return ret; | ||
118 | |||
119 | ret = strcmp_null(opt1->secret, opt2->secret); | ||
120 | if (ret) | ||
121 | return ret; | ||
122 | |||
123 | /* any matching mon ip implies a match */ | ||
124 | for (i = 0; i < opt1->num_mon; i++) { | ||
125 | if (ceph_monmap_contains(client->monc.monmap, | ||
126 | &opt1->mon_addr[i])) | ||
127 | return 0; | ||
128 | } | ||
129 | return -1; | ||
130 | } | ||
131 | EXPORT_SYMBOL(ceph_compare_options); | ||
132 | |||
133 | |||
134 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
135 | { | ||
136 | int i = 0; | ||
137 | char tmp[3]; | ||
138 | int err = -EINVAL; | ||
139 | int d; | ||
140 | |||
141 | dout("parse_fsid '%s'\n", str); | ||
142 | tmp[2] = 0; | ||
143 | while (*str && i < 16) { | ||
144 | if (ispunct(*str)) { | ||
145 | str++; | ||
146 | continue; | ||
147 | } | ||
148 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
149 | break; | ||
150 | tmp[0] = str[0]; | ||
151 | tmp[1] = str[1]; | ||
152 | if (sscanf(tmp, "%x", &d) < 1) | ||
153 | break; | ||
154 | fsid->fsid[i] = d & 0xff; | ||
155 | i++; | ||
156 | str += 2; | ||
157 | } | ||
158 | |||
159 | if (i == 16) | ||
160 | err = 0; | ||
161 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
162 | return err; | ||
163 | } | ||
164 | |||
/*
 * ceph options
 *
 * The order matters: ceph_parse_options() decides whether a token takes
 * an int argument, a string argument or no argument by comparing it with
 * the Opt_last_int and Opt_last_string markers.
 */
enum {
	/* options with an int argument */
	Opt_osdtimeout = 0,
	Opt_osdkeepalivetimeout,
	Opt_mount_timeout,
	Opt_osd_idle_ttl,
	Opt_last_int,		/* marker, not an option */

	/* options with a string argument */
	Opt_fsid,
	Opt_name,
	Opt_secret,
	Opt_ip,
	Opt_last_string,	/* marker, not an option */

	/* options without an argument */
	Opt_noshare,
	Opt_nocrc,
};
184 | |||
185 | static match_table_t opt_tokens = { | ||
186 | {Opt_osdtimeout, "osdtimeout=%d"}, | ||
187 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||
188 | {Opt_mount_timeout, "mount_timeout=%d"}, | ||
189 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
190 | /* int args above */ | ||
191 | {Opt_fsid, "fsid=%s"}, | ||
192 | {Opt_name, "name=%s"}, | ||
193 | {Opt_secret, "secret=%s"}, | ||
194 | {Opt_ip, "ip=%s"}, | ||
195 | /* string args above */ | ||
196 | {Opt_noshare, "noshare"}, | ||
197 | {Opt_nocrc, "nocrc"}, | ||
198 | {-1, NULL} | ||
199 | }; | ||
200 | |||
201 | void ceph_destroy_options(struct ceph_options *opt) | ||
202 | { | ||
203 | dout("destroy_options %p\n", opt); | ||
204 | kfree(opt->name); | ||
205 | kfree(opt->secret); | ||
206 | kfree(opt); | ||
207 | } | ||
208 | EXPORT_SYMBOL(ceph_destroy_options); | ||
209 | |||
210 | int ceph_parse_options(struct ceph_options **popt, char *options, | ||
211 | const char *dev_name, const char *dev_name_end, | ||
212 | int (*parse_extra_token)(char *c, void *private), | ||
213 | void *private) | ||
214 | { | ||
215 | struct ceph_options *opt; | ||
216 | const char *c; | ||
217 | int err = -ENOMEM; | ||
218 | substring_t argstr[MAX_OPT_ARGS]; | ||
219 | |||
220 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); | ||
221 | if (!opt) | ||
222 | return err; | ||
223 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), | ||
224 | GFP_KERNEL); | ||
225 | if (!opt->mon_addr) | ||
226 | goto out; | ||
227 | |||
228 | dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, | ||
229 | dev_name); | ||
230 | |||
231 | /* start with defaults */ | ||
232 | opt->flags = CEPH_OPT_DEFAULT; | ||
233 | opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | ||
234 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | ||
235 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | ||
236 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
237 | |||
238 | /* get mon ip(s) */ | ||
239 | /* ip1[:port1][,ip2[:port2]...] */ | ||
240 | err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, | ||
241 | CEPH_MAX_MON, &opt->num_mon); | ||
242 | if (err < 0) | ||
243 | goto out; | ||
244 | |||
245 | /* parse mount options */ | ||
246 | while ((c = strsep(&options, ",")) != NULL) { | ||
247 | int token, intval, ret; | ||
248 | if (!*c) | ||
249 | continue; | ||
250 | err = -EINVAL; | ||
251 | token = match_token((char *)c, opt_tokens, argstr); | ||
252 | if (token < 0) { | ||
253 | /* extra? */ | ||
254 | err = parse_extra_token((char *)c, private); | ||
255 | if (err < 0) { | ||
256 | pr_err("bad option at '%s'\n", c); | ||
257 | goto out; | ||
258 | } | ||
259 | continue; | ||
260 | } | ||
261 | if (token < Opt_last_int) { | ||
262 | ret = match_int(&argstr[0], &intval); | ||
263 | if (ret < 0) { | ||
264 | pr_err("bad mount option arg (not int) " | ||
265 | "at '%s'\n", c); | ||
266 | continue; | ||
267 | } | ||
268 | dout("got int token %d val %d\n", token, intval); | ||
269 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
270 | dout("got string token %d val %s\n", token, | ||
271 | argstr[0].from); | ||
272 | } else { | ||
273 | dout("got token %d\n", token); | ||
274 | } | ||
275 | switch (token) { | ||
276 | case Opt_ip: | ||
277 | err = ceph_parse_ips(argstr[0].from, | ||
278 | argstr[0].to, | ||
279 | &opt->my_addr, | ||
280 | 1, NULL); | ||
281 | if (err < 0) | ||
282 | goto out; | ||
283 | opt->flags |= CEPH_OPT_MYIP; | ||
284 | break; | ||
285 | |||
286 | case Opt_fsid: | ||
287 | err = parse_fsid(argstr[0].from, &opt->fsid); | ||
288 | if (err == 0) | ||
289 | opt->flags |= CEPH_OPT_FSID; | ||
290 | break; | ||
291 | case Opt_name: | ||
292 | opt->name = kstrndup(argstr[0].from, | ||
293 | argstr[0].to-argstr[0].from, | ||
294 | GFP_KERNEL); | ||
295 | break; | ||
296 | case Opt_secret: | ||
297 | opt->secret = kstrndup(argstr[0].from, | ||
298 | argstr[0].to-argstr[0].from, | ||
299 | GFP_KERNEL); | ||
300 | break; | ||
301 | |||
302 | /* misc */ | ||
303 | case Opt_osdtimeout: | ||
304 | opt->osd_timeout = intval; | ||
305 | break; | ||
306 | case Opt_osdkeepalivetimeout: | ||
307 | opt->osd_keepalive_timeout = intval; | ||
308 | break; | ||
309 | case Opt_osd_idle_ttl: | ||
310 | opt->osd_idle_ttl = intval; | ||
311 | break; | ||
312 | case Opt_mount_timeout: | ||
313 | opt->mount_timeout = intval; | ||
314 | break; | ||
315 | |||
316 | case Opt_noshare: | ||
317 | opt->flags |= CEPH_OPT_NOSHARE; | ||
318 | break; | ||
319 | |||
320 | case Opt_nocrc: | ||
321 | opt->flags |= CEPH_OPT_NOCRC; | ||
322 | break; | ||
323 | |||
324 | default: | ||
325 | BUG_ON(token); | ||
326 | } | ||
327 | } | ||
328 | |||
329 | /* success */ | ||
330 | *popt = opt; | ||
331 | return 0; | ||
332 | |||
333 | out: | ||
334 | ceph_destroy_options(opt); | ||
335 | return err; | ||
336 | } | ||
337 | EXPORT_SYMBOL(ceph_parse_options); | ||
338 | |||
339 | u64 ceph_client_id(struct ceph_client *client) | ||
340 | { | ||
341 | return client->monc.auth->global_id; | ||
342 | } | ||
343 | EXPORT_SYMBOL(ceph_client_id); | ||
344 | |||
345 | /* | ||
346 | * create a fresh client instance | ||
347 | */ | ||
348 | struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) | ||
349 | { | ||
350 | struct ceph_client *client; | ||
351 | int err = -ENOMEM; | ||
352 | |||
353 | client = kzalloc(sizeof(*client), GFP_KERNEL); | ||
354 | if (client == NULL) | ||
355 | return ERR_PTR(-ENOMEM); | ||
356 | |||
357 | client->private = private; | ||
358 | client->options = opt; | ||
359 | |||
360 | mutex_init(&client->mount_mutex); | ||
361 | init_waitqueue_head(&client->auth_wq); | ||
362 | client->auth_err = 0; | ||
363 | |||
364 | client->extra_mon_dispatch = NULL; | ||
365 | client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; | ||
366 | client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; | ||
367 | |||
368 | client->msgr = NULL; | ||
369 | |||
370 | /* subsystems */ | ||
371 | err = ceph_monc_init(&client->monc, client); | ||
372 | if (err < 0) | ||
373 | goto fail; | ||
374 | err = ceph_osdc_init(&client->osdc, client); | ||
375 | if (err < 0) | ||
376 | goto fail_monc; | ||
377 | |||
378 | return client; | ||
379 | |||
380 | fail_monc: | ||
381 | ceph_monc_stop(&client->monc); | ||
382 | fail: | ||
383 | kfree(client); | ||
384 | return ERR_PTR(err); | ||
385 | } | ||
386 | EXPORT_SYMBOL(ceph_create_client); | ||
387 | |||
388 | void ceph_destroy_client(struct ceph_client *client) | ||
389 | { | ||
390 | dout("destroy_client %p\n", client); | ||
391 | |||
392 | /* unmount */ | ||
393 | ceph_osdc_stop(&client->osdc); | ||
394 | |||
395 | /* | ||
396 | * make sure mds and osd connections close out before destroying | ||
397 | * the auth module, which is needed to free those connections' | ||
398 | * ceph_authorizers. | ||
399 | */ | ||
400 | ceph_msgr_flush(); | ||
401 | |||
402 | ceph_monc_stop(&client->monc); | ||
403 | |||
404 | ceph_debugfs_client_cleanup(client); | ||
405 | |||
406 | if (client->msgr) | ||
407 | ceph_messenger_destroy(client->msgr); | ||
408 | |||
409 | ceph_destroy_options(client->options); | ||
410 | |||
411 | kfree(client); | ||
412 | dout("destroy_client %p done\n", client); | ||
413 | } | ||
414 | EXPORT_SYMBOL(ceph_destroy_client); | ||
415 | |||
416 | /* | ||
417 | * true if we have the mon map (and have thus joined the cluster) | ||
418 | */ | ||
419 | static int have_mon_and_osd_map(struct ceph_client *client) | ||
420 | { | ||
421 | return client->monc.monmap && client->monc.monmap->epoch && | ||
422 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * mount: join the ceph cluster, and open root directory. | ||
427 | */ | ||
428 | int __ceph_open_session(struct ceph_client *client, unsigned long started) | ||
429 | { | ||
430 | struct ceph_entity_addr *myaddr = NULL; | ||
431 | int err; | ||
432 | unsigned long timeout = client->options->mount_timeout * HZ; | ||
433 | |||
434 | /* initialize the messenger */ | ||
435 | if (client->msgr == NULL) { | ||
436 | if (ceph_test_opt(client, MYIP)) | ||
437 | myaddr = &client->options->my_addr; | ||
438 | client->msgr = ceph_messenger_create(myaddr, | ||
439 | client->supported_features, | ||
440 | client->required_features); | ||
441 | if (IS_ERR(client->msgr)) { | ||
442 | client->msgr = NULL; | ||
443 | return PTR_ERR(client->msgr); | ||
444 | } | ||
445 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
446 | } | ||
447 | |||
448 | /* open session, and wait for mon and osd maps */ | ||
449 | err = ceph_monc_open_session(&client->monc); | ||
450 | if (err < 0) | ||
451 | return err; | ||
452 | |||
453 | while (!have_mon_and_osd_map(client)) { | ||
454 | err = -EIO; | ||
455 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
456 | return err; | ||
457 | |||
458 | /* wait */ | ||
459 | dout("mount waiting for mon_map\n"); | ||
460 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
461 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
462 | timeout); | ||
463 | if (err == -EINTR || err == -ERESTARTSYS) | ||
464 | return err; | ||
465 | if (client->auth_err < 0) | ||
466 | return client->auth_err; | ||
467 | } | ||
468 | |||
469 | return 0; | ||
470 | } | ||
471 | EXPORT_SYMBOL(__ceph_open_session); | ||
472 | |||
473 | |||
474 | int ceph_open_session(struct ceph_client *client) | ||
475 | { | ||
476 | int ret; | ||
477 | unsigned long started = jiffies; /* note the start time */ | ||
478 | |||
479 | dout("open_session start\n"); | ||
480 | mutex_lock(&client->mount_mutex); | ||
481 | |||
482 | ret = __ceph_open_session(client, started); | ||
483 | |||
484 | mutex_unlock(&client->mount_mutex); | ||
485 | return ret; | ||
486 | } | ||
487 | EXPORT_SYMBOL(ceph_open_session); | ||
488 | |||
489 | |||
490 | static int __init init_ceph_lib(void) | ||
491 | { | ||
492 | int ret = 0; | ||
493 | |||
494 | ret = ceph_debugfs_init(); | ||
495 | if (ret < 0) | ||
496 | goto out; | ||
497 | |||
498 | ret = ceph_msgr_init(); | ||
499 | if (ret < 0) | ||
500 | goto out_debugfs; | ||
501 | |||
502 | pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", | ||
503 | CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, | ||
504 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
505 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
506 | |||
507 | return 0; | ||
508 | |||
509 | out_debugfs: | ||
510 | ceph_debugfs_cleanup(); | ||
511 | out: | ||
512 | return ret; | ||
513 | } | ||
514 | |||
515 | static void __exit exit_ceph_lib(void) | ||
516 | { | ||
517 | dout("exit_ceph_lib\n"); | ||
518 | ceph_msgr_exit(); | ||
519 | ceph_debugfs_cleanup(); | ||
520 | } | ||
521 | |||
522 | module_init(init_ceph_lib); | ||
523 | module_exit(exit_ceph_lib); | ||
524 | |||
525 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||
526 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||
527 | MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); | ||
528 | MODULE_DESCRIPTION("Ceph filesystem for Linux"); | ||
529 | MODULE_LICENSE("GPL"); | ||