aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/super.c
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
commitc71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
treeecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/ceph/super.c
parentea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'fs/ceph/super.c')
-rw-r--r--fs/ceph/super.c1202
1 files changed, 504 insertions, 698 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9922628532b2..f2f77fd3c14c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1,5 +1,5 @@
1 1
2#include "ceph_debug.h" 2#include <linux/ceph/ceph_debug.h>
3 3
4#include <linux/backing-dev.h> 4#include <linux/backing-dev.h>
5#include <linux/ctype.h> 5#include <linux/ctype.h>
@@ -15,10 +15,13 @@
15#include <linux/statfs.h> 15#include <linux/statfs.h>
16#include <linux/string.h> 16#include <linux/string.h>
17 17
18#include "decode.h"
19#include "super.h" 18#include "super.h"
20#include "mon_client.h" 19#include "mds_client.h"
21#include "auth.h" 20
21#include <linux/ceph/decode.h>
22#include <linux/ceph/mon_client.h>
23#include <linux/ceph/auth.h>
24#include <linux/ceph/debugfs.h>
22 25
23/* 26/*
24 * Ceph superblock operations 27 * Ceph superblock operations
@@ -26,36 +29,22 @@
26 * Handle the basics of mounting, unmounting. 29 * Handle the basics of mounting, unmounting.
27 */ 30 */
28 31
29
30/*
31 * find filename portion of a path (/foo/bar/baz -> baz)
32 */
33const char *ceph_file_part(const char *s, int len)
34{
35 const char *e = s + len;
36
37 while (e != s && *(e-1) != '/')
38 e--;
39 return e;
40}
41
42
43/* 32/*
44 * super ops 33 * super ops
45 */ 34 */
46static void ceph_put_super(struct super_block *s) 35static void ceph_put_super(struct super_block *s)
47{ 36{
48 struct ceph_client *client = ceph_sb_to_client(s); 37 struct ceph_fs_client *fsc = ceph_sb_to_client(s);
49 38
50 dout("put_super\n"); 39 dout("put_super\n");
51 ceph_mdsc_close_sessions(&client->mdsc); 40 ceph_mdsc_close_sessions(fsc->mdsc);
52 41
53 /* 42 /*
54 * ensure we release the bdi before put_anon_super releases 43 * ensure we release the bdi before put_anon_super releases
55 * the device name. 44 * the device name.
56 */ 45 */
57 if (s->s_bdi == &client->backing_dev_info) { 46 if (s->s_bdi == &fsc->backing_dev_info) {
58 bdi_unregister(&client->backing_dev_info); 47 bdi_unregister(&fsc->backing_dev_info);
59 s->s_bdi = NULL; 48 s->s_bdi = NULL;
60 } 49 }
61 50
@@ -64,14 +53,14 @@ static void ceph_put_super(struct super_block *s)
64 53
65static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 54static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
66{ 55{
67 struct ceph_client *client = ceph_inode_to_client(dentry->d_inode); 56 struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
68 struct ceph_monmap *monmap = client->monc.monmap; 57 struct ceph_monmap *monmap = fsc->client->monc.monmap;
69 struct ceph_statfs st; 58 struct ceph_statfs st;
70 u64 fsid; 59 u64 fsid;
71 int err; 60 int err;
72 61
73 dout("statfs\n"); 62 dout("statfs\n");
74 err = ceph_monc_do_statfs(&client->monc, &st); 63 err = ceph_monc_do_statfs(&fsc->client->monc, &st);
75 if (err < 0) 64 if (err < 0)
76 return err; 65 return err;
77 66
@@ -104,238 +93,28 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
104 93
105static int ceph_sync_fs(struct super_block *sb, int wait) 94static int ceph_sync_fs(struct super_block *sb, int wait)
106{ 95{
107 struct ceph_client *client = ceph_sb_to_client(sb); 96 struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
108 97
109 if (!wait) { 98 if (!wait) {
110 dout("sync_fs (non-blocking)\n"); 99 dout("sync_fs (non-blocking)\n");
111 ceph_flush_dirty_caps(&client->mdsc); 100 ceph_flush_dirty_caps(fsc->mdsc);
112 dout("sync_fs (non-blocking) done\n"); 101 dout("sync_fs (non-blocking) done\n");
113 return 0; 102 return 0;
114 } 103 }
115 104
116 dout("sync_fs (blocking)\n"); 105 dout("sync_fs (blocking)\n");
117 ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); 106 ceph_osdc_sync(&fsc->client->osdc);
118 ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); 107 ceph_mdsc_sync(fsc->mdsc);
119 dout("sync_fs (blocking) done\n"); 108 dout("sync_fs (blocking) done\n");
120 return 0; 109 return 0;
121} 110}
122 111
123static int default_congestion_kb(void)
124{
125 int congestion_kb;
126
127 /*
128 * Copied from NFS
129 *
130 * congestion size, scale with available memory.
131 *
132 * 64MB: 8192k
133 * 128MB: 11585k
134 * 256MB: 16384k
135 * 512MB: 23170k
136 * 1GB: 32768k
137 * 2GB: 46340k
138 * 4GB: 65536k
139 * 8GB: 92681k
140 * 16GB: 131072k
141 *
142 * This allows larger machines to have larger/more transfers.
143 * Limit the default to 256M
144 */
145 congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
146 if (congestion_kb > 256*1024)
147 congestion_kb = 256*1024;
148
149 return congestion_kb;
150}
151
152/**
153 * ceph_show_options - Show mount options in /proc/mounts
154 * @m: seq_file to write to
155 * @mnt: mount descriptor
156 */
157static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
158{
159 struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb);
160 struct ceph_mount_args *args = client->mount_args;
161
162 if (args->flags & CEPH_OPT_FSID)
163 seq_printf(m, ",fsid=%pU", &args->fsid);
164 if (args->flags & CEPH_OPT_NOSHARE)
165 seq_puts(m, ",noshare");
166 if (args->flags & CEPH_OPT_DIRSTAT)
167 seq_puts(m, ",dirstat");
168 if ((args->flags & CEPH_OPT_RBYTES) == 0)
169 seq_puts(m, ",norbytes");
170 if (args->flags & CEPH_OPT_NOCRC)
171 seq_puts(m, ",nocrc");
172 if (args->flags & CEPH_OPT_NOASYNCREADDIR)
173 seq_puts(m, ",noasyncreaddir");
174
175 if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
176 seq_printf(m, ",mount_timeout=%d", args->mount_timeout);
177 if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
178 seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl);
179 if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
180 seq_printf(m, ",osdtimeout=%d", args->osd_timeout);
181 if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
182 seq_printf(m, ",osdkeepalivetimeout=%d",
183 args->osd_keepalive_timeout);
184 if (args->wsize)
185 seq_printf(m, ",wsize=%d", args->wsize);
186 if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
187 seq_printf(m, ",rsize=%d", args->rsize);
188 if (args->congestion_kb != default_congestion_kb())
189 seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb);
190 if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
191 seq_printf(m, ",caps_wanted_delay_min=%d",
192 args->caps_wanted_delay_min);
193 if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
194 seq_printf(m, ",caps_wanted_delay_max=%d",
195 args->caps_wanted_delay_max);
196 if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
197 seq_printf(m, ",cap_release_safety=%d",
198 args->cap_release_safety);
199 if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT)
200 seq_printf(m, ",readdir_max_entries=%d", args->max_readdir);
201 if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
202 seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes);
203 if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
204 seq_printf(m, ",snapdirname=%s", args->snapdir_name);
205 if (args->name)
206 seq_printf(m, ",name=%s", args->name);
207 if (args->secret)
208 seq_puts(m, ",secret=<hidden>");
209 return 0;
210}
211
212/*
213 * caches
214 */
215struct kmem_cache *ceph_inode_cachep;
216struct kmem_cache *ceph_cap_cachep;
217struct kmem_cache *ceph_dentry_cachep;
218struct kmem_cache *ceph_file_cachep;
219
220static void ceph_inode_init_once(void *foo)
221{
222 struct ceph_inode_info *ci = foo;
223 inode_init_once(&ci->vfs_inode);
224}
225
226static int __init init_caches(void)
227{
228 ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
229 sizeof(struct ceph_inode_info),
230 __alignof__(struct ceph_inode_info),
231 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
232 ceph_inode_init_once);
233 if (ceph_inode_cachep == NULL)
234 return -ENOMEM;
235
236 ceph_cap_cachep = KMEM_CACHE(ceph_cap,
237 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
238 if (ceph_cap_cachep == NULL)
239 goto bad_cap;
240
241 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
242 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
243 if (ceph_dentry_cachep == NULL)
244 goto bad_dentry;
245
246 ceph_file_cachep = KMEM_CACHE(ceph_file_info,
247 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
248 if (ceph_file_cachep == NULL)
249 goto bad_file;
250
251 return 0;
252
253bad_file:
254 kmem_cache_destroy(ceph_dentry_cachep);
255bad_dentry:
256 kmem_cache_destroy(ceph_cap_cachep);
257bad_cap:
258 kmem_cache_destroy(ceph_inode_cachep);
259 return -ENOMEM;
260}
261
262static void destroy_caches(void)
263{
264 kmem_cache_destroy(ceph_inode_cachep);
265 kmem_cache_destroy(ceph_cap_cachep);
266 kmem_cache_destroy(ceph_dentry_cachep);
267 kmem_cache_destroy(ceph_file_cachep);
268}
269
270
271/*
272 * ceph_umount_begin - initiate forced umount. Tear down down the
273 * mount, skipping steps that may hang while waiting for server(s).
274 */
275static void ceph_umount_begin(struct super_block *sb)
276{
277 struct ceph_client *client = ceph_sb_to_client(sb);
278
279 dout("ceph_umount_begin - starting forced umount\n");
280 if (!client)
281 return;
282 client->mount_state = CEPH_MOUNT_SHUTDOWN;
283 return;
284}
285
286static const struct super_operations ceph_super_ops = {
287 .alloc_inode = ceph_alloc_inode,
288 .destroy_inode = ceph_destroy_inode,
289 .write_inode = ceph_write_inode,
290 .sync_fs = ceph_sync_fs,
291 .put_super = ceph_put_super,
292 .show_options = ceph_show_options,
293 .statfs = ceph_statfs,
294 .umount_begin = ceph_umount_begin,
295};
296
297
298const char *ceph_msg_type_name(int type)
299{
300 switch (type) {
301 case CEPH_MSG_SHUTDOWN: return "shutdown";
302 case CEPH_MSG_PING: return "ping";
303 case CEPH_MSG_AUTH: return "auth";
304 case CEPH_MSG_AUTH_REPLY: return "auth_reply";
305 case CEPH_MSG_MON_MAP: return "mon_map";
306 case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
307 case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
308 case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
309 case CEPH_MSG_STATFS: return "statfs";
310 case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
311 case CEPH_MSG_MDS_MAP: return "mds_map";
312 case CEPH_MSG_CLIENT_SESSION: return "client_session";
313 case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
314 case CEPH_MSG_CLIENT_REQUEST: return "client_request";
315 case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
316 case CEPH_MSG_CLIENT_REPLY: return "client_reply";
317 case CEPH_MSG_CLIENT_CAPS: return "client_caps";
318 case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
319 case CEPH_MSG_CLIENT_SNAP: return "client_snap";
320 case CEPH_MSG_CLIENT_LEASE: return "client_lease";
321 case CEPH_MSG_OSD_MAP: return "osd_map";
322 case CEPH_MSG_OSD_OP: return "osd_op";
323 case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
324 default: return "unknown";
325 }
326}
327
328
329/* 112/*
330 * mount options 113 * mount options
331 */ 114 */
332enum { 115enum {
333 Opt_wsize, 116 Opt_wsize,
334 Opt_rsize, 117 Opt_rsize,
335 Opt_osdtimeout,
336 Opt_osdkeepalivetimeout,
337 Opt_mount_timeout,
338 Opt_osd_idle_ttl,
339 Opt_caps_wanted_delay_min, 118 Opt_caps_wanted_delay_min,
340 Opt_caps_wanted_delay_max, 119 Opt_caps_wanted_delay_max,
341 Opt_cap_release_safety, 120 Opt_cap_release_safety,
@@ -344,29 +123,20 @@ enum {
344 Opt_congestion_kb, 123 Opt_congestion_kb,
345 Opt_last_int, 124 Opt_last_int,
346 /* int args above */ 125 /* int args above */
347 Opt_fsid,
348 Opt_snapdirname, 126 Opt_snapdirname,
349 Opt_name,
350 Opt_secret,
351 Opt_last_string, 127 Opt_last_string,
352 /* string args above */ 128 /* string args above */
353 Opt_ip,
354 Opt_noshare,
355 Opt_dirstat, 129 Opt_dirstat,
356 Opt_nodirstat, 130 Opt_nodirstat,
357 Opt_rbytes, 131 Opt_rbytes,
358 Opt_norbytes, 132 Opt_norbytes,
359 Opt_nocrc,
360 Opt_noasyncreaddir, 133 Opt_noasyncreaddir,
134 Opt_ino32,
361}; 135};
362 136
363static match_table_t arg_tokens = { 137static match_table_t fsopt_tokens = {
364 {Opt_wsize, "wsize=%d"}, 138 {Opt_wsize, "wsize=%d"},
365 {Opt_rsize, "rsize=%d"}, 139 {Opt_rsize, "rsize=%d"},
366 {Opt_osdtimeout, "osdtimeout=%d"},
367 {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
368 {Opt_mount_timeout, "mount_timeout=%d"},
369 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
370 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 140 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
371 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 141 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
372 {Opt_cap_release_safety, "cap_release_safety=%d"}, 142 {Opt_cap_release_safety, "cap_release_safety=%d"},
@@ -374,403 +144,470 @@ static match_table_t arg_tokens = {
374 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 144 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
375 {Opt_congestion_kb, "write_congestion_kb=%d"}, 145 {Opt_congestion_kb, "write_congestion_kb=%d"},
376 /* int args above */ 146 /* int args above */
377 {Opt_fsid, "fsid=%s"},
378 {Opt_snapdirname, "snapdirname=%s"}, 147 {Opt_snapdirname, "snapdirname=%s"},
379 {Opt_name, "name=%s"},
380 {Opt_secret, "secret=%s"},
381 /* string args above */ 148 /* string args above */
382 {Opt_ip, "ip=%s"},
383 {Opt_noshare, "noshare"},
384 {Opt_dirstat, "dirstat"}, 149 {Opt_dirstat, "dirstat"},
385 {Opt_nodirstat, "nodirstat"}, 150 {Opt_nodirstat, "nodirstat"},
386 {Opt_rbytes, "rbytes"}, 151 {Opt_rbytes, "rbytes"},
387 {Opt_norbytes, "norbytes"}, 152 {Opt_norbytes, "norbytes"},
388 {Opt_nocrc, "nocrc"},
389 {Opt_noasyncreaddir, "noasyncreaddir"}, 153 {Opt_noasyncreaddir, "noasyncreaddir"},
154 {Opt_ino32, "ino32"},
390 {-1, NULL} 155 {-1, NULL}
391}; 156};
392 157
393static int parse_fsid(const char *str, struct ceph_fsid *fsid) 158static int parse_fsopt_token(char *c, void *private)
394{ 159{
395 int i = 0; 160 struct ceph_mount_options *fsopt = private;
396 char tmp[3]; 161 substring_t argstr[MAX_OPT_ARGS];
397 int err = -EINVAL; 162 int token, intval, ret;
398 int d; 163
399 164 token = match_token((char *)c, fsopt_tokens, argstr);
400 dout("parse_fsid '%s'\n", str); 165 if (token < 0)
401 tmp[2] = 0; 166 return -EINVAL;
402 while (*str && i < 16) { 167
403 if (ispunct(*str)) { 168 if (token < Opt_last_int) {
404 str++; 169 ret = match_int(&argstr[0], &intval);
405 continue; 170 if (ret < 0) {
171 pr_err("bad mount option arg (not int) "
172 "at '%s'\n", c);
173 return ret;
406 } 174 }
407 if (!isxdigit(str[0]) || !isxdigit(str[1])) 175 dout("got int token %d val %d\n", token, intval);
408 break; 176 } else if (token > Opt_last_int && token < Opt_last_string) {
409 tmp[0] = str[0]; 177 dout("got string token %d val %s\n", token,
410 tmp[1] = str[1]; 178 argstr[0].from);
411 if (sscanf(tmp, "%x", &d) < 1) 179 } else {
412 break; 180 dout("got token %d\n", token);
413 fsid->fsid[i] = d & 0xff;
414 i++;
415 str += 2;
416 } 181 }
417 182
418 if (i == 16) 183 switch (token) {
419 err = 0; 184 case Opt_snapdirname:
420 dout("parse_fsid ret %d got fsid %pU", err, fsid); 185 kfree(fsopt->snapdir_name);
421 return err; 186 fsopt->snapdir_name = kstrndup(argstr[0].from,
187 argstr[0].to-argstr[0].from,
188 GFP_KERNEL);
189 if (!fsopt->snapdir_name)
190 return -ENOMEM;
191 break;
192
193 /* misc */
194 case Opt_wsize:
195 fsopt->wsize = intval;
196 break;
197 case Opt_rsize:
198 fsopt->rsize = intval;
199 break;
200 case Opt_caps_wanted_delay_min:
201 fsopt->caps_wanted_delay_min = intval;
202 break;
203 case Opt_caps_wanted_delay_max:
204 fsopt->caps_wanted_delay_max = intval;
205 break;
206 case Opt_readdir_max_entries:
207 fsopt->max_readdir = intval;
208 break;
209 case Opt_readdir_max_bytes:
210 fsopt->max_readdir_bytes = intval;
211 break;
212 case Opt_congestion_kb:
213 fsopt->congestion_kb = intval;
214 break;
215 case Opt_dirstat:
216 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
217 break;
218 case Opt_nodirstat:
219 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
220 break;
221 case Opt_rbytes:
222 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
223 break;
224 case Opt_norbytes:
225 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
226 break;
227 case Opt_noasyncreaddir:
228 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
229 break;
230 case Opt_ino32:
231 fsopt->flags |= CEPH_MOUNT_OPT_INO32;
232 break;
233 default:
234 BUG_ON(token);
235 }
236 return 0;
422} 237}
423 238
424static struct ceph_mount_args *parse_mount_args(int flags, char *options, 239static void destroy_mount_options(struct ceph_mount_options *args)
425 const char *dev_name,
426 const char **path)
427{ 240{
428 struct ceph_mount_args *args; 241 dout("destroy_mount_options %p\n", args);
429 const char *c; 242 kfree(args->snapdir_name);
430 int err = -ENOMEM; 243 kfree(args);
431 substring_t argstr[MAX_OPT_ARGS]; 244}
432 245
433 args = kzalloc(sizeof(*args), GFP_KERNEL); 246static int strcmp_null(const char *s1, const char *s2)
434 if (!args) 247{
435 return ERR_PTR(-ENOMEM); 248 if (!s1 && !s2)
436 args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr), 249 return 0;
437 GFP_KERNEL); 250 if (s1 && !s2)
438 if (!args->mon_addr) 251 return -1;
439 goto out; 252 if (!s1 && s2)
253 return 1;
254 return strcmp(s1, s2);
255}
440 256
441 dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name); 257static int compare_mount_options(struct ceph_mount_options *new_fsopt,
442 258 struct ceph_options *new_opt,
443 /* start with defaults */ 259 struct ceph_fs_client *fsc)
444 args->sb_flags = flags; 260{
445 args->flags = CEPH_OPT_DEFAULT; 261 struct ceph_mount_options *fsopt1 = new_fsopt;
446 args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; 262 struct ceph_mount_options *fsopt2 = fsc->mount_options;
447 args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 263 int ofs = offsetof(struct ceph_mount_options, snapdir_name);
448 args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ 264 int ret;
449 args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
450 args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
451 args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
452 args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
453 args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
454 args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
455 args->max_readdir = CEPH_MAX_READDIR_DEFAULT;
456 args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
457 args->congestion_kb = default_congestion_kb();
458
459 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
460 err = -EINVAL;
461 if (!dev_name)
462 goto out;
463 *path = strstr(dev_name, ":/");
464 if (*path == NULL) {
465 pr_err("device name is missing path (no :/ in %s)\n",
466 dev_name);
467 goto out;
468 }
469 265
470 /* get mon ip(s) */ 266 ret = memcmp(fsopt1, fsopt2, ofs);
471 err = ceph_parse_ips(dev_name, *path, args->mon_addr, 267 if (ret)
472 CEPH_MAX_MON, &args->num_mon); 268 return ret;
473 if (err < 0) 269
474 goto out; 270 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
271 if (ret)
272 return ret;
273
274 return ceph_compare_options(new_opt, fsc->client);
275}
276
277static int parse_mount_options(struct ceph_mount_options **pfsopt,
278 struct ceph_options **popt,
279 int flags, char *options,
280 const char *dev_name,
281 const char **path)
282{
283 struct ceph_mount_options *fsopt;
284 const char *dev_name_end;
285 int err = -ENOMEM;
286
287 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
288 if (!fsopt)
289 return -ENOMEM;
290
291 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
292
293 fsopt->sb_flags = flags;
294 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
295
296 fsopt->rsize = CEPH_RSIZE_DEFAULT;
297 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
298 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
299 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
300 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
301 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
302 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
303 fsopt->congestion_kb = default_congestion_kb();
304
305 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
306 err = -EINVAL;
307 if (!dev_name)
308 goto out;
309 *path = strstr(dev_name, ":/");
310 if (*path == NULL) {
311 pr_err("device name is missing path (no :/ in %s)\n",
312 dev_name);
313 goto out;
314 }
315 dev_name_end = *path;
316 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
475 317
476 /* path on server */ 318 /* path on server */
477 *path += 2; 319 *path += 2;
478 dout("server path '%s'\n", *path); 320 dout("server path '%s'\n", *path);
479 321
480 /* parse mount options */ 322 err = ceph_parse_options(popt, options, dev_name, dev_name_end,
481 while ((c = strsep(&options, ",")) != NULL) { 323 parse_fsopt_token, (void *)fsopt);
482 int token, intval, ret; 324 if (err)
483 if (!*c) 325 goto out;
484 continue; 326
485 err = -EINVAL; 327 /* success */
486 token = match_token((char *)c, arg_tokens, argstr); 328 *pfsopt = fsopt;
487 if (token < 0) { 329 return 0;
488 pr_err("bad mount option at '%s'\n", c);
489 goto out;
490 }
491 if (token < Opt_last_int) {
492 ret = match_int(&argstr[0], &intval);
493 if (ret < 0) {
494 pr_err("bad mount option arg (not int) "
495 "at '%s'\n", c);
496 continue;
497 }
498 dout("got int token %d val %d\n", token, intval);
499 } else if (token > Opt_last_int && token < Opt_last_string) {
500 dout("got string token %d val %s\n", token,
501 argstr[0].from);
502 } else {
503 dout("got token %d\n", token);
504 }
505 switch (token) {
506 case Opt_ip:
507 err = ceph_parse_ips(argstr[0].from,
508 argstr[0].to,
509 &args->my_addr,
510 1, NULL);
511 if (err < 0)
512 goto out;
513 args->flags |= CEPH_OPT_MYIP;
514 break;
515
516 case Opt_fsid:
517 err = parse_fsid(argstr[0].from, &args->fsid);
518 if (err == 0)
519 args->flags |= CEPH_OPT_FSID;
520 break;
521 case Opt_snapdirname:
522 kfree(args->snapdir_name);
523 args->snapdir_name = kstrndup(argstr[0].from,
524 argstr[0].to-argstr[0].from,
525 GFP_KERNEL);
526 break;
527 case Opt_name:
528 args->name = kstrndup(argstr[0].from,
529 argstr[0].to-argstr[0].from,
530 GFP_KERNEL);
531 break;
532 case Opt_secret:
533 args->secret = kstrndup(argstr[0].from,
534 argstr[0].to-argstr[0].from,
535 GFP_KERNEL);
536 break;
537
538 /* misc */
539 case Opt_wsize:
540 args->wsize = intval;
541 break;
542 case Opt_rsize:
543 args->rsize = intval;
544 break;
545 case Opt_osdtimeout:
546 args->osd_timeout = intval;
547 break;
548 case Opt_osdkeepalivetimeout:
549 args->osd_keepalive_timeout = intval;
550 break;
551 case Opt_osd_idle_ttl:
552 args->osd_idle_ttl = intval;
553 break;
554 case Opt_mount_timeout:
555 args->mount_timeout = intval;
556 break;
557 case Opt_caps_wanted_delay_min:
558 args->caps_wanted_delay_min = intval;
559 break;
560 case Opt_caps_wanted_delay_max:
561 args->caps_wanted_delay_max = intval;
562 break;
563 case Opt_readdir_max_entries:
564 args->max_readdir = intval;
565 break;
566 case Opt_readdir_max_bytes:
567 args->max_readdir_bytes = intval;
568 break;
569 case Opt_congestion_kb:
570 args->congestion_kb = intval;
571 break;
572
573 case Opt_noshare:
574 args->flags |= CEPH_OPT_NOSHARE;
575 break;
576
577 case Opt_dirstat:
578 args->flags |= CEPH_OPT_DIRSTAT;
579 break;
580 case Opt_nodirstat:
581 args->flags &= ~CEPH_OPT_DIRSTAT;
582 break;
583 case Opt_rbytes:
584 args->flags |= CEPH_OPT_RBYTES;
585 break;
586 case Opt_norbytes:
587 args->flags &= ~CEPH_OPT_RBYTES;
588 break;
589 case Opt_nocrc:
590 args->flags |= CEPH_OPT_NOCRC;
591 break;
592 case Opt_noasyncreaddir:
593 args->flags |= CEPH_OPT_NOASYNCREADDIR;
594 break;
595
596 default:
597 BUG_ON(token);
598 }
599 }
600 return args;
601 330
602out: 331out:
603 kfree(args->mon_addr); 332 destroy_mount_options(fsopt);
604 kfree(args); 333 return err;
605 return ERR_PTR(err);
606} 334}
607 335
608static void destroy_mount_args(struct ceph_mount_args *args) 336/**
337 * ceph_show_options - Show mount options in /proc/mounts
338 * @m: seq_file to write to
339 * @mnt: mount descriptor
340 */
341static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
609{ 342{
610 dout("destroy_mount_args %p\n", args); 343 struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
611 kfree(args->snapdir_name); 344 struct ceph_mount_options *fsopt = fsc->mount_options;
612 args->snapdir_name = NULL; 345 struct ceph_options *opt = fsc->client->options;
613 kfree(args->name); 346
614 args->name = NULL; 347 if (opt->flags & CEPH_OPT_FSID)
615 kfree(args->secret); 348 seq_printf(m, ",fsid=%pU", &opt->fsid);
616 args->secret = NULL; 349 if (opt->flags & CEPH_OPT_NOSHARE)
617 kfree(args); 350 seq_puts(m, ",noshare");
351 if (opt->flags & CEPH_OPT_NOCRC)
352 seq_puts(m, ",nocrc");
353
354 if (opt->name)
355 seq_printf(m, ",name=%s", opt->name);
356 if (opt->key)
357 seq_puts(m, ",secret=<hidden>");
358
359 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
360 seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
361 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
362 seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
363 if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
364 seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
365 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
366 seq_printf(m, ",osdkeepalivetimeout=%d",
367 opt->osd_keepalive_timeout);
368
369 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
370 seq_puts(m, ",dirstat");
371 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
372 seq_puts(m, ",norbytes");
373 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
374 seq_puts(m, ",noasyncreaddir");
375
376 if (fsopt->wsize)
377 seq_printf(m, ",wsize=%d", fsopt->wsize);
378 if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
379 seq_printf(m, ",rsize=%d", fsopt->rsize);
380 if (fsopt->congestion_kb != default_congestion_kb())
381 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
382 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
383 seq_printf(m, ",caps_wanted_delay_min=%d",
384 fsopt->caps_wanted_delay_min);
385 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
386 seq_printf(m, ",caps_wanted_delay_max=%d",
387 fsopt->caps_wanted_delay_max);
388 if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
389 seq_printf(m, ",cap_release_safety=%d",
390 fsopt->cap_release_safety);
391 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
392 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
393 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
394 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
395 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
396 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
397 return 0;
618} 398}
619 399
620/* 400/*
621 * create a fresh client instance 401 * handle any mon messages the standard library doesn't understand.
402 * return error if we don't either.
622 */ 403 */
623static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) 404static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
624{ 405{
625 struct ceph_client *client; 406 struct ceph_fs_client *fsc = client->private;
407 int type = le16_to_cpu(msg->hdr.type);
408
409 switch (type) {
410 case CEPH_MSG_MDS_MAP:
411 ceph_mdsc_handle_map(fsc->mdsc, msg);
412 return 0;
413
414 default:
415 return -1;
416 }
417}
418
419/*
420 * create a new fs client
421 */
422struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
423 struct ceph_options *opt)
424{
425 struct ceph_fs_client *fsc;
626 int err = -ENOMEM; 426 int err = -ENOMEM;
627 427
628 client = kzalloc(sizeof(*client), GFP_KERNEL); 428 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
629 if (client == NULL) 429 if (!fsc)
630 return ERR_PTR(-ENOMEM); 430 return ERR_PTR(-ENOMEM);
631 431
632 mutex_init(&client->mount_mutex); 432 fsc->client = ceph_create_client(opt, fsc);
633 433 if (IS_ERR(fsc->client)) {
634 init_waitqueue_head(&client->auth_wq); 434 err = PTR_ERR(fsc->client);
435 goto fail;
436 }
437 fsc->client->extra_mon_dispatch = extra_mon_dispatch;
438 fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
439 CEPH_FEATURE_DIRLAYOUTHASH;
440 fsc->client->monc.want_mdsmap = 1;
635 441
636 client->sb = NULL; 442 fsc->mount_options = fsopt;
637 client->mount_state = CEPH_MOUNT_MOUNTING;
638 client->mount_args = args;
639 443
640 client->msgr = NULL; 444 fsc->sb = NULL;
445 fsc->mount_state = CEPH_MOUNT_MOUNTING;
641 446
642 client->auth_err = 0; 447 atomic_long_set(&fsc->writeback_count, 0);
643 atomic_long_set(&client->writeback_count, 0);
644 448
645 err = bdi_init(&client->backing_dev_info); 449 err = bdi_init(&fsc->backing_dev_info);
646 if (err < 0) 450 if (err < 0)
647 goto fail; 451 goto fail_client;
648 452
649 err = -ENOMEM; 453 err = -ENOMEM;
650 client->wb_wq = create_workqueue("ceph-writeback"); 454 /*
651 if (client->wb_wq == NULL) 455 * The number of concurrent works can be high but they don't need
456 * to be processed in parallel, limit concurrency.
457 */
458 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
459 if (fsc->wb_wq == NULL)
652 goto fail_bdi; 460 goto fail_bdi;
653 client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); 461 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
654 if (client->pg_inv_wq == NULL) 462 if (fsc->pg_inv_wq == NULL)
655 goto fail_wb_wq; 463 goto fail_wb_wq;
656 client->trunc_wq = create_singlethread_workqueue("ceph-trunc"); 464 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
657 if (client->trunc_wq == NULL) 465 if (fsc->trunc_wq == NULL)
658 goto fail_pg_inv_wq; 466 goto fail_pg_inv_wq;
659 467
660 /* set up mempools */ 468 /* set up mempools */
661 err = -ENOMEM; 469 err = -ENOMEM;
662 client->wb_pagevec_pool = mempool_create_kmalloc_pool(10, 470 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
663 client->mount_args->wsize >> PAGE_CACHE_SHIFT); 471 fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
664 if (!client->wb_pagevec_pool) 472 if (!fsc->wb_pagevec_pool)
665 goto fail_trunc_wq; 473 goto fail_trunc_wq;
666 474
667 /* caps */ 475 /* caps */
668 client->min_caps = args->max_readdir; 476 fsc->min_caps = fsopt->max_readdir;
477
478 return fsc;
669 479
670 /* subsystems */
671 err = ceph_monc_init(&client->monc, client);
672 if (err < 0)
673 goto fail_mempool;
674 err = ceph_osdc_init(&client->osdc, client);
675 if (err < 0)
676 goto fail_monc;
677 err = ceph_mdsc_init(&client->mdsc, client);
678 if (err < 0)
679 goto fail_osdc;
680 return client;
681
682fail_osdc:
683 ceph_osdc_stop(&client->osdc);
684fail_monc:
685 ceph_monc_stop(&client->monc);
686fail_mempool:
687 mempool_destroy(client->wb_pagevec_pool);
688fail_trunc_wq: 480fail_trunc_wq:
689 destroy_workqueue(client->trunc_wq); 481 destroy_workqueue(fsc->trunc_wq);
690fail_pg_inv_wq: 482fail_pg_inv_wq:
691 destroy_workqueue(client->pg_inv_wq); 483 destroy_workqueue(fsc->pg_inv_wq);
692fail_wb_wq: 484fail_wb_wq:
693 destroy_workqueue(client->wb_wq); 485 destroy_workqueue(fsc->wb_wq);
694fail_bdi: 486fail_bdi:
695 bdi_destroy(&client->backing_dev_info); 487 bdi_destroy(&fsc->backing_dev_info);
488fail_client:
489 ceph_destroy_client(fsc->client);
696fail: 490fail:
697 kfree(client); 491 kfree(fsc);
698 return ERR_PTR(err); 492 return ERR_PTR(err);
699} 493}
700 494
701static void ceph_destroy_client(struct ceph_client *client) 495void destroy_fs_client(struct ceph_fs_client *fsc)
702{ 496{
703 dout("destroy_client %p\n", client); 497 dout("destroy_fs_client %p\n", fsc);
704 498
705 /* unmount */ 499 destroy_workqueue(fsc->wb_wq);
706 ceph_mdsc_stop(&client->mdsc); 500 destroy_workqueue(fsc->pg_inv_wq);
707 ceph_osdc_stop(&client->osdc); 501 destroy_workqueue(fsc->trunc_wq);
708 502
709 /* 503 bdi_destroy(&fsc->backing_dev_info);
710 * make sure mds and osd connections close out before destroying
711 * the auth module, which is needed to free those connections'
712 * ceph_authorizers.
713 */
714 ceph_msgr_flush();
715
716 ceph_monc_stop(&client->monc);
717 504
718 ceph_debugfs_client_cleanup(client); 505 mempool_destroy(fsc->wb_pagevec_pool);
719 destroy_workqueue(client->wb_wq);
720 destroy_workqueue(client->pg_inv_wq);
721 destroy_workqueue(client->trunc_wq);
722 506
723 bdi_destroy(&client->backing_dev_info); 507 destroy_mount_options(fsc->mount_options);
724 508
725 if (client->msgr) 509 ceph_fs_debugfs_cleanup(fsc);
726 ceph_messenger_destroy(client->msgr);
727 mempool_destroy(client->wb_pagevec_pool);
728 510
729 destroy_mount_args(client->mount_args); 511 ceph_destroy_client(fsc->client);
730 512
731 kfree(client); 513 kfree(fsc);
732 dout("destroy_client %p done\n", client); 514 dout("destroy_fs_client %p done\n", fsc);
733} 515}
734 516
735/* 517/*
736 * Initially learn our fsid, or verify an fsid matches. 518 * caches
737 */ 519 */
738int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) 520struct kmem_cache *ceph_inode_cachep;
521struct kmem_cache *ceph_cap_cachep;
522struct kmem_cache *ceph_dentry_cachep;
523struct kmem_cache *ceph_file_cachep;
524
525static void ceph_inode_init_once(void *foo)
739{ 526{
740 if (client->have_fsid) { 527 struct ceph_inode_info *ci = foo;
741 if (ceph_fsid_compare(&client->fsid, fsid)) { 528 inode_init_once(&ci->vfs_inode);
742 pr_err("bad fsid, had %pU got %pU", 529}
743 &client->fsid, fsid); 530
744 return -1; 531static int __init init_caches(void)
745 } 532{
746 } else { 533 ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
747 pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, 534 sizeof(struct ceph_inode_info),
748 fsid); 535 __alignof__(struct ceph_inode_info),
749 memcpy(&client->fsid, fsid, sizeof(*fsid)); 536 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
750 ceph_debugfs_client_init(client); 537 ceph_inode_init_once);
751 client->have_fsid = true; 538 if (ceph_inode_cachep == NULL)
752 } 539 return -ENOMEM;
540
541 ceph_cap_cachep = KMEM_CACHE(ceph_cap,
542 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
543 if (ceph_cap_cachep == NULL)
544 goto bad_cap;
545
546 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
547 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
548 if (ceph_dentry_cachep == NULL)
549 goto bad_dentry;
550
551 ceph_file_cachep = KMEM_CACHE(ceph_file_info,
552 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
553 if (ceph_file_cachep == NULL)
554 goto bad_file;
555
753 return 0; 556 return 0;
557
558bad_file:
559 kmem_cache_destroy(ceph_dentry_cachep);
560bad_dentry:
561 kmem_cache_destroy(ceph_cap_cachep);
562bad_cap:
563 kmem_cache_destroy(ceph_inode_cachep);
564 return -ENOMEM;
754} 565}
755 566
567static void destroy_caches(void)
568{
569 kmem_cache_destroy(ceph_inode_cachep);
570 kmem_cache_destroy(ceph_cap_cachep);
571 kmem_cache_destroy(ceph_dentry_cachep);
572 kmem_cache_destroy(ceph_file_cachep);
573}
574
575
756/* 576/*
757 * true if we have the mon map (and have thus joined the cluster) 577 * ceph_umount_begin - initiate forced umount. Tear down the
578 * mount, skipping steps that may hang while waiting for server(s).
758 */ 579 */
759static int have_mon_and_osd_map(struct ceph_client *client) 580static void ceph_umount_begin(struct super_block *sb)
760{ 581{
761 return client->monc.monmap && client->monc.monmap->epoch && 582 struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
762 client->osdc.osdmap && client->osdc.osdmap->epoch; 583
584 dout("ceph_umount_begin - starting forced umount\n");
585 if (!fsc)
586 return;
587 fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
588 return;
763} 589}
764 590
591static const struct super_operations ceph_super_ops = {
592 .alloc_inode = ceph_alloc_inode,
593 .destroy_inode = ceph_destroy_inode,
594 .write_inode = ceph_write_inode,
595 .sync_fs = ceph_sync_fs,
596 .put_super = ceph_put_super,
597 .show_options = ceph_show_options,
598 .statfs = ceph_statfs,
599 .umount_begin = ceph_umount_begin,
600};
601
765/* 602/*
766 * Bootstrap mount by opening the root directory. Note the mount 603 * Bootstrap mount by opening the root directory. Note the mount
767 * @started time from caller, and time out if this takes too long. 604 * @started time from caller, and time out if this takes too long.
768 */ 605 */
769static struct dentry *open_root_dentry(struct ceph_client *client, 606static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
770 const char *path, 607 const char *path,
771 unsigned long started) 608 unsigned long started)
772{ 609{
773 struct ceph_mds_client *mdsc = &client->mdsc; 610 struct ceph_mds_client *mdsc = fsc->mdsc;
774 struct ceph_mds_request *req = NULL; 611 struct ceph_mds_request *req = NULL;
775 int err; 612 int err;
776 struct dentry *root; 613 struct dentry *root;
@@ -784,14 +621,14 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
784 req->r_ino1.ino = CEPH_INO_ROOT; 621 req->r_ino1.ino = CEPH_INO_ROOT;
785 req->r_ino1.snap = CEPH_NOSNAP; 622 req->r_ino1.snap = CEPH_NOSNAP;
786 req->r_started = started; 623 req->r_started = started;
787 req->r_timeout = client->mount_args->mount_timeout * HZ; 624 req->r_timeout = fsc->client->options->mount_timeout * HZ;
788 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 625 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
789 req->r_num_caps = 2; 626 req->r_num_caps = 2;
790 err = ceph_mdsc_do_request(mdsc, NULL, req); 627 err = ceph_mdsc_do_request(mdsc, NULL, req);
791 if (err == 0) { 628 if (err == 0) {
792 dout("open_root_inode success\n"); 629 dout("open_root_inode success\n");
793 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && 630 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
794 client->sb->s_root == NULL) 631 fsc->sb->s_root == NULL)
795 root = d_alloc_root(req->r_target_inode); 632 root = d_alloc_root(req->r_target_inode);
796 else 633 else
797 root = d_obtain_alias(req->r_target_inode); 634 root = d_obtain_alias(req->r_target_inode);
@@ -804,105 +641,84 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
804 return root; 641 return root;
805} 642}
806 643
644
645
646
807/* 647/*
808 * mount: join the ceph cluster, and open root directory. 648 * mount: join the ceph cluster, and open root directory.
809 */ 649 */
810static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, 650static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
811 const char *path) 651 const char *path)
812{ 652{
813 struct ceph_entity_addr *myaddr = NULL;
814 int err; 653 int err;
815 unsigned long timeout = client->mount_args->mount_timeout * HZ;
816 unsigned long started = jiffies; /* note the start time */ 654 unsigned long started = jiffies; /* note the start time */
817 struct dentry *root; 655 struct dentry *root;
656 int first = 0; /* first vfsmount for this super_block */
818 657
819 dout("mount start\n"); 658 dout("mount start\n");
820 mutex_lock(&client->mount_mutex); 659 mutex_lock(&fsc->client->mount_mutex);
821
822 /* initialize the messenger */
823 if (client->msgr == NULL) {
824 if (ceph_test_opt(client, MYIP))
825 myaddr = &client->mount_args->my_addr;
826 client->msgr = ceph_messenger_create(myaddr);
827 if (IS_ERR(client->msgr)) {
828 err = PTR_ERR(client->msgr);
829 client->msgr = NULL;
830 goto out;
831 }
832 client->msgr->nocrc = ceph_test_opt(client, NOCRC);
833 }
834 660
835 /* open session, and wait for mon, mds, and osd maps */ 661 err = __ceph_open_session(fsc->client, started);
836 err = ceph_monc_open_session(&client->monc);
837 if (err < 0) 662 if (err < 0)
838 goto out; 663 goto out;
839 664
840 while (!have_mon_and_osd_map(client)) {
841 err = -EIO;
842 if (timeout && time_after_eq(jiffies, started + timeout))
843 goto out;
844
845 /* wait */
846 dout("mount waiting for mon_map\n");
847 err = wait_event_interruptible_timeout(client->auth_wq,
848 have_mon_and_osd_map(client) || (client->auth_err < 0),
849 timeout);
850 if (err == -EINTR || err == -ERESTARTSYS)
851 goto out;
852 if (client->auth_err < 0) {
853 err = client->auth_err;
854 goto out;
855 }
856 }
857
858 dout("mount opening root\n"); 665 dout("mount opening root\n");
859 root = open_root_dentry(client, "", started); 666 root = open_root_dentry(fsc, "", started);
860 if (IS_ERR(root)) { 667 if (IS_ERR(root)) {
861 err = PTR_ERR(root); 668 err = PTR_ERR(root);
862 goto out; 669 goto out;
863 } 670 }
864 if (client->sb->s_root) 671 if (fsc->sb->s_root) {
865 dput(root); 672 dput(root);
866 else 673 } else {
867 client->sb->s_root = root; 674 fsc->sb->s_root = root;
675 first = 1;
676
677 err = ceph_fs_debugfs_init(fsc);
678 if (err < 0)
679 goto fail;
680 }
868 681
869 if (path[0] == 0) { 682 if (path[0] == 0) {
870 dget(root); 683 dget(root);
871 } else { 684 } else {
872 dout("mount opening base mountpoint\n"); 685 dout("mount opening base mountpoint\n");
873 root = open_root_dentry(client, path, started); 686 root = open_root_dentry(fsc, path, started);
874 if (IS_ERR(root)) { 687 if (IS_ERR(root)) {
875 err = PTR_ERR(root); 688 err = PTR_ERR(root);
876 dput(client->sb->s_root); 689 goto fail;
877 client->sb->s_root = NULL;
878 goto out;
879 } 690 }
880 } 691 }
881 692
882 mnt->mnt_root = root; 693 fsc->mount_state = CEPH_MOUNT_MOUNTED;
883 mnt->mnt_sb = client->sb;
884
885 client->mount_state = CEPH_MOUNT_MOUNTED;
886 dout("mount success\n"); 694 dout("mount success\n");
887 err = 0; 695 mutex_unlock(&fsc->client->mount_mutex);
696 return root;
888 697
889out: 698out:
890 mutex_unlock(&client->mount_mutex); 699 mutex_unlock(&fsc->client->mount_mutex);
891 return err; 700 return ERR_PTR(err);
701
702fail:
703 if (first) {
704 dput(fsc->sb->s_root);
705 fsc->sb->s_root = NULL;
706 }
707 goto out;
892} 708}
893 709
894static int ceph_set_super(struct super_block *s, void *data) 710static int ceph_set_super(struct super_block *s, void *data)
895{ 711{
896 struct ceph_client *client = data; 712 struct ceph_fs_client *fsc = data;
897 int ret; 713 int ret;
898 714
899 dout("set_super %p data %p\n", s, data); 715 dout("set_super %p data %p\n", s, data);
900 716
901 s->s_flags = client->mount_args->sb_flags; 717 s->s_flags = fsc->mount_options->sb_flags;
902 s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ 718 s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
903 719
904 s->s_fs_info = client; 720 s->s_fs_info = fsc;
905 client->sb = s; 721 fsc->sb = s;
906 722
907 s->s_op = &ceph_super_ops; 723 s->s_op = &ceph_super_ops;
908 s->s_export_op = &ceph_export_ops; 724 s->s_export_op = &ceph_export_ops;
@@ -917,7 +733,7 @@ static int ceph_set_super(struct super_block *s, void *data)
917 733
918fail: 734fail:
919 s->s_fs_info = NULL; 735 s->s_fs_info = NULL;
920 client->sb = NULL; 736 fsc->sb = NULL;
921 return ret; 737 return ret;
922} 738}
923 739
@@ -926,30 +742,23 @@ fail:
926 */ 742 */
927static int ceph_compare_super(struct super_block *sb, void *data) 743static int ceph_compare_super(struct super_block *sb, void *data)
928{ 744{
929 struct ceph_client *new = data; 745 struct ceph_fs_client *new = data;
930 struct ceph_mount_args *args = new->mount_args; 746 struct ceph_mount_options *fsopt = new->mount_options;
931 struct ceph_client *other = ceph_sb_to_client(sb); 747 struct ceph_options *opt = new->client->options;
932 int i; 748 struct ceph_fs_client *other = ceph_sb_to_client(sb);
933 749
934 dout("ceph_compare_super %p\n", sb); 750 dout("ceph_compare_super %p\n", sb);
935 if (args->flags & CEPH_OPT_FSID) { 751
936 if (ceph_fsid_compare(&args->fsid, &other->fsid)) { 752 if (compare_mount_options(fsopt, opt, other)) {
937 dout("fsid doesn't match\n"); 753 dout("monitor(s)/mount options don't match\n");
938 return 0; 754 return 0;
939 } 755 }
940 } else { 756 if ((opt->flags & CEPH_OPT_FSID) &&
941 /* do we share (a) monitor? */ 757 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
942 for (i = 0; i < new->monc.monmap->num_mon; i++) 758 dout("fsid doesn't match\n");
943 if (ceph_monmap_contains(other->monc.monmap, 759 return 0;
944 &new->monc.monmap->mon_inst[i].addr))
945 break;
946 if (i == new->monc.monmap->num_mon) {
947 dout("mon ip not part of monmap\n");
948 return 0;
949 }
950 dout("mon ip matches existing sb %p\n", sb);
951 } 760 }
952 if (args->sb_flags != other->mount_args->sb_flags) { 761 if (fsopt->sb_flags != other->mount_options->sb_flags) {
953 dout("flags differ\n"); 762 dout("flags differ\n");
954 return 0; 763 return 0;
955 } 764 }
@@ -961,98 +770,113 @@ static int ceph_compare_super(struct super_block *sb, void *data)
961 */ 770 */
962static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 771static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
963 772
964static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) 773static int ceph_register_bdi(struct super_block *sb,
774 struct ceph_fs_client *fsc)
965{ 775{
966 int err; 776 int err;
967 777
968 /* set ra_pages based on rsize mount option? */ 778 /* set ra_pages based on rsize mount option? */
969 if (client->mount_args->rsize >= PAGE_CACHE_SIZE) 779 if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
970 client->backing_dev_info.ra_pages = 780 fsc->backing_dev_info.ra_pages =
971 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) 781 (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
972 >> PAGE_SHIFT; 782 >> PAGE_SHIFT;
973 err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", 783 err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
974 atomic_long_inc_return(&bdi_seq)); 784 atomic_long_inc_return(&bdi_seq));
975 if (!err) 785 if (!err)
976 sb->s_bdi = &client->backing_dev_info; 786 sb->s_bdi = &fsc->backing_dev_info;
977 return err; 787 return err;
978} 788}
979 789
980static int ceph_get_sb(struct file_system_type *fs_type, 790static struct dentry *ceph_mount(struct file_system_type *fs_type,
981 int flags, const char *dev_name, void *data, 791 int flags, const char *dev_name, void *data)
982 struct vfsmount *mnt)
983{ 792{
984 struct super_block *sb; 793 struct super_block *sb;
985 struct ceph_client *client; 794 struct ceph_fs_client *fsc;
795 struct dentry *res;
986 int err; 796 int err;
987 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 797 int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
988 const char *path = NULL; 798 const char *path = NULL;
989 struct ceph_mount_args *args; 799 struct ceph_mount_options *fsopt = NULL;
800 struct ceph_options *opt = NULL;
990 801
991 dout("ceph_get_sb\n"); 802 dout("ceph_mount\n");
992 args = parse_mount_args(flags, data, dev_name, &path); 803 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
993 if (IS_ERR(args)) { 804 if (err < 0) {
994 err = PTR_ERR(args); 805 res = ERR_PTR(err);
995 goto out_final; 806 goto out_final;
996 } 807 }
997 808
998 /* create client (which we may/may not use) */ 809 /* create client (which we may/may not use) */
999 client = ceph_create_client(args); 810 fsc = create_fs_client(fsopt, opt);
1000 if (IS_ERR(client)) { 811 if (IS_ERR(fsc)) {
1001 err = PTR_ERR(client); 812 res = ERR_CAST(fsc);
813 kfree(fsopt);
814 kfree(opt);
1002 goto out_final; 815 goto out_final;
1003 } 816 }
1004 817
1005 if (client->mount_args->flags & CEPH_OPT_NOSHARE) 818 err = ceph_mdsc_init(fsc);
819 if (err < 0) {
820 res = ERR_PTR(err);
821 goto out;
822 }
823
824 if (ceph_test_opt(fsc->client, NOSHARE))
1006 compare_super = NULL; 825 compare_super = NULL;
1007 sb = sget(fs_type, compare_super, ceph_set_super, client); 826 sb = sget(fs_type, compare_super, ceph_set_super, fsc);
1008 if (IS_ERR(sb)) { 827 if (IS_ERR(sb)) {
1009 err = PTR_ERR(sb); 828 res = ERR_CAST(sb);
1010 goto out; 829 goto out;
1011 } 830 }
1012 831
1013 if (ceph_sb_to_client(sb) != client) { 832 if (ceph_sb_to_client(sb) != fsc) {
1014 ceph_destroy_client(client); 833 ceph_mdsc_destroy(fsc);
1015 client = ceph_sb_to_client(sb); 834 destroy_fs_client(fsc);
1016 dout("get_sb got existing client %p\n", client); 835 fsc = ceph_sb_to_client(sb);
836 dout("get_sb got existing client %p\n", fsc);
1017 } else { 837 } else {
1018 dout("get_sb using new client %p\n", client); 838 dout("get_sb using new client %p\n", fsc);
1019 err = ceph_register_bdi(sb, client); 839 err = ceph_register_bdi(sb, fsc);
1020 if (err < 0) 840 if (err < 0) {
841 res = ERR_PTR(err);
1021 goto out_splat; 842 goto out_splat;
843 }
1022 } 844 }
1023 845
1024 err = ceph_mount(client, mnt, path); 846 res = ceph_real_mount(fsc, path);
1025 if (err < 0) 847 if (IS_ERR(res))
1026 goto out_splat; 848 goto out_splat;
1027 dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, 849 dout("root %p inode %p ino %llx.%llx\n", res,
1028 mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode)); 850 res->d_inode, ceph_vinop(res->d_inode));
1029 return 0; 851 return res;
1030 852
1031out_splat: 853out_splat:
1032 ceph_mdsc_close_sessions(&client->mdsc); 854 ceph_mdsc_close_sessions(fsc->mdsc);
1033 deactivate_locked_super(sb); 855 deactivate_locked_super(sb);
1034 goto out_final; 856 goto out_final;
1035 857
1036out: 858out:
1037 ceph_destroy_client(client); 859 ceph_mdsc_destroy(fsc);
860 destroy_fs_client(fsc);
1038out_final: 861out_final:
1039 dout("ceph_get_sb fail %d\n", err); 862 dout("ceph_mount fail %ld\n", PTR_ERR(res));
1040 return err; 863 return res;
1041} 864}
1042 865
1043static void ceph_kill_sb(struct super_block *s) 866static void ceph_kill_sb(struct super_block *s)
1044{ 867{
1045 struct ceph_client *client = ceph_sb_to_client(s); 868 struct ceph_fs_client *fsc = ceph_sb_to_client(s);
1046 dout("kill_sb %p\n", s); 869 dout("kill_sb %p\n", s);
1047 ceph_mdsc_pre_umount(&client->mdsc); 870 ceph_mdsc_pre_umount(fsc->mdsc);
1048 kill_anon_super(s); /* will call put_super after sb is r/o */ 871 kill_anon_super(s); /* will call put_super after sb is r/o */
1049 ceph_destroy_client(client); 872 ceph_mdsc_destroy(fsc);
873 destroy_fs_client(fsc);
1050} 874}
1051 875
1052static struct file_system_type ceph_fs_type = { 876static struct file_system_type ceph_fs_type = {
1053 .owner = THIS_MODULE, 877 .owner = THIS_MODULE,
1054 .name = "ceph", 878 .name = "ceph",
1055 .get_sb = ceph_get_sb, 879 .mount = ceph_mount,
1056 .kill_sb = ceph_kill_sb, 880 .kill_sb = ceph_kill_sb,
1057 .fs_flags = FS_RENAME_DOES_D_MOVE, 881 .fs_flags = FS_RENAME_DOES_D_MOVE,
1058}; 882};
@@ -1062,36 +886,20 @@ static struct file_system_type ceph_fs_type = {
1062 886
1063static int __init init_ceph(void) 887static int __init init_ceph(void)
1064{ 888{
1065 int ret = 0; 889 int ret = init_caches();
1066
1067 ret = ceph_debugfs_init();
1068 if (ret < 0)
1069 goto out;
1070
1071 ret = ceph_msgr_init();
1072 if (ret < 0)
1073 goto out_debugfs;
1074
1075 ret = init_caches();
1076 if (ret) 890 if (ret)
1077 goto out_msgr; 891 goto out;
1078 892
1079 ret = register_filesystem(&ceph_fs_type); 893 ret = register_filesystem(&ceph_fs_type);
1080 if (ret) 894 if (ret)
1081 goto out_icache; 895 goto out_icache;
1082 896
1083 pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", 897 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
1084 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL, 898
1085 CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
1086 CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
1087 return 0; 899 return 0;
1088 900
1089out_icache: 901out_icache:
1090 destroy_caches(); 902 destroy_caches();
1091out_msgr:
1092 ceph_msgr_exit();
1093out_debugfs:
1094 ceph_debugfs_cleanup();
1095out: 903out:
1096 return ret; 904 return ret;
1097} 905}
@@ -1101,8 +909,6 @@ static void __exit exit_ceph(void)
1101 dout("exit_ceph\n"); 909 dout("exit_ceph\n");
1102 unregister_filesystem(&ceph_fs_type); 910 unregister_filesystem(&ceph_fs_type);
1103 destroy_caches(); 911 destroy_caches();
1104 ceph_msgr_exit();
1105 ceph_debugfs_cleanup();
1106} 912}
1107 913
1108module_init(init_ceph); 914module_init(init_ceph);