diff options
Diffstat (limited to 'fs/ceph/super.c')
-rw-r--r-- | fs/ceph/super.c | 125 |
1 files changed, 81 insertions, 44 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9307bbee6fbe..7c663d9b9f81 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -8,14 +8,11 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/parser.h> | 10 | #include <linux/parser.h> |
11 | #include <linux/rwsem.h> | ||
12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
13 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
15 | #include <linux/statfs.h> | 14 | #include <linux/statfs.h> |
16 | #include <linux/string.h> | 15 | #include <linux/string.h> |
17 | #include <linux/version.h> | ||
18 | #include <linux/vmalloc.h> | ||
19 | 16 | ||
20 | #include "decode.h" | 17 | #include "decode.h" |
21 | #include "super.h" | 18 | #include "super.h" |
@@ -107,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
107 | static int ceph_syncfs(struct super_block *sb, int wait) | 104 | static int ceph_syncfs(struct super_block *sb, int wait) |
108 | { | 105 | { |
109 | dout("sync_fs %d\n", wait); | 106 | dout("sync_fs %d\n", wait); |
110 | ceph_osdc_sync(&ceph_client(sb)->osdc); | 107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
111 | ceph_mdsc_sync(&ceph_client(sb)->mdsc); | 108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
112 | dout("sync_fs %d done\n", wait); | 109 | dout("sync_fs %d done\n", wait); |
113 | return 0; | 110 | return 0; |
114 | } | 111 | } |
115 | 112 | ||
113 | static int default_congestion_kb(void) | ||
114 | { | ||
115 | int congestion_kb; | ||
116 | |||
117 | /* | ||
118 | * Copied from NFS | ||
119 | * | ||
120 | * congestion size, scale with available memory. | ||
121 | * | ||
122 | * 64MB: 8192k | ||
123 | * 128MB: 11585k | ||
124 | * 256MB: 16384k | ||
125 | * 512MB: 23170k | ||
126 | * 1GB: 32768k | ||
127 | * 2GB: 46340k | ||
128 | * 4GB: 65536k | ||
129 | * 8GB: 92681k | ||
130 | * 16GB: 131072k | ||
131 | * | ||
132 | * This allows larger machines to have larger/more transfers. | ||
133 | * Limit the default to 256M | ||
134 | */ | ||
135 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
136 | if (congestion_kb > 256*1024) | ||
137 | congestion_kb = 256*1024; | ||
138 | |||
139 | return congestion_kb; | ||
140 | } | ||
116 | 141 | ||
117 | /** | 142 | /** |
118 | * ceph_show_options - Show mount options in /proc/mounts | 143 | * ceph_show_options - Show mount options in /proc/mounts |
@@ -138,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
138 | seq_puts(m, ",nocrc"); | 163 | seq_puts(m, ",nocrc"); |
139 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | 164 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) |
140 | seq_puts(m, ",noasyncreaddir"); | 165 | seq_puts(m, ",noasyncreaddir"); |
166 | |||
167 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
168 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
169 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
170 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
171 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
172 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
173 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
174 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
175 | args->osd_keepalive_timeout); | ||
176 | if (args->wsize) | ||
177 | seq_printf(m, ",wsize=%d", args->wsize); | ||
178 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
179 | seq_printf(m, ",rsize=%d", args->rsize); | ||
180 | if (args->congestion_kb != default_congestion_kb()) | ||
181 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
182 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
183 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
184 | args->caps_wanted_delay_min); | ||
185 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
186 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
187 | args->caps_wanted_delay_max); | ||
188 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
189 | seq_printf(m, ",cap_release_safety=%d", | ||
190 | args->cap_release_safety); | ||
191 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
192 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
193 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
194 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
141 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 195 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
142 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | 196 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); |
143 | if (args->name) | 197 | if (args->name) |
@@ -161,35 +215,6 @@ static void ceph_inode_init_once(void *foo) | |||
161 | inode_init_once(&ci->vfs_inode); | 215 | inode_init_once(&ci->vfs_inode); |
162 | } | 216 | } |
163 | 217 | ||
164 | static int default_congestion_kb(void) | ||
165 | { | ||
166 | int congestion_kb; | ||
167 | |||
168 | /* | ||
169 | * Copied from NFS | ||
170 | * | ||
171 | * congestion size, scale with available memory. | ||
172 | * | ||
173 | * 64MB: 8192k | ||
174 | * 128MB: 11585k | ||
175 | * 256MB: 16384k | ||
176 | * 512MB: 23170k | ||
177 | * 1GB: 32768k | ||
178 | * 2GB: 46340k | ||
179 | * 4GB: 65536k | ||
180 | * 8GB: 92681k | ||
181 | * 16GB: 131072k | ||
182 | * | ||
183 | * This allows larger machines to have larger/more transfers. | ||
184 | * Limit the default to 256M | ||
185 | */ | ||
186 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
187 | if (congestion_kb > 256*1024) | ||
188 | congestion_kb = 256*1024; | ||
189 | |||
190 | return congestion_kb; | ||
191 | } | ||
192 | |||
193 | static int __init init_caches(void) | 218 | static int __init init_caches(void) |
194 | { | 219 | { |
195 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | 220 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
@@ -308,7 +333,9 @@ enum { | |||
308 | Opt_osd_idle_ttl, | 333 | Opt_osd_idle_ttl, |
309 | Opt_caps_wanted_delay_min, | 334 | Opt_caps_wanted_delay_min, |
310 | Opt_caps_wanted_delay_max, | 335 | Opt_caps_wanted_delay_max, |
336 | Opt_cap_release_safety, | ||
311 | Opt_readdir_max_entries, | 337 | Opt_readdir_max_entries, |
338 | Opt_readdir_max_bytes, | ||
312 | Opt_congestion_kb, | 339 | Opt_congestion_kb, |
313 | Opt_last_int, | 340 | Opt_last_int, |
314 | /* int args above */ | 341 | /* int args above */ |
@@ -339,7 +366,9 @@ static match_table_t arg_tokens = { | |||
339 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | 366 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, |
340 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 367 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
341 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 368 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
369 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | ||
342 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, | 370 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, |
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | ||
343 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
344 | /* int args above */ | 373 | /* int args above */ |
345 | {Opt_snapdirname, "snapdirname=%s"}, | 374 | {Opt_snapdirname, "snapdirname=%s"}, |
@@ -388,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
388 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 417 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
389 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 418 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
390 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 419 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
391 | args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; | 420 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
392 | args->max_readdir = 1024; | 421 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; |
422 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
393 | args->congestion_kb = default_congestion_kb(); | 423 | args->congestion_kb = default_congestion_kb(); |
394 | 424 | ||
395 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | 425 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ |
@@ -497,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
497 | case Opt_readdir_max_entries: | 527 | case Opt_readdir_max_entries: |
498 | args->max_readdir = intval; | 528 | args->max_readdir = intval; |
499 | break; | 529 | break; |
530 | case Opt_readdir_max_bytes: | ||
531 | args->max_readdir_bytes = intval; | ||
532 | break; | ||
500 | case Opt_congestion_kb: | 533 | case Opt_congestion_kb: |
501 | args->congestion_kb = intval; | 534 | args->congestion_kb = intval; |
502 | break; | 535 | break; |
@@ -682,9 +715,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
682 | /* | 715 | /* |
683 | * true if we have the mon map (and have thus joined the cluster) | 716 | * true if we have the mon map (and have thus joined the cluster) |
684 | */ | 717 | */ |
685 | static int have_mon_map(struct ceph_client *client) | 718 | static int have_mon_and_osd_map(struct ceph_client *client) |
686 | { | 719 | { |
687 | return client->monc.monmap && client->monc.monmap->epoch; | 720 | return client->monc.monmap && client->monc.monmap->epoch && |
721 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
688 | } | 722 | } |
689 | 723 | ||
690 | /* | 724 | /* |
@@ -762,7 +796,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
762 | if (err < 0) | 796 | if (err < 0) |
763 | goto out; | 797 | goto out; |
764 | 798 | ||
765 | while (!have_mon_map(client)) { | 799 | while (!have_mon_and_osd_map(client)) { |
766 | err = -EIO; | 800 | err = -EIO; |
767 | if (timeout && time_after_eq(jiffies, started + timeout)) | 801 | if (timeout && time_after_eq(jiffies, started + timeout)) |
768 | goto out; | 802 | goto out; |
@@ -770,8 +804,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
770 | /* wait */ | 804 | /* wait */ |
771 | dout("mount waiting for mon_map\n"); | 805 | dout("mount waiting for mon_map\n"); |
772 | err = wait_event_interruptible_timeout(client->auth_wq, | 806 | err = wait_event_interruptible_timeout(client->auth_wq, |
773 | have_mon_map(client) || (client->auth_err < 0), | 807 | have_mon_and_osd_map(client) || (client->auth_err < 0), |
774 | timeout); | 808 | timeout); |
775 | if (err == -EINTR || err == -ERESTARTSYS) | 809 | if (err == -EINTR || err == -ERESTARTSYS) |
776 | goto out; | 810 | goto out; |
777 | if (client->auth_err < 0) { | 811 | if (client->auth_err < 0) { |
@@ -884,6 +918,8 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
884 | /* | 918 | /* |
885 | * construct our own bdi so we can control readahead, etc. | 919 | * construct our own bdi so we can control readahead, etc. |
886 | */ | 920 | */ |
921 | static atomic_long_t bdi_seq = ATOMIC_INIT(0); | ||
922 | |||
887 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 923 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) |
888 | { | 924 | { |
889 | int err; | 925 | int err; |
@@ -893,7 +929,8 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
893 | client->backing_dev_info.ra_pages = | 929 | client->backing_dev_info.ra_pages = |
894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 930 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
895 | >> PAGE_SHIFT; | 931 | >> PAGE_SHIFT; |
896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 932 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", |
933 | atomic_long_inc_return(&bdi_seq)); | ||
897 | if (!err) | 934 | if (!err) |
898 | sb->s_bdi = &client->backing_dev_info; | 935 | sb->s_bdi = &client->backing_dev_info; |
899 | return err; | 936 | return err; |
@@ -932,9 +969,9 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
932 | goto out; | 969 | goto out; |
933 | } | 970 | } |
934 | 971 | ||
935 | if (ceph_client(sb) != client) { | 972 | if (ceph_sb_to_client(sb) != client) { |
936 | ceph_destroy_client(client); | 973 | ceph_destroy_client(client); |
937 | client = ceph_client(sb); | 974 | client = ceph_sb_to_client(sb); |
938 | dout("get_sb got existing client %p\n", client); | 975 | dout("get_sb got existing client %p\n", client); |
939 | } else { | 976 | } else { |
940 | dout("get_sb using new client %p\n", client); | 977 | dout("get_sb using new client %p\n", client); |