diff options
Diffstat (limited to 'fs/ceph/super.c')
-rw-r--r-- | fs/ceph/super.c | 151 |
1 files changed, 98 insertions, 53 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f888cf487b7c..7c663d9b9f81 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -8,14 +8,11 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/parser.h> | 10 | #include <linux/parser.h> |
11 | #include <linux/rwsem.h> | ||
12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
13 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
15 | #include <linux/statfs.h> | 14 | #include <linux/statfs.h> |
16 | #include <linux/string.h> | 15 | #include <linux/string.h> |
17 | #include <linux/version.h> | ||
18 | #include <linux/vmalloc.h> | ||
19 | 16 | ||
20 | #include "decode.h" | 17 | #include "decode.h" |
21 | #include "super.h" | 18 | #include "super.h" |
@@ -47,10 +44,20 @@ const char *ceph_file_part(const char *s, int len) | |||
47 | */ | 44 | */ |
48 | static void ceph_put_super(struct super_block *s) | 45 | static void ceph_put_super(struct super_block *s) |
49 | { | 46 | { |
50 | struct ceph_client *cl = ceph_client(s); | 47 | struct ceph_client *client = ceph_sb_to_client(s); |
51 | 48 | ||
52 | dout("put_super\n"); | 49 | dout("put_super\n"); |
53 | ceph_mdsc_close_sessions(&cl->mdsc); | 50 | ceph_mdsc_close_sessions(&client->mdsc); |
51 | |||
52 | /* | ||
53 | * ensure we release the bdi before put_anon_super releases | ||
54 | * the device name. | ||
55 | */ | ||
56 | if (s->s_bdi == &client->backing_dev_info) { | ||
57 | bdi_unregister(&client->backing_dev_info); | ||
58 | s->s_bdi = NULL; | ||
59 | } | ||
60 | |||
54 | return; | 61 | return; |
55 | } | 62 | } |
56 | 63 | ||
@@ -97,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
97 | static int ceph_syncfs(struct super_block *sb, int wait) | 104 | static int ceph_syncfs(struct super_block *sb, int wait) |
98 | { | 105 | { |
99 | dout("sync_fs %d\n", wait); | 106 | dout("sync_fs %d\n", wait); |
100 | ceph_osdc_sync(&ceph_client(sb)->osdc); | 107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
101 | ceph_mdsc_sync(&ceph_client(sb)->mdsc); | 108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
102 | dout("sync_fs %d done\n", wait); | 109 | dout("sync_fs %d done\n", wait); |
103 | return 0; | 110 | return 0; |
104 | } | 111 | } |
105 | 112 | ||
113 | static int default_congestion_kb(void) | ||
114 | { | ||
115 | int congestion_kb; | ||
116 | |||
117 | /* | ||
118 | * Copied from NFS | ||
119 | * | ||
120 | * congestion size, scale with available memory. | ||
121 | * | ||
122 | * 64MB: 8192k | ||
123 | * 128MB: 11585k | ||
124 | * 256MB: 16384k | ||
125 | * 512MB: 23170k | ||
126 | * 1GB: 32768k | ||
127 | * 2GB: 46340k | ||
128 | * 4GB: 65536k | ||
129 | * 8GB: 92681k | ||
130 | * 16GB: 131072k | ||
131 | * | ||
132 | * This allows larger machines to have larger/more transfers. | ||
133 | * Limit the default to 256M | ||
134 | */ | ||
135 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
136 | if (congestion_kb > 256*1024) | ||
137 | congestion_kb = 256*1024; | ||
138 | |||
139 | return congestion_kb; | ||
140 | } | ||
106 | 141 | ||
107 | /** | 142 | /** |
108 | * ceph_show_options - Show mount options in /proc/mounts | 143 | * ceph_show_options - Show mount options in /proc/mounts |
@@ -128,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
128 | seq_puts(m, ",nocrc"); | 163 | seq_puts(m, ",nocrc"); |
129 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | 164 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) |
130 | seq_puts(m, ",noasyncreaddir"); | 165 | seq_puts(m, ",noasyncreaddir"); |
166 | |||
167 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
168 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
169 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
170 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
171 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
172 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
173 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
174 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
175 | args->osd_keepalive_timeout); | ||
176 | if (args->wsize) | ||
177 | seq_printf(m, ",wsize=%d", args->wsize); | ||
178 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
179 | seq_printf(m, ",rsize=%d", args->rsize); | ||
180 | if (args->congestion_kb != default_congestion_kb()) | ||
181 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
182 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
183 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
184 | args->caps_wanted_delay_min); | ||
185 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
186 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
187 | args->caps_wanted_delay_max); | ||
188 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
189 | seq_printf(m, ",cap_release_safety=%d", | ||
190 | args->cap_release_safety); | ||
191 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
192 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
193 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
194 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
131 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 195 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
132 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | 196 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); |
133 | if (args->name) | 197 | if (args->name) |
@@ -151,35 +215,6 @@ static void ceph_inode_init_once(void *foo) | |||
151 | inode_init_once(&ci->vfs_inode); | 215 | inode_init_once(&ci->vfs_inode); |
152 | } | 216 | } |
153 | 217 | ||
154 | static int default_congestion_kb(void) | ||
155 | { | ||
156 | int congestion_kb; | ||
157 | |||
158 | /* | ||
159 | * Copied from NFS | ||
160 | * | ||
161 | * congestion size, scale with available memory. | ||
162 | * | ||
163 | * 64MB: 8192k | ||
164 | * 128MB: 11585k | ||
165 | * 256MB: 16384k | ||
166 | * 512MB: 23170k | ||
167 | * 1GB: 32768k | ||
168 | * 2GB: 46340k | ||
169 | * 4GB: 65536k | ||
170 | * 8GB: 92681k | ||
171 | * 16GB: 131072k | ||
172 | * | ||
173 | * This allows larger machines to have larger/more transfers. | ||
174 | * Limit the default to 256M | ||
175 | */ | ||
176 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
177 | if (congestion_kb > 256*1024) | ||
178 | congestion_kb = 256*1024; | ||
179 | |||
180 | return congestion_kb; | ||
181 | } | ||
182 | |||
183 | static int __init init_caches(void) | 218 | static int __init init_caches(void) |
184 | { | 219 | { |
185 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | 220 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
@@ -298,7 +333,9 @@ enum { | |||
298 | Opt_osd_idle_ttl, | 333 | Opt_osd_idle_ttl, |
299 | Opt_caps_wanted_delay_min, | 334 | Opt_caps_wanted_delay_min, |
300 | Opt_caps_wanted_delay_max, | 335 | Opt_caps_wanted_delay_max, |
336 | Opt_cap_release_safety, | ||
301 | Opt_readdir_max_entries, | 337 | Opt_readdir_max_entries, |
338 | Opt_readdir_max_bytes, | ||
302 | Opt_congestion_kb, | 339 | Opt_congestion_kb, |
303 | Opt_last_int, | 340 | Opt_last_int, |
304 | /* int args above */ | 341 | /* int args above */ |
@@ -329,7 +366,9 @@ static match_table_t arg_tokens = { | |||
329 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | 366 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, |
330 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 367 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
331 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 368 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
369 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | ||
332 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, | 370 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, |
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | ||
333 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
334 | /* int args above */ | 373 | /* int args above */ |
335 | {Opt_snapdirname, "snapdirname=%s"}, | 374 | {Opt_snapdirname, "snapdirname=%s"}, |
@@ -378,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
378 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 417 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
379 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 418 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
380 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 419 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
381 | args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; | 420 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
382 | args->max_readdir = 1024; | 421 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; |
422 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
383 | args->congestion_kb = default_congestion_kb(); | 423 | args->congestion_kb = default_congestion_kb(); |
384 | 424 | ||
385 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | 425 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ |
@@ -487,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
487 | case Opt_readdir_max_entries: | 527 | case Opt_readdir_max_entries: |
488 | args->max_readdir = intval; | 528 | args->max_readdir = intval; |
489 | break; | 529 | break; |
530 | case Opt_readdir_max_bytes: | ||
531 | args->max_readdir_bytes = intval; | ||
532 | break; | ||
490 | case Opt_congestion_kb: | 533 | case Opt_congestion_kb: |
491 | args->congestion_kb = intval; | 534 | args->congestion_kb = intval; |
492 | break; | 535 | break; |
@@ -636,6 +679,8 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
636 | destroy_workqueue(client->pg_inv_wq); | 679 | destroy_workqueue(client->pg_inv_wq); |
637 | destroy_workqueue(client->trunc_wq); | 680 | destroy_workqueue(client->trunc_wq); |
638 | 681 | ||
682 | bdi_destroy(&client->backing_dev_info); | ||
683 | |||
639 | if (client->msgr) | 684 | if (client->msgr) |
640 | ceph_messenger_destroy(client->msgr); | 685 | ceph_messenger_destroy(client->msgr); |
641 | mempool_destroy(client->wb_pagevec_pool); | 686 | mempool_destroy(client->wb_pagevec_pool); |
@@ -670,9 +715,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
670 | /* | 715 | /* |
671 | * true if we have the mon map (and have thus joined the cluster) | 716 | * true if we have the mon map (and have thus joined the cluster) |
672 | */ | 717 | */ |
673 | static int have_mon_map(struct ceph_client *client) | 718 | static int have_mon_and_osd_map(struct ceph_client *client) |
674 | { | 719 | { |
675 | return client->monc.monmap && client->monc.monmap->epoch; | 720 | return client->monc.monmap && client->monc.monmap->epoch && |
721 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
676 | } | 722 | } |
677 | 723 | ||
678 | /* | 724 | /* |
@@ -750,7 +796,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
750 | if (err < 0) | 796 | if (err < 0) |
751 | goto out; | 797 | goto out; |
752 | 798 | ||
753 | while (!have_mon_map(client)) { | 799 | while (!have_mon_and_osd_map(client)) { |
754 | err = -EIO; | 800 | err = -EIO; |
755 | if (timeout && time_after_eq(jiffies, started + timeout)) | 801 | if (timeout && time_after_eq(jiffies, started + timeout)) |
756 | goto out; | 802 | goto out; |
@@ -758,8 +804,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
758 | /* wait */ | 804 | /* wait */ |
759 | dout("mount waiting for mon_map\n"); | 805 | dout("mount waiting for mon_map\n"); |
760 | err = wait_event_interruptible_timeout(client->auth_wq, | 806 | err = wait_event_interruptible_timeout(client->auth_wq, |
761 | have_mon_map(client) || (client->auth_err < 0), | 807 | have_mon_and_osd_map(client) || (client->auth_err < 0), |
762 | timeout); | 808 | timeout); |
763 | if (err == -EINTR || err == -ERESTARTSYS) | 809 | if (err == -EINTR || err == -ERESTARTSYS) |
764 | goto out; | 810 | goto out; |
765 | if (client->auth_err < 0) { | 811 | if (client->auth_err < 0) { |
@@ -872,18 +918,21 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
872 | /* | 918 | /* |
873 | * construct our own bdi so we can control readahead, etc. | 919 | * construct our own bdi so we can control readahead, etc. |
874 | */ | 920 | */ |
921 | static atomic_long_t bdi_seq = ATOMIC_INIT(0); | ||
922 | |||
875 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 923 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) |
876 | { | 924 | { |
877 | int err; | 925 | int err; |
878 | 926 | ||
879 | sb->s_bdi = &client->backing_dev_info; | ||
880 | |||
881 | /* set ra_pages based on rsize mount option? */ | 927 | /* set ra_pages based on rsize mount option? */ |
882 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 928 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) |
883 | client->backing_dev_info.ra_pages = | 929 | client->backing_dev_info.ra_pages = |
884 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 930 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
885 | >> PAGE_SHIFT; | 931 | >> PAGE_SHIFT; |
886 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 932 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", |
933 | atomic_long_inc_return(&bdi_seq)); | ||
934 | if (!err) | ||
935 | sb->s_bdi = &client->backing_dev_info; | ||
887 | return err; | 936 | return err; |
888 | } | 937 | } |
889 | 938 | ||
@@ -920,9 +969,9 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
920 | goto out; | 969 | goto out; |
921 | } | 970 | } |
922 | 971 | ||
923 | if (ceph_client(sb) != client) { | 972 | if (ceph_sb_to_client(sb) != client) { |
924 | ceph_destroy_client(client); | 973 | ceph_destroy_client(client); |
925 | client = ceph_client(sb); | 974 | client = ceph_sb_to_client(sb); |
926 | dout("get_sb got existing client %p\n", client); | 975 | dout("get_sb got existing client %p\n", client); |
927 | } else { | 976 | } else { |
928 | dout("get_sb using new client %p\n", client); | 977 | dout("get_sb using new client %p\n", client); |
@@ -940,8 +989,7 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
940 | 989 | ||
941 | out_splat: | 990 | out_splat: |
942 | ceph_mdsc_close_sessions(&client->mdsc); | 991 | ceph_mdsc_close_sessions(&client->mdsc); |
943 | up_write(&sb->s_umount); | 992 | deactivate_locked_super(sb); |
944 | deactivate_super(sb); | ||
945 | goto out_final; | 993 | goto out_final; |
946 | 994 | ||
947 | out: | 995 | out: |
@@ -957,9 +1005,6 @@ static void ceph_kill_sb(struct super_block *s) | |||
957 | dout("kill_sb %p\n", s); | 1005 | dout("kill_sb %p\n", s); |
958 | ceph_mdsc_pre_umount(&client->mdsc); | 1006 | ceph_mdsc_pre_umount(&client->mdsc); |
959 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 1007 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
960 | if (s->s_bdi == &client->backing_dev_info) | ||
961 | bdi_unregister(&client->backing_dev_info); | ||
962 | bdi_destroy(&client->backing_dev_info); | ||
963 | ceph_destroy_client(client); | 1008 | ceph_destroy_client(client); |
964 | } | 1009 | } |
965 | 1010 | ||