aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/super.c')
-rw-r--r--fs/ceph/super.c151
1 files changed, 98 insertions, 53 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f888cf487b7c..7c663d9b9f81 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -8,14 +8,11 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/parser.h> 10#include <linux/parser.h>
11#include <linux/rwsem.h>
12#include <linux/sched.h> 11#include <linux/sched.h>
13#include <linux/seq_file.h> 12#include <linux/seq_file.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/statfs.h> 14#include <linux/statfs.h>
16#include <linux/string.h> 15#include <linux/string.h>
17#include <linux/version.h>
18#include <linux/vmalloc.h>
19 16
20#include "decode.h" 17#include "decode.h"
21#include "super.h" 18#include "super.h"
@@ -47,10 +44,20 @@ const char *ceph_file_part(const char *s, int len)
47 */ 44 */
48static void ceph_put_super(struct super_block *s) 45static void ceph_put_super(struct super_block *s)
49{ 46{
50 struct ceph_client *cl = ceph_client(s); 47 struct ceph_client *client = ceph_sb_to_client(s);
51 48
52 dout("put_super\n"); 49 dout("put_super\n");
53 ceph_mdsc_close_sessions(&cl->mdsc); 50 ceph_mdsc_close_sessions(&client->mdsc);
51
52 /*
53 * ensure we release the bdi before put_anon_super releases
54 * the device name.
55 */
56 if (s->s_bdi == &client->backing_dev_info) {
57 bdi_unregister(&client->backing_dev_info);
58 s->s_bdi = NULL;
59 }
60
54 return; 61 return;
55} 62}
56 63
@@ -97,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
97static int ceph_syncfs(struct super_block *sb, int wait) 104static int ceph_syncfs(struct super_block *sb, int wait)
98{ 105{
99 dout("sync_fs %d\n", wait); 106 dout("sync_fs %d\n", wait);
100 ceph_osdc_sync(&ceph_client(sb)->osdc); 107 ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc);
101 ceph_mdsc_sync(&ceph_client(sb)->mdsc); 108 ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc);
102 dout("sync_fs %d done\n", wait); 109 dout("sync_fs %d done\n", wait);
103 return 0; 110 return 0;
104} 111}
105 112
113static int default_congestion_kb(void)
114{
115 int congestion_kb;
116
117 /*
118 * Copied from NFS
119 *
120 * congestion size, scale with available memory.
121 *
122 * 64MB: 8192k
123 * 128MB: 11585k
124 * 256MB: 16384k
125 * 512MB: 23170k
126 * 1GB: 32768k
127 * 2GB: 46340k
128 * 4GB: 65536k
129 * 8GB: 92681k
130 * 16GB: 131072k
131 *
132 * This allows larger machines to have larger/more transfers.
133 * Limit the default to 256M
134 */
135 congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
136 if (congestion_kb > 256*1024)
137 congestion_kb = 256*1024;
138
139 return congestion_kb;
140}
106 141
107/** 142/**
108 * ceph_show_options - Show mount options in /proc/mounts 143 * ceph_show_options - Show mount options in /proc/mounts
@@ -128,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
128 seq_puts(m, ",nocrc"); 163 seq_puts(m, ",nocrc");
129 if (args->flags & CEPH_OPT_NOASYNCREADDIR) 164 if (args->flags & CEPH_OPT_NOASYNCREADDIR)
130 seq_puts(m, ",noasyncreaddir"); 165 seq_puts(m, ",noasyncreaddir");
166
167 if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
168 seq_printf(m, ",mount_timeout=%d", args->mount_timeout);
169 if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
170 seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl);
171 if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
172 seq_printf(m, ",osdtimeout=%d", args->osd_timeout);
173 if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
174 seq_printf(m, ",osdkeepalivetimeout=%d",
175 args->osd_keepalive_timeout);
176 if (args->wsize)
177 seq_printf(m, ",wsize=%d", args->wsize);
178 if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT)
179 seq_printf(m, ",rsize=%d", args->rsize);
180 if (args->congestion_kb != default_congestion_kb())
181 seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb);
182 if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
183 seq_printf(m, ",caps_wanted_delay_min=%d",
184 args->caps_wanted_delay_min);
185 if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
186 seq_printf(m, ",caps_wanted_delay_max=%d",
187 args->caps_wanted_delay_max);
188 if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
189 seq_printf(m, ",cap_release_safety=%d",
190 args->cap_release_safety);
191 if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT)
192 seq_printf(m, ",readdir_max_entries=%d", args->max_readdir);
193 if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
194 seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes);
131 if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 195 if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
132 seq_printf(m, ",snapdirname=%s", args->snapdir_name); 196 seq_printf(m, ",snapdirname=%s", args->snapdir_name);
133 if (args->name) 197 if (args->name)
@@ -151,35 +215,6 @@ static void ceph_inode_init_once(void *foo)
151 inode_init_once(&ci->vfs_inode); 215 inode_init_once(&ci->vfs_inode);
152} 216}
153 217
154static int default_congestion_kb(void)
155{
156 int congestion_kb;
157
158 /*
159 * Copied from NFS
160 *
161 * congestion size, scale with available memory.
162 *
163 * 64MB: 8192k
164 * 128MB: 11585k
165 * 256MB: 16384k
166 * 512MB: 23170k
167 * 1GB: 32768k
168 * 2GB: 46340k
169 * 4GB: 65536k
170 * 8GB: 92681k
171 * 16GB: 131072k
172 *
173 * This allows larger machines to have larger/more transfers.
174 * Limit the default to 256M
175 */
176 congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
177 if (congestion_kb > 256*1024)
178 congestion_kb = 256*1024;
179
180 return congestion_kb;
181}
182
183static int __init init_caches(void) 218static int __init init_caches(void)
184{ 219{
185 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 220 ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
@@ -298,7 +333,9 @@ enum {
298 Opt_osd_idle_ttl, 333 Opt_osd_idle_ttl,
299 Opt_caps_wanted_delay_min, 334 Opt_caps_wanted_delay_min,
300 Opt_caps_wanted_delay_max, 335 Opt_caps_wanted_delay_max,
336 Opt_cap_release_safety,
301 Opt_readdir_max_entries, 337 Opt_readdir_max_entries,
338 Opt_readdir_max_bytes,
302 Opt_congestion_kb, 339 Opt_congestion_kb,
303 Opt_last_int, 340 Opt_last_int,
304 /* int args above */ 341 /* int args above */
@@ -329,7 +366,9 @@ static match_table_t arg_tokens = {
329 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, 366 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
330 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 367 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
331 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 368 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
369 {Opt_cap_release_safety, "cap_release_safety=%d"},
332 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 370 {Opt_readdir_max_entries, "readdir_max_entries=%d"},
371 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
333 {Opt_congestion_kb, "write_congestion_kb=%d"}, 372 {Opt_congestion_kb, "write_congestion_kb=%d"},
334 /* int args above */ 373 /* int args above */
335 {Opt_snapdirname, "snapdirname=%s"}, 374 {Opt_snapdirname, "snapdirname=%s"},
@@ -378,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
378 args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 417 args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
379 args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 418 args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
380 args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 419 args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
381 args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; 420 args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
382 args->max_readdir = 1024; 421 args->max_readdir = CEPH_MAX_READDIR_DEFAULT;
422 args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
383 args->congestion_kb = default_congestion_kb(); 423 args->congestion_kb = default_congestion_kb();
384 424
385 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ 425 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
@@ -487,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
487 case Opt_readdir_max_entries: 527 case Opt_readdir_max_entries:
488 args->max_readdir = intval; 528 args->max_readdir = intval;
489 break; 529 break;
530 case Opt_readdir_max_bytes:
531 args->max_readdir_bytes = intval;
532 break;
490 case Opt_congestion_kb: 533 case Opt_congestion_kb:
491 args->congestion_kb = intval; 534 args->congestion_kb = intval;
492 break; 535 break;
@@ -636,6 +679,8 @@ static void ceph_destroy_client(struct ceph_client *client)
636 destroy_workqueue(client->pg_inv_wq); 679 destroy_workqueue(client->pg_inv_wq);
637 destroy_workqueue(client->trunc_wq); 680 destroy_workqueue(client->trunc_wq);
638 681
682 bdi_destroy(&client->backing_dev_info);
683
639 if (client->msgr) 684 if (client->msgr)
640 ceph_messenger_destroy(client->msgr); 685 ceph_messenger_destroy(client->msgr);
641 mempool_destroy(client->wb_pagevec_pool); 686 mempool_destroy(client->wb_pagevec_pool);
@@ -670,9 +715,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
670/* 715/*
671 * true if we have the mon map (and have thus joined the cluster) 716 * true if we have the mon map (and have thus joined the cluster)
672 */ 717 */
673static int have_mon_map(struct ceph_client *client) 718static int have_mon_and_osd_map(struct ceph_client *client)
674{ 719{
675 return client->monc.monmap && client->monc.monmap->epoch; 720 return client->monc.monmap && client->monc.monmap->epoch &&
721 client->osdc.osdmap && client->osdc.osdmap->epoch;
676} 722}
677 723
678/* 724/*
@@ -750,7 +796,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
750 if (err < 0) 796 if (err < 0)
751 goto out; 797 goto out;
752 798
753 while (!have_mon_map(client)) { 799 while (!have_mon_and_osd_map(client)) {
754 err = -EIO; 800 err = -EIO;
755 if (timeout && time_after_eq(jiffies, started + timeout)) 801 if (timeout && time_after_eq(jiffies, started + timeout))
756 goto out; 802 goto out;
@@ -758,8 +804,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
758 /* wait */ 804 /* wait */
759 dout("mount waiting for mon_map\n"); 805 dout("mount waiting for mon_map\n");
760 err = wait_event_interruptible_timeout(client->auth_wq, 806 err = wait_event_interruptible_timeout(client->auth_wq,
761 have_mon_map(client) || (client->auth_err < 0), 807 have_mon_and_osd_map(client) || (client->auth_err < 0),
762 timeout); 808 timeout);
763 if (err == -EINTR || err == -ERESTARTSYS) 809 if (err == -EINTR || err == -ERESTARTSYS)
764 goto out; 810 goto out;
765 if (client->auth_err < 0) { 811 if (client->auth_err < 0) {
@@ -872,18 +918,21 @@ static int ceph_compare_super(struct super_block *sb, void *data)
872/* 918/*
873 * construct our own bdi so we can control readahead, etc. 919 * construct our own bdi so we can control readahead, etc.
874 */ 920 */
921static atomic_long_t bdi_seq = ATOMIC_INIT(0);
922
875static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) 923static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
876{ 924{
877 int err; 925 int err;
878 926
879 sb->s_bdi = &client->backing_dev_info;
880
881 /* set ra_pages based on rsize mount option? */ 927 /* set ra_pages based on rsize mount option? */
882 if (client->mount_args->rsize >= PAGE_CACHE_SIZE) 928 if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
883 client->backing_dev_info.ra_pages = 929 client->backing_dev_info.ra_pages =
884 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) 930 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
885 >> PAGE_SHIFT; 931 >> PAGE_SHIFT;
886 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); 932 err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d",
933 atomic_long_inc_return(&bdi_seq));
934 if (!err)
935 sb->s_bdi = &client->backing_dev_info;
887 return err; 936 return err;
888} 937}
889 938
@@ -920,9 +969,9 @@ static int ceph_get_sb(struct file_system_type *fs_type,
920 goto out; 969 goto out;
921 } 970 }
922 971
923 if (ceph_client(sb) != client) { 972 if (ceph_sb_to_client(sb) != client) {
924 ceph_destroy_client(client); 973 ceph_destroy_client(client);
925 client = ceph_client(sb); 974 client = ceph_sb_to_client(sb);
926 dout("get_sb got existing client %p\n", client); 975 dout("get_sb got existing client %p\n", client);
927 } else { 976 } else {
928 dout("get_sb using new client %p\n", client); 977 dout("get_sb using new client %p\n", client);
@@ -940,8 +989,7 @@ static int ceph_get_sb(struct file_system_type *fs_type,
940 989
941out_splat: 990out_splat:
942 ceph_mdsc_close_sessions(&client->mdsc); 991 ceph_mdsc_close_sessions(&client->mdsc);
943 up_write(&sb->s_umount); 992 deactivate_locked_super(sb);
944 deactivate_super(sb);
945 goto out_final; 993 goto out_final;
946 994
947out: 995out:
@@ -957,9 +1005,6 @@ static void ceph_kill_sb(struct super_block *s)
957 dout("kill_sb %p\n", s); 1005 dout("kill_sb %p\n", s);
958 ceph_mdsc_pre_umount(&client->mdsc); 1006 ceph_mdsc_pre_umount(&client->mdsc);
959 kill_anon_super(s); /* will call put_super after sb is r/o */ 1007 kill_anon_super(s); /* will call put_super after sb is r/o */
960 if (s->s_bdi == &client->backing_dev_info)
961 bdi_unregister(&client->backing_dev_info);
962 bdi_destroy(&client->backing_dev_info);
963 ceph_destroy_client(client); 1008 ceph_destroy_client(client);
964} 1009}
965 1010