aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2019-02-01 01:57:15 -0500
committerIlya Dryomov <idryomov@gmail.com>2019-03-05 12:55:17 -0500
commitfe33032daae2e584d9e7e33bab44c9eafced1f8f (patch)
treea03be95fd94ffcd66c69e2146a1930837f769eb4 /fs/ceph
parent37c4efc1ddf98ba8b234d116d863a9464445901e (diff)
ceph: add mount option to limit caps count
If the number of caps exceeds the limit, ceph_trim_dentries() also trims dentries with valid leases. Trimming a dentry releases its reference to the associated inode, which may evict the inode and release caps. By default, there is no limit on the caps count. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/caps.c33
-rw-r--r--fs/ceph/dir.c20
-rw-r--r--fs/ceph/mds_client.c34
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/super.c12
-rw-r--r--fs/ceph/super.h5
6 files changed, 86 insertions, 21 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6fbdc1a0afbe..36a8dc699448 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -148,11 +148,17 @@ void ceph_caps_finalize(struct ceph_mds_client *mdsc)
148 spin_unlock(&mdsc->caps_list_lock); 148 spin_unlock(&mdsc->caps_list_lock);
149} 149}
150 150
151void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) 151void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
152 struct ceph_mount_options *fsopt)
152{ 153{
153 spin_lock(&mdsc->caps_list_lock); 154 spin_lock(&mdsc->caps_list_lock);
154 mdsc->caps_min_count += delta; 155 mdsc->caps_min_count = fsopt->max_readdir;
155 BUG_ON(mdsc->caps_min_count < 0); 156 if (mdsc->caps_min_count < 1024)
157 mdsc->caps_min_count = 1024;
158 mdsc->caps_use_max = fsopt->caps_max;
159 if (mdsc->caps_use_max > 0 &&
160 mdsc->caps_use_max < mdsc->caps_min_count)
161 mdsc->caps_use_max = mdsc->caps_min_count;
156 spin_unlock(&mdsc->caps_list_lock); 162 spin_unlock(&mdsc->caps_list_lock);
157} 163}
158 164
@@ -272,6 +278,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
272 if (!err) { 278 if (!err) {
273 BUG_ON(have + alloc != need); 279 BUG_ON(have + alloc != need);
274 ctx->count = need; 280 ctx->count = need;
281 ctx->used = 0;
275 } 282 }
276 283
277 spin_lock(&mdsc->caps_list_lock); 284 spin_lock(&mdsc->caps_list_lock);
@@ -295,13 +302,24 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
295} 302}
296 303
297void ceph_unreserve_caps(struct ceph_mds_client *mdsc, 304void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
298 struct ceph_cap_reservation *ctx) 305 struct ceph_cap_reservation *ctx)
299{ 306{
307 bool reclaim = false;
308 if (!ctx->count)
309 return;
310
300 dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); 311 dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
301 spin_lock(&mdsc->caps_list_lock); 312 spin_lock(&mdsc->caps_list_lock);
302 __ceph_unreserve_caps(mdsc, ctx->count); 313 __ceph_unreserve_caps(mdsc, ctx->count);
303 ctx->count = 0; 314 ctx->count = 0;
315
316 if (mdsc->caps_use_max > 0 &&
317 mdsc->caps_use_count > mdsc->caps_use_max)
318 reclaim = true;
304 spin_unlock(&mdsc->caps_list_lock); 319 spin_unlock(&mdsc->caps_list_lock);
320
321 if (reclaim)
322 ceph_reclaim_caps_nr(mdsc, ctx->used);
305} 323}
306 324
307struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, 325struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
@@ -346,6 +364,7 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
346 BUG_ON(list_empty(&mdsc->caps_list)); 364 BUG_ON(list_empty(&mdsc->caps_list));
347 365
348 ctx->count--; 366 ctx->count--;
367 ctx->used++;
349 mdsc->caps_reserve_count--; 368 mdsc->caps_reserve_count--;
350 mdsc->caps_use_count++; 369 mdsc->caps_use_count++;
351 370
@@ -500,12 +519,12 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
500static void __cap_set_timeouts(struct ceph_mds_client *mdsc, 519static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
501 struct ceph_inode_info *ci) 520 struct ceph_inode_info *ci)
502{ 521{
503 struct ceph_mount_options *ma = mdsc->fsc->mount_options; 522 struct ceph_mount_options *opt = mdsc->fsc->mount_options;
504 523
505 ci->i_hold_caps_min = round_jiffies(jiffies + 524 ci->i_hold_caps_min = round_jiffies(jiffies +
506 ma->caps_wanted_delay_min * HZ); 525 opt->caps_wanted_delay_min * HZ);
507 ci->i_hold_caps_max = round_jiffies(jiffies + 526 ci->i_hold_caps_max = round_jiffies(jiffies +
508 ma->caps_wanted_delay_max * HZ); 527 opt->caps_wanted_delay_max * HZ);
509 dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode, 528 dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
510 ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies); 529 ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
511} 530}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index eba283557653..a8f429882249 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1224,6 +1224,7 @@ enum {
1224 1224
1225struct ceph_lease_walk_control { 1225struct ceph_lease_walk_control {
1226 bool dir_lease; 1226 bool dir_lease;
1227 bool expire_dir_lease;
1227 unsigned long nr_to_scan; 1228 unsigned long nr_to_scan;
1228 unsigned long dir_lease_ttl; 1229 unsigned long dir_lease_ttl;
1229}; 1230};
@@ -1345,7 +1346,13 @@ static int __dir_lease_check(struct dentry *dentry, void *arg)
1345 /* Move dentry to tail of dir lease list if we don't want 1346 /* Move dentry to tail of dir lease list if we don't want
1346 * to delete it. So dentries in the list are checked in a 1347 * to delete it. So dentries in the list are checked in a
1347 * round robin manner */ 1348 * round robin manner */
1348 return TOUCH; 1349 if (!lwc->expire_dir_lease)
1350 return TOUCH;
1351 if (dentry->d_lockref.count > 0 ||
1352 (di->flags & CEPH_DENTRY_REFERENCED))
1353 return TOUCH;
1354 /* invalidate dir lease */
1355 di->lease_shared_gen = 0;
1349 } 1356 }
1350 return DELETE; 1357 return DELETE;
1351} 1358}
@@ -1353,8 +1360,17 @@ static int __dir_lease_check(struct dentry *dentry, void *arg)
1353int ceph_trim_dentries(struct ceph_mds_client *mdsc) 1360int ceph_trim_dentries(struct ceph_mds_client *mdsc)
1354{ 1361{
1355 struct ceph_lease_walk_control lwc; 1362 struct ceph_lease_walk_control lwc;
1363 unsigned long count;
1356 unsigned long freed; 1364 unsigned long freed;
1357 1365
1366 spin_lock(&mdsc->caps_list_lock);
1367 if (mdsc->caps_use_max > 0 &&
1368 mdsc->caps_use_count > mdsc->caps_use_max)
1369 count = mdsc->caps_use_count - mdsc->caps_use_max;
1370 else
1371 count = 0;
1372 spin_unlock(&mdsc->caps_list_lock);
1373
1358 lwc.dir_lease = false; 1374 lwc.dir_lease = false;
1359 lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2; 1375 lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2;
1360 freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check); 1376 freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
@@ -1365,6 +1381,8 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
1365 lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE; 1381 lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE;
1366 1382
1367 lwc.dir_lease = true; 1383 lwc.dir_lease = true;
1384 lwc.expire_dir_lease = freed < count;
1385 lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
1368 freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check); 1386 freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
1369 if (!lwc.nr_to_scan) /* more to check */ 1387 if (!lwc.nr_to_scan) /* more to check */
1370 return -EAGAIN; 1388 return -EAGAIN;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2095e5d038f8..21c33ed048ed 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1965,6 +1965,18 @@ void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc)
1965 } 1965 }
1966} 1966}
1967 1967
1968void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
1969{
1970 int val;
1971 if (!nr)
1972 return;
1973 val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
1974 if (!(val % CEPH_CAPS_PER_RELEASE)) {
1975 atomic_set(&mdsc->cap_reclaim_pending, 0);
1976 ceph_queue_cap_reclaim_work(mdsc);
1977 }
1978}
1979
1968/* 1980/*
1969 * requests 1981 * requests
1970 */ 1982 */
@@ -2878,7 +2890,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2878 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || 2890 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
2879 req->r_op == CEPH_MDS_OP_LSSNAP)) 2891 req->r_op == CEPH_MDS_OP_LSSNAP))
2880 ceph_readdir_prepopulate(req, req->r_session); 2892 ceph_readdir_prepopulate(req, req->r_session);
2881 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
2882 } 2893 }
2883 current->journal_info = NULL; 2894 current->journal_info = NULL;
2884 mutex_unlock(&req->r_fill_mutex); 2895 mutex_unlock(&req->r_fill_mutex);
@@ -2887,12 +2898,18 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2887 if (realm) 2898 if (realm)
2888 ceph_put_snap_realm(mdsc, realm); 2899 ceph_put_snap_realm(mdsc, realm);
2889 2900
2890 if (err == 0 && req->r_target_inode && 2901 if (err == 0) {
2891 test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { 2902 if (req->r_target_inode &&
2892 struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); 2903 test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
2893 spin_lock(&ci->i_unsafe_lock); 2904 struct ceph_inode_info *ci =
2894 list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops); 2905 ceph_inode(req->r_target_inode);
2895 spin_unlock(&ci->i_unsafe_lock); 2906 spin_lock(&ci->i_unsafe_lock);
2907 list_add_tail(&req->r_unsafe_target_item,
2908 &ci->i_unsafe_iops);
2909 spin_unlock(&ci->i_unsafe_lock);
2910 }
2911
2912 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
2896 } 2913 }
2897out_err: 2914out_err:
2898 mutex_lock(&mdsc->mutex); 2915 mutex_lock(&mdsc->mutex);
@@ -4083,13 +4100,14 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
4083 spin_lock_init(&mdsc->cap_dirty_lock); 4100 spin_lock_init(&mdsc->cap_dirty_lock);
4084 init_waitqueue_head(&mdsc->cap_flushing_wq); 4101 init_waitqueue_head(&mdsc->cap_flushing_wq);
4085 INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work); 4102 INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
4103 atomic_set(&mdsc->cap_reclaim_pending, 0);
4086 4104
4087 spin_lock_init(&mdsc->dentry_list_lock); 4105 spin_lock_init(&mdsc->dentry_list_lock);
4088 INIT_LIST_HEAD(&mdsc->dentry_leases); 4106 INIT_LIST_HEAD(&mdsc->dentry_leases);
4089 INIT_LIST_HEAD(&mdsc->dentry_dir_leases); 4107 INIT_LIST_HEAD(&mdsc->dentry_dir_leases);
4090 4108
4091 ceph_caps_init(mdsc); 4109 ceph_caps_init(mdsc);
4092 ceph_adjust_min_caps(mdsc, fsc->min_caps); 4110 ceph_adjust_caps_max_min(mdsc, fsc->mount_options);
4093 4111
4094 spin_lock_init(&mdsc->snapid_map_lock); 4112 spin_lock_init(&mdsc->snapid_map_lock);
4095 mdsc->snapid_map_tree = RB_ROOT; 4113 mdsc->snapid_map_tree = RB_ROOT;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 580b235f343b..50385a481fdb 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -379,6 +379,7 @@ struct ceph_mds_client {
379 wait_queue_head_t cap_flushing_wq; 379 wait_queue_head_t cap_flushing_wq;
380 380
381 struct work_struct cap_reclaim_work; 381 struct work_struct cap_reclaim_work;
382 atomic_t cap_reclaim_pending;
382 383
383 /* 384 /*
384 * Cap reservations 385 * Cap reservations
@@ -396,6 +397,7 @@ struct ceph_mds_client {
396 unreserved) */ 397 unreserved) */
397 int caps_total_count; /* total caps allocated */ 398 int caps_total_count; /* total caps allocated */
398 int caps_use_count; /* in use */ 399 int caps_use_count; /* in use */
400 int caps_use_max; /* max used caps */
399 int caps_reserve_count; /* unused, reserved */ 401 int caps_reserve_count; /* unused, reserved */
400 int caps_avail_count; /* unused, unreserved */ 402 int caps_avail_count; /* unused, unreserved */
401 int caps_min_count; /* keep at least this many 403 int caps_min_count; /* keep at least this many
@@ -465,6 +467,7 @@ extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
465extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, 467extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
466 struct ceph_mds_session *session); 468 struct ceph_mds_session *session);
467extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); 469extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
470extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
468extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); 471extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
469 472
470extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, 473extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 200836bcf542..6d5bb2f74612 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -133,6 +133,7 @@ enum {
133 Opt_rasize, 133 Opt_rasize,
134 Opt_caps_wanted_delay_min, 134 Opt_caps_wanted_delay_min,
135 Opt_caps_wanted_delay_max, 135 Opt_caps_wanted_delay_max,
136 Opt_caps_max,
136 Opt_readdir_max_entries, 137 Opt_readdir_max_entries,
137 Opt_readdir_max_bytes, 138 Opt_readdir_max_bytes,
138 Opt_congestion_kb, 139 Opt_congestion_kb,
@@ -175,6 +176,7 @@ static match_table_t fsopt_tokens = {
175 {Opt_rasize, "rasize=%d"}, 176 {Opt_rasize, "rasize=%d"},
176 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 177 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
177 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 178 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
179 {Opt_caps_max, "caps_max=%d"},
178 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 180 {Opt_readdir_max_entries, "readdir_max_entries=%d"},
179 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 181 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
180 {Opt_congestion_kb, "write_congestion_kb=%d"}, 182 {Opt_congestion_kb, "write_congestion_kb=%d"},
@@ -286,6 +288,11 @@ static int parse_fsopt_token(char *c, void *private)
286 return -EINVAL; 288 return -EINVAL;
287 fsopt->caps_wanted_delay_max = intval; 289 fsopt->caps_wanted_delay_max = intval;
288 break; 290 break;
291 case Opt_caps_max:
292 if (intval < 0)
293 return -EINVAL;
294 fsopt->caps_max = intval;
295 break;
289 case Opt_readdir_max_entries: 296 case Opt_readdir_max_entries:
290 if (intval < 1) 297 if (intval < 1)
291 return -EINVAL; 298 return -EINVAL;
@@ -576,6 +583,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
576 seq_printf(m, ",rasize=%d", fsopt->rasize); 583 seq_printf(m, ",rasize=%d", fsopt->rasize);
577 if (fsopt->congestion_kb != default_congestion_kb()) 584 if (fsopt->congestion_kb != default_congestion_kb())
578 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 585 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
586 if (fsopt->caps_max)
587 seq_printf(m, ",caps_max=%d", fsopt->caps_max);
579 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 588 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
580 seq_printf(m, ",caps_wanted_delay_min=%d", 589 seq_printf(m, ",caps_wanted_delay_min=%d",
581 fsopt->caps_wanted_delay_min); 590 fsopt->caps_wanted_delay_min);
@@ -683,9 +692,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
683 if (!fsc->wb_pagevec_pool) 692 if (!fsc->wb_pagevec_pool)
684 goto fail_cap_wq; 693 goto fail_cap_wq;
685 694
686 /* caps */
687 fsc->min_caps = fsopt->max_readdir;
688
689 return fsc; 695 return fsc;
690 696
691fail_cap_wq: 697fail_cap_wq:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b3bcfb3c27bd..16c03188578e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -79,6 +79,7 @@ struct ceph_mount_options {
79 int rasize; /* max readahead */ 79 int rasize; /* max readahead */
80 int congestion_kb; /* max writeback in flight */ 80 int congestion_kb; /* max writeback in flight */
81 int caps_wanted_delay_min, caps_wanted_delay_max; 81 int caps_wanted_delay_min, caps_wanted_delay_max;
82 int caps_max;
82 int max_readdir; /* max readdir result (entires) */ 83 int max_readdir; /* max readdir result (entires) */
83 int max_readdir_bytes; /* max readdir result (bytes) */ 84 int max_readdir_bytes; /* max readdir result (bytes) */
84 85
@@ -100,7 +101,6 @@ struct ceph_fs_client {
100 struct ceph_client *client; 101 struct ceph_client *client;
101 102
102 unsigned long mount_state; 103 unsigned long mount_state;
103 int min_caps; /* min caps i added */
104 loff_t max_file_size; 104 loff_t max_file_size;
105 105
106 struct ceph_mds_client *mdsc; 106 struct ceph_mds_client *mdsc;
@@ -668,7 +668,8 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
668 668
669extern void ceph_caps_init(struct ceph_mds_client *mdsc); 669extern void ceph_caps_init(struct ceph_mds_client *mdsc);
670extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); 670extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
671extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); 671extern void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
672 struct ceph_mount_options *fsopt);
672extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, 673extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
673 struct ceph_cap_reservation *ctx, int need); 674 struct ceph_cap_reservation *ctx, int need);
674extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc, 675extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,