about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Yehuda Sadeh <yehuda@hq.newdream.net> 2010-02-03 14:00:26 -0500
committer: Sage Weil <sage@newdream.net> 2010-02-11 14:48:48 -0500
commit: f5a2041bd96c9f05ff10172b9c814c14f247084e (patch)
tree: 3c9c47169fa5ad2ec52d278f10d9d2de2237accf /fs
parent: b056c8769d1da6a6a80ce780a4b8957b70434a41 (diff)
ceph: put unused osd connections on lru
Instead of removing an osd connection immediately when its requests list becomes empty, put the osd connection on an LRU list. The connection is removed only if that osd has not been used for more than a specified time (the osd_idle_ttl mount option, in seconds; the default is 60).

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ceph/osd_client.c 76
-rw-r--r-- fs/ceph/osd_client.h 4
-rw-r--r-- fs/ceph/super.c 3
-rw-r--r-- fs/ceph/super.h 2
4 files changed, 76 insertions, 9 deletions
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 35c8afea13e..7f8a26fdcc2 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -389,6 +389,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
389 atomic_set(&osd->o_ref, 1); 389 atomic_set(&osd->o_ref, 1);
390 osd->o_osdc = osdc; 390 osd->o_osdc = osdc;
391 INIT_LIST_HEAD(&osd->o_requests); 391 INIT_LIST_HEAD(&osd->o_requests);
392 INIT_LIST_HEAD(&osd->o_osd_lru);
392 osd->o_incarnation = 1; 393 osd->o_incarnation = 1;
393 394
394 ceph_con_init(osdc->client->msgr, &osd->o_con); 395 ceph_con_init(osdc->client->msgr, &osd->o_con);
@@ -422,25 +423,56 @@ static void put_osd(struct ceph_osd *osd)
422/* 423/*
423 * remove an osd from our map 424 * remove an osd from our map
424 */ 425 */
425static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) 426static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
426{ 427{
427 dout("remove_osd %p\n", osd); 428 dout("__remove_osd %p\n", osd);
428 BUG_ON(!list_empty(&osd->o_requests)); 429 BUG_ON(!list_empty(&osd->o_requests));
429 rb_erase(&osd->o_node, &osdc->osds); 430 rb_erase(&osd->o_node, &osdc->osds);
431 list_del_init(&osd->o_osd_lru);
430 ceph_con_close(&osd->o_con); 432 ceph_con_close(&osd->o_con);
431 put_osd(osd); 433 put_osd(osd);
432} 434}
433 435
436static void __move_osd_to_lru(struct ceph_osd_client *osdc,
437 struct ceph_osd *osd)
438{
439 dout("__move_osd_to_lru %p\n", osd);
440 BUG_ON(!list_empty(&osd->o_osd_lru));
441 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
442 osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ;
443}
444
445static void __remove_osd_from_lru(struct ceph_osd *osd)
446{
447 dout("__remove_osd_from_lru %p\n", osd);
448 if (!list_empty(&osd->o_osd_lru))
449 list_del_init(&osd->o_osd_lru);
450}
451
452static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
453{
454 struct ceph_osd *osd, *nosd;
455
456 dout("__remove_old_osds %p\n", osdc);
457 mutex_lock(&osdc->request_mutex);
458 list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
459 if (!remove_all && time_before(jiffies, osd->lru_ttl))
460 break;
461 __remove_osd(osdc, osd);
462 }
463 mutex_unlock(&osdc->request_mutex);
464}
465
434/* 466/*
435 * reset osd connect 467 * reset osd connect
436 */ 468 */
437static int reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) 469static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
438{ 470{
439 int ret = 0; 471 int ret = 0;
440 472
441 dout("reset_osd %p osd%d\n", osd, osd->o_osd); 473 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
442 if (list_empty(&osd->o_requests)) { 474 if (list_empty(&osd->o_requests)) {
443 remove_osd(osdc, osd); 475 __remove_osd(osdc, osd);
444 } else { 476 } else {
445 ceph_con_close(&osd->o_con); 477 ceph_con_close(&osd->o_con);
446 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); 478 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
@@ -533,7 +565,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
533 565
534 list_del_init(&req->r_osd_item); 566 list_del_init(&req->r_osd_item);
535 if (list_empty(&req->r_osd->o_requests)) 567 if (list_empty(&req->r_osd->o_requests))
536 remove_osd(osdc, req->r_osd); 568 __move_osd_to_lru(osdc, req->r_osd);
537 req->r_osd = NULL; 569 req->r_osd = NULL;
538 } 570 }
539 571
@@ -611,7 +643,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
611 if (list_empty(&req->r_osd->o_requests)) { 643 if (list_empty(&req->r_osd->o_requests)) {
612 /* try to re-use r_osd if possible */ 644 /* try to re-use r_osd if possible */
613 newosd = get_osd(req->r_osd); 645 newosd = get_osd(req->r_osd);
614 remove_osd(osdc, newosd); 646 __remove_osd(osdc, newosd);
615 } 647 }
616 req->r_osd = NULL; 648 req->r_osd = NULL;
617 } 649 }
@@ -636,8 +668,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
636 ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); 668 ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
637 } 669 }
638 670
639 if (req->r_osd) 671 if (req->r_osd) {
672 __remove_osd_from_lru(req->r_osd);
640 list_add(&req->r_osd_item, &req->r_osd->o_requests); 673 list_add(&req->r_osd_item, &req->r_osd->o_requests);
674 }
641 err = 1; /* osd changed */ 675 err = 1; /* osd changed */
642 676
643out: 677out:
@@ -744,6 +778,23 @@ static void handle_timeout(struct work_struct *work)
744 up_read(&osdc->map_sem); 778 up_read(&osdc->map_sem);
745} 779}
746 780
781static void handle_osds_timeout(struct work_struct *work)
782{
783 struct ceph_osd_client *osdc =
784 container_of(work, struct ceph_osd_client,
785 osds_timeout_work.work);
786 unsigned long delay =
787 osdc->client->mount_args->osd_idle_ttl * HZ >> 2;
788
789 dout("osds timeout\n");
790 down_read(&osdc->map_sem);
791 remove_old_osds(osdc, 0);
792 up_read(&osdc->map_sem);
793
794 schedule_delayed_work(&osdc->osds_timeout_work,
795 round_jiffies_relative(delay));
796}
797
747/* 798/*
748 * handle osd op reply. either call the callback if it is specified, 799 * handle osd op reply. either call the callback if it is specified,
749 * or do the completion to wake up the waiting thread. 800 * or do the completion to wake up the waiting thread.
@@ -881,7 +932,7 @@ static void kick_requests(struct ceph_osd_client *osdc,
881 ceph_osd_addr(osdc->osdmap, 932 ceph_osd_addr(osdc->osdmap,
882 osd->o_osd), 933 osd->o_osd),
883 sizeof(struct ceph_entity_addr)) != 0) 934 sizeof(struct ceph_entity_addr)) != 0)
884 reset_osd(osdc, osd); 935 __reset_osd(osdc, osd);
885 } 936 }
886 } 937 }
887 938
@@ -1195,9 +1246,14 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
1195 osdc->timeout_tid = 0; 1246 osdc->timeout_tid = 0;
1196 osdc->last_tid = 0; 1247 osdc->last_tid = 0;
1197 osdc->osds = RB_ROOT; 1248 osdc->osds = RB_ROOT;
1249 INIT_LIST_HEAD(&osdc->osd_lru);
1198 osdc->requests = RB_ROOT; 1250 osdc->requests = RB_ROOT;
1199 osdc->num_requests = 0; 1251 osdc->num_requests = 0;
1200 INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); 1252 INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
1253 INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
1254
1255 schedule_delayed_work(&osdc->osds_timeout_work,
1256 round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ));
1201 1257
1202 err = -ENOMEM; 1258 err = -ENOMEM;
1203 osdc->req_mempool = mempool_create_kmalloc_pool(10, 1259 osdc->req_mempool = mempool_create_kmalloc_pool(10,
@@ -1219,10 +1275,12 @@ out:
1219void ceph_osdc_stop(struct ceph_osd_client *osdc) 1275void ceph_osdc_stop(struct ceph_osd_client *osdc)
1220{ 1276{
1221 cancel_delayed_work_sync(&osdc->timeout_work); 1277 cancel_delayed_work_sync(&osdc->timeout_work);
1278 cancel_delayed_work_sync(&osdc->osds_timeout_work);
1222 if (osdc->osdmap) { 1279 if (osdc->osdmap) {
1223 ceph_osdmap_destroy(osdc->osdmap); 1280 ceph_osdmap_destroy(osdc->osdmap);
1224 osdc->osdmap = NULL; 1281 osdc->osdmap = NULL;
1225 } 1282 }
1283 remove_old_osds(osdc, 1);
1226 mempool_destroy(osdc->req_mempool); 1284 mempool_destroy(osdc->req_mempool);
1227 ceph_msgpool_destroy(&osdc->msgpool_op); 1285 ceph_msgpool_destroy(&osdc->msgpool_op);
1228} 1286}
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index 8d533d9406f..70f31b61f02 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -31,9 +31,11 @@ struct ceph_osd {
31 struct rb_node o_node; 31 struct rb_node o_node;
32 struct ceph_connection o_con; 32 struct ceph_connection o_con;
33 struct list_head o_requests; 33 struct list_head o_requests;
34 struct list_head o_osd_lru;
34 struct ceph_authorizer *o_authorizer; 35 struct ceph_authorizer *o_authorizer;
35 void *o_authorizer_buf, *o_authorizer_reply_buf; 36 void *o_authorizer_buf, *o_authorizer_reply_buf;
36 size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; 37 size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
38 unsigned long lru_ttl;
37}; 39};
38 40
39/* an in-flight request */ 41/* an in-flight request */
@@ -90,11 +92,13 @@ struct ceph_osd_client {
90 92
91 struct mutex request_mutex; 93 struct mutex request_mutex;
92 struct rb_root osds; /* osds */ 94 struct rb_root osds; /* osds */
95 struct list_head osd_lru; /* idle osds */
93 u64 timeout_tid; /* tid of timeout triggering rq */ 96 u64 timeout_tid; /* tid of timeout triggering rq */
94 u64 last_tid; /* tid of last request */ 97 u64 last_tid; /* tid of last request */
95 struct rb_root requests; /* pending requests */ 98 struct rb_root requests; /* pending requests */
96 int num_requests; 99 int num_requests;
97 struct delayed_work timeout_work; 100 struct delayed_work timeout_work;
101 struct delayed_work osds_timeout_work;
98#ifdef CONFIG_DEBUG_FS 102#ifdef CONFIG_DEBUG_FS
99 struct dentry *debugfs_file; 103 struct dentry *debugfs_file;
100#endif 104#endif
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3a2548951fe..39aaf29a04a 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -293,6 +293,7 @@ enum {
293 Opt_rsize, 293 Opt_rsize,
294 Opt_osdtimeout, 294 Opt_osdtimeout,
295 Opt_mount_timeout, 295 Opt_mount_timeout,
296 Opt_osd_idle_ttl,
296 Opt_caps_wanted_delay_min, 297 Opt_caps_wanted_delay_min,
297 Opt_caps_wanted_delay_max, 298 Opt_caps_wanted_delay_max,
298 Opt_readdir_max_entries, 299 Opt_readdir_max_entries,
@@ -322,6 +323,7 @@ static match_table_t arg_tokens = {
322 {Opt_rsize, "rsize=%d"}, 323 {Opt_rsize, "rsize=%d"},
323 {Opt_osdtimeout, "osdtimeout=%d"}, 324 {Opt_osdtimeout, "osdtimeout=%d"},
324 {Opt_mount_timeout, "mount_timeout=%d"}, 325 {Opt_mount_timeout, "mount_timeout=%d"},
326 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
325 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 327 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
326 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 328 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
327 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 329 {Opt_readdir_max_entries, "readdir_max_entries=%d"},
@@ -367,6 +369,7 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
367 args->flags = CEPH_OPT_DEFAULT; 369 args->flags = CEPH_OPT_DEFAULT;
368 args->osd_timeout = 5; /* seconds */ 370 args->osd_timeout = 5; /* seconds */
369 args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ 371 args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
372 args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
370 args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 373 args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
371 args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 374 args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
372 args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 375 args->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 770f7b507fc..3930fb685f0 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -53,6 +53,7 @@ struct ceph_mount_args {
53 struct ceph_entity_addr *mon_addr; 53 struct ceph_entity_addr *mon_addr;
54 int flags; 54 int flags;
55 int mount_timeout; 55 int mount_timeout;
56 int osd_idle_ttl;
56 int caps_wanted_delay_min, caps_wanted_delay_max; 57 int caps_wanted_delay_min, caps_wanted_delay_max;
57 struct ceph_fsid fsid; 58 struct ceph_fsid fsid;
58 struct ceph_entity_addr my_addr; 59 struct ceph_entity_addr my_addr;
@@ -71,6 +72,7 @@ struct ceph_mount_args {
71 * defaults 72 * defaults
72 */ 73 */
73#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 74#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
75#define CEPH_OSD_IDLE_TTL_DEFAULT 60
74#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ 76#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */
75 77
76#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) 78#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)