diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/osd_client.c | 76 | ||||
-rw-r--r-- | fs/ceph/osd_client.h | 4 | ||||
-rw-r--r-- | fs/ceph/super.c | 3 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 |
4 files changed, 76 insertions, 9 deletions
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 35c8afea13e..7f8a26fdcc2 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -389,6 +389,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) | |||
389 | atomic_set(&osd->o_ref, 1); | 389 | atomic_set(&osd->o_ref, 1); |
390 | osd->o_osdc = osdc; | 390 | osd->o_osdc = osdc; |
391 | INIT_LIST_HEAD(&osd->o_requests); | 391 | INIT_LIST_HEAD(&osd->o_requests); |
392 | INIT_LIST_HEAD(&osd->o_osd_lru); | ||
392 | osd->o_incarnation = 1; | 393 | osd->o_incarnation = 1; |
393 | 394 | ||
394 | ceph_con_init(osdc->client->msgr, &osd->o_con); | 395 | ceph_con_init(osdc->client->msgr, &osd->o_con); |
@@ -422,25 +423,56 @@ static void put_osd(struct ceph_osd *osd) | |||
422 | /* | 423 | /* |
423 | * remove an osd from our map | 424 | * remove an osd from our map |
424 | */ | 425 | */ |
425 | static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | 426 | static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) |
426 | { | 427 | { |
427 | dout("remove_osd %p\n", osd); | 428 | dout("__remove_osd %p\n", osd); |
428 | BUG_ON(!list_empty(&osd->o_requests)); | 429 | BUG_ON(!list_empty(&osd->o_requests)); |
429 | rb_erase(&osd->o_node, &osdc->osds); | 430 | rb_erase(&osd->o_node, &osdc->osds); |
431 | list_del_init(&osd->o_osd_lru); | ||
430 | ceph_con_close(&osd->o_con); | 432 | ceph_con_close(&osd->o_con); |
431 | put_osd(osd); | 433 | put_osd(osd); |
432 | } | 434 | } |
433 | 435 | ||
436 | static void __move_osd_to_lru(struct ceph_osd_client *osdc, | ||
437 | struct ceph_osd *osd) | ||
438 | { | ||
439 | dout("__move_osd_to_lru %p\n", osd); | ||
440 | BUG_ON(!list_empty(&osd->o_osd_lru)); | ||
441 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); | ||
442 | osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ; | ||
443 | } | ||
444 | |||
445 | static void __remove_osd_from_lru(struct ceph_osd *osd) | ||
446 | { | ||
447 | dout("__remove_osd_from_lru %p\n", osd); | ||
448 | if (!list_empty(&osd->o_osd_lru)) | ||
449 | list_del_init(&osd->o_osd_lru); | ||
450 | } | ||
451 | |||
452 | static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) | ||
453 | { | ||
454 | struct ceph_osd *osd, *nosd; | ||
455 | |||
456 | dout("__remove_old_osds %p\n", osdc); | ||
457 | mutex_lock(&osdc->request_mutex); | ||
458 | list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { | ||
459 | if (!remove_all && time_before(jiffies, osd->lru_ttl)) | ||
460 | break; | ||
461 | __remove_osd(osdc, osd); | ||
462 | } | ||
463 | mutex_unlock(&osdc->request_mutex); | ||
464 | } | ||
465 | |||
434 | /* | 466 | /* |
435 | * reset osd connect | 467 | * reset osd connect |
436 | */ | 468 | */ |
437 | static int reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | 469 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) |
438 | { | 470 | { |
439 | int ret = 0; | 471 | int ret = 0; |
440 | 472 | ||
441 | dout("reset_osd %p osd%d\n", osd, osd->o_osd); | 473 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); |
442 | if (list_empty(&osd->o_requests)) { | 474 | if (list_empty(&osd->o_requests)) { |
443 | remove_osd(osdc, osd); | 475 | __remove_osd(osdc, osd); |
444 | } else { | 476 | } else { |
445 | ceph_con_close(&osd->o_con); | 477 | ceph_con_close(&osd->o_con); |
446 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); | 478 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); |
@@ -533,7 +565,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, | |||
533 | 565 | ||
534 | list_del_init(&req->r_osd_item); | 566 | list_del_init(&req->r_osd_item); |
535 | if (list_empty(&req->r_osd->o_requests)) | 567 | if (list_empty(&req->r_osd->o_requests)) |
536 | remove_osd(osdc, req->r_osd); | 568 | __move_osd_to_lru(osdc, req->r_osd); |
537 | req->r_osd = NULL; | 569 | req->r_osd = NULL; |
538 | } | 570 | } |
539 | 571 | ||
@@ -611,7 +643,7 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
611 | if (list_empty(&req->r_osd->o_requests)) { | 643 | if (list_empty(&req->r_osd->o_requests)) { |
612 | /* try to re-use r_osd if possible */ | 644 | /* try to re-use r_osd if possible */ |
613 | newosd = get_osd(req->r_osd); | 645 | newosd = get_osd(req->r_osd); |
614 | remove_osd(osdc, newosd); | 646 | __remove_osd(osdc, newosd); |
615 | } | 647 | } |
616 | req->r_osd = NULL; | 648 | req->r_osd = NULL; |
617 | } | 649 | } |
@@ -636,8 +668,10 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
636 | ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); | 668 | ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); |
637 | } | 669 | } |
638 | 670 | ||
639 | if (req->r_osd) | 671 | if (req->r_osd) { |
672 | __remove_osd_from_lru(req->r_osd); | ||
640 | list_add(&req->r_osd_item, &req->r_osd->o_requests); | 673 | list_add(&req->r_osd_item, &req->r_osd->o_requests); |
674 | } | ||
641 | err = 1; /* osd changed */ | 675 | err = 1; /* osd changed */ |
642 | 676 | ||
643 | out: | 677 | out: |
@@ -744,6 +778,23 @@ static void handle_timeout(struct work_struct *work) | |||
744 | up_read(&osdc->map_sem); | 778 | up_read(&osdc->map_sem); |
745 | } | 779 | } |
746 | 780 | ||
781 | static void handle_osds_timeout(struct work_struct *work) | ||
782 | { | ||
783 | struct ceph_osd_client *osdc = | ||
784 | container_of(work, struct ceph_osd_client, | ||
785 | osds_timeout_work.work); | ||
786 | unsigned long delay = | ||
787 | osdc->client->mount_args->osd_idle_ttl * HZ >> 2; | ||
788 | |||
789 | dout("osds timeout\n"); | ||
790 | down_read(&osdc->map_sem); | ||
791 | remove_old_osds(osdc, 0); | ||
792 | up_read(&osdc->map_sem); | ||
793 | |||
794 | schedule_delayed_work(&osdc->osds_timeout_work, | ||
795 | round_jiffies_relative(delay)); | ||
796 | } | ||
797 | |||
747 | /* | 798 | /* |
748 | * handle osd op reply. either call the callback if it is specified, | 799 | * handle osd op reply. either call the callback if it is specified, |
749 | * or do the completion to wake up the waiting thread. | 800 | * or do the completion to wake up the waiting thread. |
@@ -881,7 +932,7 @@ static void kick_requests(struct ceph_osd_client *osdc, | |||
881 | ceph_osd_addr(osdc->osdmap, | 932 | ceph_osd_addr(osdc->osdmap, |
882 | osd->o_osd), | 933 | osd->o_osd), |
883 | sizeof(struct ceph_entity_addr)) != 0) | 934 | sizeof(struct ceph_entity_addr)) != 0) |
884 | reset_osd(osdc, osd); | 935 | __reset_osd(osdc, osd); |
885 | } | 936 | } |
886 | } | 937 | } |
887 | 938 | ||
@@ -1195,9 +1246,14 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
1195 | osdc->timeout_tid = 0; | 1246 | osdc->timeout_tid = 0; |
1196 | osdc->last_tid = 0; | 1247 | osdc->last_tid = 0; |
1197 | osdc->osds = RB_ROOT; | 1248 | osdc->osds = RB_ROOT; |
1249 | INIT_LIST_HEAD(&osdc->osd_lru); | ||
1198 | osdc->requests = RB_ROOT; | 1250 | osdc->requests = RB_ROOT; |
1199 | osdc->num_requests = 0; | 1251 | osdc->num_requests = 0; |
1200 | INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); | 1252 | INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); |
1253 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); | ||
1254 | |||
1255 | schedule_delayed_work(&osdc->osds_timeout_work, | ||
1256 | round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); | ||
1201 | 1257 | ||
1202 | err = -ENOMEM; | 1258 | err = -ENOMEM; |
1203 | osdc->req_mempool = mempool_create_kmalloc_pool(10, | 1259 | osdc->req_mempool = mempool_create_kmalloc_pool(10, |
@@ -1219,10 +1275,12 @@ out: | |||
1219 | void ceph_osdc_stop(struct ceph_osd_client *osdc) | 1275 | void ceph_osdc_stop(struct ceph_osd_client *osdc) |
1220 | { | 1276 | { |
1221 | cancel_delayed_work_sync(&osdc->timeout_work); | 1277 | cancel_delayed_work_sync(&osdc->timeout_work); |
1278 | cancel_delayed_work_sync(&osdc->osds_timeout_work); | ||
1222 | if (osdc->osdmap) { | 1279 | if (osdc->osdmap) { |
1223 | ceph_osdmap_destroy(osdc->osdmap); | 1280 | ceph_osdmap_destroy(osdc->osdmap); |
1224 | osdc->osdmap = NULL; | 1281 | osdc->osdmap = NULL; |
1225 | } | 1282 | } |
1283 | remove_old_osds(osdc, 1); | ||
1226 | mempool_destroy(osdc->req_mempool); | 1284 | mempool_destroy(osdc->req_mempool); |
1227 | ceph_msgpool_destroy(&osdc->msgpool_op); | 1285 | ceph_msgpool_destroy(&osdc->msgpool_op); |
1228 | } | 1286 | } |
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index 8d533d9406f..70f31b61f02 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h | |||
@@ -31,9 +31,11 @@ struct ceph_osd { | |||
31 | struct rb_node o_node; | 31 | struct rb_node o_node; |
32 | struct ceph_connection o_con; | 32 | struct ceph_connection o_con; |
33 | struct list_head o_requests; | 33 | struct list_head o_requests; |
34 | struct list_head o_osd_lru; | ||
34 | struct ceph_authorizer *o_authorizer; | 35 | struct ceph_authorizer *o_authorizer; |
35 | void *o_authorizer_buf, *o_authorizer_reply_buf; | 36 | void *o_authorizer_buf, *o_authorizer_reply_buf; |
36 | size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; | 37 | size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; |
38 | unsigned long lru_ttl; | ||
37 | }; | 39 | }; |
38 | 40 | ||
39 | /* an in-flight request */ | 41 | /* an in-flight request */ |
@@ -90,11 +92,13 @@ struct ceph_osd_client { | |||
90 | 92 | ||
91 | struct mutex request_mutex; | 93 | struct mutex request_mutex; |
92 | struct rb_root osds; /* osds */ | 94 | struct rb_root osds; /* osds */ |
95 | struct list_head osd_lru; /* idle osds */ | ||
93 | u64 timeout_tid; /* tid of timeout triggering rq */ | 96 | u64 timeout_tid; /* tid of timeout triggering rq */ |
94 | u64 last_tid; /* tid of last request */ | 97 | u64 last_tid; /* tid of last request */ |
95 | struct rb_root requests; /* pending requests */ | 98 | struct rb_root requests; /* pending requests */ |
96 | int num_requests; | 99 | int num_requests; |
97 | struct delayed_work timeout_work; | 100 | struct delayed_work timeout_work; |
101 | struct delayed_work osds_timeout_work; | ||
98 | #ifdef CONFIG_DEBUG_FS | 102 | #ifdef CONFIG_DEBUG_FS |
99 | struct dentry *debugfs_file; | 103 | struct dentry *debugfs_file; |
100 | #endif | 104 | #endif |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 3a2548951fe..39aaf29a04a 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -293,6 +293,7 @@ enum { | |||
293 | Opt_rsize, | 293 | Opt_rsize, |
294 | Opt_osdtimeout, | 294 | Opt_osdtimeout, |
295 | Opt_mount_timeout, | 295 | Opt_mount_timeout, |
296 | Opt_osd_idle_ttl, | ||
296 | Opt_caps_wanted_delay_min, | 297 | Opt_caps_wanted_delay_min, |
297 | Opt_caps_wanted_delay_max, | 298 | Opt_caps_wanted_delay_max, |
298 | Opt_readdir_max_entries, | 299 | Opt_readdir_max_entries, |
@@ -322,6 +323,7 @@ static match_table_t arg_tokens = { | |||
322 | {Opt_rsize, "rsize=%d"}, | 323 | {Opt_rsize, "rsize=%d"}, |
323 | {Opt_osdtimeout, "osdtimeout=%d"}, | 324 | {Opt_osdtimeout, "osdtimeout=%d"}, |
324 | {Opt_mount_timeout, "mount_timeout=%d"}, | 325 | {Opt_mount_timeout, "mount_timeout=%d"}, |
326 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
325 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 327 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
326 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 328 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
327 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, | 329 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, |
@@ -367,6 +369,7 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
367 | args->flags = CEPH_OPT_DEFAULT; | 369 | args->flags = CEPH_OPT_DEFAULT; |
368 | args->osd_timeout = 5; /* seconds */ | 370 | args->osd_timeout = 5; /* seconds */ |
369 | args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | 371 | args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ |
372 | args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
370 | args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | 373 | args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; |
371 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 374 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
372 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 375 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 770f7b507fc..3930fb685f0 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -53,6 +53,7 @@ struct ceph_mount_args { | |||
53 | struct ceph_entity_addr *mon_addr; | 53 | struct ceph_entity_addr *mon_addr; |
54 | int flags; | 54 | int flags; |
55 | int mount_timeout; | 55 | int mount_timeout; |
56 | int osd_idle_ttl; | ||
56 | int caps_wanted_delay_min, caps_wanted_delay_max; | 57 | int caps_wanted_delay_min, caps_wanted_delay_max; |
57 | struct ceph_fsid fsid; | 58 | struct ceph_fsid fsid; |
58 | struct ceph_entity_addr my_addr; | 59 | struct ceph_entity_addr my_addr; |
@@ -71,6 +72,7 @@ struct ceph_mount_args { | |||
71 | * defaults | 72 | * defaults |
72 | */ | 73 | */ |
73 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | 74 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 |
75 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
74 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | 76 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ |
75 | 77 | ||
76 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 78 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |