diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2015-05-15 05:02:17 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-06-25 04:49:29 -0400 |
commit | a319bf56a617354e62cf5f774d2ca4e1a8a3bff3 (patch) | |
tree | cf54ed20b02c8488a342f54fd573eb57df964a3c /net/ceph | |
parent | d50c97b566c5bbf990eff472e9feaa58fdebdd33 (diff) |
libceph: store timeouts in jiffies, verify user input
There are currently three libceph-level timeouts that the user can
specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of
these are in seconds and no checking is done on user input: negative
values are accepted, we multiply them all by HZ which may or may not
overflow, arbitrarily large jiffies then get added together, etc.
There is also a bug in the way mount_timeout=0 is handled. It's
supposed to mean "infinite timeout", but that's not how the wait.h APIs
treat it, and so __ceph_open_session(), for example, will busy-loop
without much chance of being interrupted if none of the ceph-mons are
reachable.
Fix all this by verifying user input, storing timeouts in jiffies
(capped by msecs_to_jiffies()), and using the new ceph_timeout_jiffies()
helper for all user-specified waits to handle infinite timeouts
correctly.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Diffstat (limited to 'net/ceph')
-rw-r--r-- | net/ceph/ceph_common.c | 41 | ||||
-rw-r--r-- | net/ceph/mon_client.c | 11 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 15 |
3 files changed, 47 insertions, 20 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 79e8f71aef5b..a80e91c2c9a3 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name, | |||
352 | /* start with defaults */ | 352 | /* start with defaults */ |
353 | opt->flags = CEPH_OPT_DEFAULT; | 353 | opt->flags = CEPH_OPT_DEFAULT; |
354 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 354 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; |
355 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | 355 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; |
356 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | 356 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; |
357 | 357 | ||
358 | /* get mon ip(s) */ | 358 | /* get mon ip(s) */ |
359 | /* ip1[:port1][,ip2[:port2]...] */ | 359 | /* ip1[:port1][,ip2[:port2]...] */ |
@@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name, | |||
439 | pr_warn("ignoring deprecated osdtimeout option\n"); | 439 | pr_warn("ignoring deprecated osdtimeout option\n"); |
440 | break; | 440 | break; |
441 | case Opt_osdkeepalivetimeout: | 441 | case Opt_osdkeepalivetimeout: |
442 | opt->osd_keepalive_timeout = intval; | 442 | /* 0 isn't well defined right now, reject it */ |
443 | if (intval < 1 || intval > INT_MAX / 1000) { | ||
444 | pr_err("osdkeepalive out of range\n"); | ||
445 | err = -EINVAL; | ||
446 | goto out; | ||
447 | } | ||
448 | opt->osd_keepalive_timeout = | ||
449 | msecs_to_jiffies(intval * 1000); | ||
443 | break; | 450 | break; |
444 | case Opt_osd_idle_ttl: | 451 | case Opt_osd_idle_ttl: |
445 | opt->osd_idle_ttl = intval; | 452 | /* 0 isn't well defined right now, reject it */ |
453 | if (intval < 1 || intval > INT_MAX / 1000) { | ||
454 | pr_err("osd_idle_ttl out of range\n"); | ||
455 | err = -EINVAL; | ||
456 | goto out; | ||
457 | } | ||
458 | opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000); | ||
446 | break; | 459 | break; |
447 | case Opt_mount_timeout: | 460 | case Opt_mount_timeout: |
448 | opt->mount_timeout = intval; | 461 | /* 0 is "wait forever" (i.e. infinite timeout) */ |
462 | if (intval < 0 || intval > INT_MAX / 1000) { | ||
463 | pr_err("mount_timeout out of range\n"); | ||
464 | err = -EINVAL; | ||
465 | goto out; | ||
466 | } | ||
467 | opt->mount_timeout = msecs_to_jiffies(intval * 1000); | ||
449 | break; | 468 | break; |
450 | 469 | ||
451 | case Opt_share: | 470 | case Opt_share: |
@@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) | |||
512 | seq_puts(m, "notcp_nodelay,"); | 531 | seq_puts(m, "notcp_nodelay,"); |
513 | 532 | ||
514 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | 533 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) |
515 | seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); | 534 | seq_printf(m, "mount_timeout=%d,", |
535 | jiffies_to_msecs(opt->mount_timeout) / 1000); | ||
516 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | 536 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) |
517 | seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); | 537 | seq_printf(m, "osd_idle_ttl=%d,", |
538 | jiffies_to_msecs(opt->osd_idle_ttl) / 1000); | ||
518 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | 539 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) |
519 | seq_printf(m, "osdkeepalivetimeout=%d,", | 540 | seq_printf(m, "osdkeepalivetimeout=%d,", |
520 | opt->osd_keepalive_timeout); | 541 | jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); |
521 | 542 | ||
522 | /* drop redundant comma */ | 543 | /* drop redundant comma */ |
523 | if (m->count != pos) | 544 | if (m->count != pos) |
@@ -627,7 +648,7 @@ static int have_mon_and_osd_map(struct ceph_client *client) | |||
627 | int __ceph_open_session(struct ceph_client *client, unsigned long started) | 648 | int __ceph_open_session(struct ceph_client *client, unsigned long started) |
628 | { | 649 | { |
629 | int err; | 650 | int err; |
630 | unsigned long timeout = client->options->mount_timeout * HZ; | 651 | unsigned long timeout = client->options->mount_timeout; |
631 | 652 | ||
632 | /* open session, and wait for mon and osd maps */ | 653 | /* open session, and wait for mon and osd maps */ |
633 | err = ceph_monc_open_session(&client->monc); | 654 | err = ceph_monc_open_session(&client->monc); |
@@ -643,7 +664,7 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) | |||
643 | dout("mount waiting for mon_map\n"); | 664 | dout("mount waiting for mon_map\n"); |
644 | err = wait_event_interruptible_timeout(client->auth_wq, | 665 | err = wait_event_interruptible_timeout(client->auth_wq, |
645 | have_mon_and_osd_map(client) || (client->auth_err < 0), | 666 | have_mon_and_osd_map(client) || (client->auth_err < 0), |
646 | timeout); | 667 | ceph_timeout_jiffies(timeout)); |
647 | if (err == -EINTR || err == -ERESTARTSYS) | 668 | if (err == -EINTR || err == -ERESTARTSYS) |
648 | return err; | 669 | return err; |
649 | if (client->auth_err < 0) | 670 | if (client->auth_err < 0) |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2b3cf05e87b0..0da3bdc116f7 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
@@ -298,6 +298,12 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) | |||
298 | } | 298 | } |
299 | EXPORT_SYMBOL(ceph_monc_request_next_osdmap); | 299 | EXPORT_SYMBOL(ceph_monc_request_next_osdmap); |
300 | 300 | ||
301 | /* | ||
302 | * Wait for an osdmap with a given epoch. | ||
303 | * | ||
304 | * @epoch: epoch to wait for | ||
305 | * @timeout: in jiffies, 0 means "wait forever" | ||
306 | */ | ||
301 | int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, | 307 | int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, |
302 | unsigned long timeout) | 308 | unsigned long timeout) |
303 | { | 309 | { |
@@ -308,11 +314,12 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, | |||
308 | while (monc->have_osdmap < epoch) { | 314 | while (monc->have_osdmap < epoch) { |
309 | mutex_unlock(&monc->mutex); | 315 | mutex_unlock(&monc->mutex); |
310 | 316 | ||
311 | if (timeout != 0 && time_after_eq(jiffies, started + timeout)) | 317 | if (timeout && time_after_eq(jiffies, started + timeout)) |
312 | return -ETIMEDOUT; | 318 | return -ETIMEDOUT; |
313 | 319 | ||
314 | ret = wait_event_interruptible_timeout(monc->client->auth_wq, | 320 | ret = wait_event_interruptible_timeout(monc->client->auth_wq, |
315 | monc->have_osdmap >= epoch, timeout); | 321 | monc->have_osdmap >= epoch, |
322 | ceph_timeout_jiffies(timeout)); | ||
316 | if (ret < 0) | 323 | if (ret < 0) |
317 | return ret; | 324 | return ret; |
318 | 325 | ||
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4cb4fab46e4f..50033677c0fa 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -1097,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, | |||
1097 | BUG_ON(!list_empty(&osd->o_osd_lru)); | 1097 | BUG_ON(!list_empty(&osd->o_osd_lru)); |
1098 | 1098 | ||
1099 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); | 1099 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); |
1100 | osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; | 1100 | osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl; |
1101 | } | 1101 | } |
1102 | 1102 | ||
1103 | static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, | 1103 | static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, |
@@ -1208,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) | |||
1208 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) | 1208 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) |
1209 | { | 1209 | { |
1210 | schedule_delayed_work(&osdc->timeout_work, | 1210 | schedule_delayed_work(&osdc->timeout_work, |
1211 | osdc->client->options->osd_keepalive_timeout * HZ); | 1211 | osdc->client->options->osd_keepalive_timeout); |
1212 | } | 1212 | } |
1213 | 1213 | ||
1214 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) | 1214 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) |
@@ -1576,10 +1576,9 @@ static void handle_timeout(struct work_struct *work) | |||
1576 | { | 1576 | { |
1577 | struct ceph_osd_client *osdc = | 1577 | struct ceph_osd_client *osdc = |
1578 | container_of(work, struct ceph_osd_client, timeout_work.work); | 1578 | container_of(work, struct ceph_osd_client, timeout_work.work); |
1579 | struct ceph_options *opts = osdc->client->options; | ||
1579 | struct ceph_osd_request *req; | 1580 | struct ceph_osd_request *req; |
1580 | struct ceph_osd *osd; | 1581 | struct ceph_osd *osd; |
1581 | unsigned long keepalive = | ||
1582 | osdc->client->options->osd_keepalive_timeout * HZ; | ||
1583 | struct list_head slow_osds; | 1582 | struct list_head slow_osds; |
1584 | dout("timeout\n"); | 1583 | dout("timeout\n"); |
1585 | down_read(&osdc->map_sem); | 1584 | down_read(&osdc->map_sem); |
@@ -1595,7 +1594,8 @@ static void handle_timeout(struct work_struct *work) | |||
1595 | */ | 1594 | */ |
1596 | INIT_LIST_HEAD(&slow_osds); | 1595 | INIT_LIST_HEAD(&slow_osds); |
1597 | list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { | 1596 | list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { |
1598 | if (time_before(jiffies, req->r_stamp + keepalive)) | 1597 | if (time_before(jiffies, |
1598 | req->r_stamp + opts->osd_keepalive_timeout)) | ||
1599 | break; | 1599 | break; |
1600 | 1600 | ||
1601 | osd = req->r_osd; | 1601 | osd = req->r_osd; |
@@ -1622,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work) | |||
1622 | struct ceph_osd_client *osdc = | 1622 | struct ceph_osd_client *osdc = |
1623 | container_of(work, struct ceph_osd_client, | 1623 | container_of(work, struct ceph_osd_client, |
1624 | osds_timeout_work.work); | 1624 | osds_timeout_work.work); |
1625 | unsigned long delay = | 1625 | unsigned long delay = osdc->client->options->osd_idle_ttl / 4; |
1626 | osdc->client->options->osd_idle_ttl * HZ >> 2; | ||
1627 | 1626 | ||
1628 | dout("osds timeout\n"); | 1627 | dout("osds timeout\n"); |
1629 | down_read(&osdc->map_sem); | 1628 | down_read(&osdc->map_sem); |
@@ -2628,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
2628 | osdc->event_count = 0; | 2627 | osdc->event_count = 0; |
2629 | 2628 | ||
2630 | schedule_delayed_work(&osdc->osds_timeout_work, | 2629 | schedule_delayed_work(&osdc->osds_timeout_work, |
2631 | round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); | 2630 | round_jiffies_relative(osdc->client->options->osd_idle_ttl)); |
2632 | 2631 | ||
2633 | err = -ENOMEM; | 2632 | err = -ENOMEM; |
2634 | osdc->req_mempool = mempool_create_kmalloc_pool(10, | 2633 | osdc->req_mempool = mempool_create_kmalloc_pool(10, |