summaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
author Ilya Dryomov <idryomov@gmail.com> 2015-05-15 05:02:17 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2015-06-25 04:49:29 -0400
commit a319bf56a617354e62cf5f774d2ca4e1a8a3bff3 (patch)
tree cf54ed20b02c8488a342f54fd573eb57df964a3c /net/ceph
parent d50c97b566c5bbf990eff472e9feaa58fdebdd33 (diff)
libceph: store timeouts in jiffies, verify user input
There are currently three libceph-level timeouts that the user can specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of these are in seconds and no checking is done on user input: negative values are accepted, we multiply them all by HZ which may or may not overflow, arbitrarily large jiffies then get added together, etc.

There is also a bug in the way mount_timeout=0 is handled. It's supposed to mean "infinite timeout", but that's not how wait.h APIs treat it and so __ceph_open_session() for example will busy loop without much chance of being interrupted if none of ceph-mons are there.

Fix all this by verifying user input, storing timeouts capped by msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies() helper for all user-specified waits to handle infinite timeouts correctly.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/ceph_common.c | 41
-rw-r--r-- net/ceph/mon_client.c | 11
-rw-r--r-- net/ceph/osd_client.c | 15
3 files changed, 47 insertions, 20 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 79e8f71aef5b..a80e91c2c9a3 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name,
352 /* start with defaults */ 352 /* start with defaults */
353 opt->flags = CEPH_OPT_DEFAULT; 353 opt->flags = CEPH_OPT_DEFAULT;
354 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 354 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
355 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ 355 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
356 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ 356 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
357 357
358 /* get mon ip(s) */ 358 /* get mon ip(s) */
359 /* ip1[:port1][,ip2[:port2]...] */ 359 /* ip1[:port1][,ip2[:port2]...] */
@@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name,
439 pr_warn("ignoring deprecated osdtimeout option\n"); 439 pr_warn("ignoring deprecated osdtimeout option\n");
440 break; 440 break;
441 case Opt_osdkeepalivetimeout: 441 case Opt_osdkeepalivetimeout:
442 opt->osd_keepalive_timeout = intval; 442 /* 0 isn't well defined right now, reject it */
443 if (intval < 1 || intval > INT_MAX / 1000) {
444 pr_err("osdkeepalive out of range\n");
445 err = -EINVAL;
446 goto out;
447 }
448 opt->osd_keepalive_timeout =
449 msecs_to_jiffies(intval * 1000);
443 break; 450 break;
444 case Opt_osd_idle_ttl: 451 case Opt_osd_idle_ttl:
445 opt->osd_idle_ttl = intval; 452 /* 0 isn't well defined right now, reject it */
453 if (intval < 1 || intval > INT_MAX / 1000) {
454 pr_err("osd_idle_ttl out of range\n");
455 err = -EINVAL;
456 goto out;
457 }
458 opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
446 break; 459 break;
447 case Opt_mount_timeout: 460 case Opt_mount_timeout:
448 opt->mount_timeout = intval; 461 /* 0 is "wait forever" (i.e. infinite timeout) */
462 if (intval < 0 || intval > INT_MAX / 1000) {
463 pr_err("mount_timeout out of range\n");
464 err = -EINVAL;
465 goto out;
466 }
467 opt->mount_timeout = msecs_to_jiffies(intval * 1000);
449 break; 468 break;
450 469
451 case Opt_share: 470 case Opt_share:
@@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
512 seq_puts(m, "notcp_nodelay,"); 531 seq_puts(m, "notcp_nodelay,");
513 532
514 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) 533 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
515 seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); 534 seq_printf(m, "mount_timeout=%d,",
535 jiffies_to_msecs(opt->mount_timeout) / 1000);
516 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) 536 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
517 seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); 537 seq_printf(m, "osd_idle_ttl=%d,",
538 jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
518 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 539 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
519 seq_printf(m, "osdkeepalivetimeout=%d,", 540 seq_printf(m, "osdkeepalivetimeout=%d,",
520 opt->osd_keepalive_timeout); 541 jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
521 542
522 /* drop redundant comma */ 543 /* drop redundant comma */
523 if (m->count != pos) 544 if (m->count != pos)
@@ -627,7 +648,7 @@ static int have_mon_and_osd_map(struct ceph_client *client)
627int __ceph_open_session(struct ceph_client *client, unsigned long started) 648int __ceph_open_session(struct ceph_client *client, unsigned long started)
628{ 649{
629 int err; 650 int err;
630 unsigned long timeout = client->options->mount_timeout * HZ; 651 unsigned long timeout = client->options->mount_timeout;
631 652
632 /* open session, and wait for mon and osd maps */ 653 /* open session, and wait for mon and osd maps */
633 err = ceph_monc_open_session(&client->monc); 654 err = ceph_monc_open_session(&client->monc);
@@ -643,7 +664,7 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
643 dout("mount waiting for mon_map\n"); 664 dout("mount waiting for mon_map\n");
644 err = wait_event_interruptible_timeout(client->auth_wq, 665 err = wait_event_interruptible_timeout(client->auth_wq,
645 have_mon_and_osd_map(client) || (client->auth_err < 0), 666 have_mon_and_osd_map(client) || (client->auth_err < 0),
646 timeout); 667 ceph_timeout_jiffies(timeout));
647 if (err == -EINTR || err == -ERESTARTSYS) 668 if (err == -EINTR || err == -ERESTARTSYS)
648 return err; 669 return err;
649 if (client->auth_err < 0) 670 if (client->auth_err < 0)
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2b3cf05e87b0..0da3bdc116f7 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -298,6 +298,12 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
298} 298}
299EXPORT_SYMBOL(ceph_monc_request_next_osdmap); 299EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
300 300
301/*
302 * Wait for an osdmap with a given epoch.
303 *
304 * @epoch: epoch to wait for
305 * @timeout: in jiffies, 0 means "wait forever"
306 */
301int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, 307int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
302 unsigned long timeout) 308 unsigned long timeout)
303{ 309{
@@ -308,11 +314,12 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
308 while (monc->have_osdmap < epoch) { 314 while (monc->have_osdmap < epoch) {
309 mutex_unlock(&monc->mutex); 315 mutex_unlock(&monc->mutex);
310 316
311 if (timeout != 0 && time_after_eq(jiffies, started + timeout)) 317 if (timeout && time_after_eq(jiffies, started + timeout))
312 return -ETIMEDOUT; 318 return -ETIMEDOUT;
313 319
314 ret = wait_event_interruptible_timeout(monc->client->auth_wq, 320 ret = wait_event_interruptible_timeout(monc->client->auth_wq,
315 monc->have_osdmap >= epoch, timeout); 321 monc->have_osdmap >= epoch,
322 ceph_timeout_jiffies(timeout));
316 if (ret < 0) 323 if (ret < 0)
317 return ret; 324 return ret;
318 325
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4cb4fab46e4f..50033677c0fa 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1097,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
1097 BUG_ON(!list_empty(&osd->o_osd_lru)); 1097 BUG_ON(!list_empty(&osd->o_osd_lru));
1098 1098
1099 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); 1099 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
1100 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; 1100 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
1101} 1101}
1102 1102
1103static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, 1103static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
@@ -1208,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
1208static void __schedule_osd_timeout(struct ceph_osd_client *osdc) 1208static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
1209{ 1209{
1210 schedule_delayed_work(&osdc->timeout_work, 1210 schedule_delayed_work(&osdc->timeout_work,
1211 osdc->client->options->osd_keepalive_timeout * HZ); 1211 osdc->client->options->osd_keepalive_timeout);
1212} 1212}
1213 1213
1214static void __cancel_osd_timeout(struct ceph_osd_client *osdc) 1214static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -1576,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
1576{ 1576{
1577 struct ceph_osd_client *osdc = 1577 struct ceph_osd_client *osdc =
1578 container_of(work, struct ceph_osd_client, timeout_work.work); 1578 container_of(work, struct ceph_osd_client, timeout_work.work);
1579 struct ceph_options *opts = osdc->client->options;
1579 struct ceph_osd_request *req; 1580 struct ceph_osd_request *req;
1580 struct ceph_osd *osd; 1581 struct ceph_osd *osd;
1581 unsigned long keepalive =
1582 osdc->client->options->osd_keepalive_timeout * HZ;
1583 struct list_head slow_osds; 1582 struct list_head slow_osds;
1584 dout("timeout\n"); 1583 dout("timeout\n");
1585 down_read(&osdc->map_sem); 1584 down_read(&osdc->map_sem);
@@ -1595,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
1595 */ 1594 */
1596 INIT_LIST_HEAD(&slow_osds); 1595 INIT_LIST_HEAD(&slow_osds);
1597 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { 1596 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
1598 if (time_before(jiffies, req->r_stamp + keepalive)) 1597 if (time_before(jiffies,
1598 req->r_stamp + opts->osd_keepalive_timeout))
1599 break; 1599 break;
1600 1600
1601 osd = req->r_osd; 1601 osd = req->r_osd;
@@ -1622,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
1622 struct ceph_osd_client *osdc = 1622 struct ceph_osd_client *osdc =
1623 container_of(work, struct ceph_osd_client, 1623 container_of(work, struct ceph_osd_client,
1624 osds_timeout_work.work); 1624 osds_timeout_work.work);
1625 unsigned long delay = 1625 unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
1626 osdc->client->options->osd_idle_ttl * HZ >> 2;
1627 1626
1628 dout("osds timeout\n"); 1627 dout("osds timeout\n");
1629 down_read(&osdc->map_sem); 1628 down_read(&osdc->map_sem);
@@ -2628,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
2628 osdc->event_count = 0; 2627 osdc->event_count = 0;
2629 2628
2630 schedule_delayed_work(&osdc->osds_timeout_work, 2629 schedule_delayed_work(&osdc->osds_timeout_work,
2631 round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); 2630 round_jiffies_relative(osdc->client->options->osd_idle_ttl));
2632 2631
2633 err = -ENOMEM; 2632 err = -ENOMEM;
2634 osdc->req_mempool = mempool_create_kmalloc_pool(10, 2633 osdc->req_mempool = mempool_create_kmalloc_pool(10,