diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-10 14:05:47 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-10 14:05:47 -0500 |
commit | 24c534bb161b8c95f261b34cbe44bbd043e829fc (patch) | |
tree | 5039615326276e24f898171acdd8c012e6a6db73 | |
parent | 2baf38095cd79bb18108bc6131aec84ea85ae368 (diff) | |
parent | 7cc5e38f2f0b0b58a22a4c18a56348dd99a71270 (diff) |
Merge tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
- a fix for the recently discovered misdirected requests bug present in
jewel and later on the server side and all stable kernels
- a fixup for -rc1 CRUSH changes
- two usability enhancements: osd_request_timeout option and
supported_features bus attribute.
* tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client:
libceph: osd_request_timeout option
rbd: supported_features bus attribute
libceph: don't set weight to IN when OSD is destroyed
libceph: fix crush_decode() for older maps
-rw-r--r-- | drivers/block/rbd.c | 16 | ||||
-rw-r--r-- | include/linux/ceph/libceph.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/osd_client.h | 1 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 15 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 36 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 4 |
6 files changed, 66 insertions, 8 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4d6807723798..517838b65964 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v) | |||
120 | 120 | ||
121 | /* Feature bits */ | 121 | /* Feature bits */ |
122 | 122 | ||
123 | #define RBD_FEATURE_LAYERING (1<<0) | 123 | #define RBD_FEATURE_LAYERING (1ULL<<0) |
124 | #define RBD_FEATURE_STRIPINGV2 (1<<1) | 124 | #define RBD_FEATURE_STRIPINGV2 (1ULL<<1) |
125 | #define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) | 125 | #define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2) |
126 | #define RBD_FEATURE_DATA_POOL (1<<7) | 126 | #define RBD_FEATURE_DATA_POOL (1ULL<<7) |
127 | |||
127 | #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ | 128 | #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ |
128 | RBD_FEATURE_STRIPINGV2 | \ | 129 | RBD_FEATURE_STRIPINGV2 | \ |
129 | RBD_FEATURE_EXCLUSIVE_LOCK | \ | 130 | RBD_FEATURE_EXCLUSIVE_LOCK | \ |
@@ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev) | |||
499 | return is_lock_owner; | 500 | return is_lock_owner; |
500 | } | 501 | } |
501 | 502 | ||
503 | static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf) | ||
504 | { | ||
505 | return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED); | ||
506 | } | ||
507 | |||
502 | static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); | 508 | static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); |
503 | static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); | 509 | static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); |
504 | static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); | 510 | static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); |
505 | static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); | 511 | static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); |
512 | static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL); | ||
506 | 513 | ||
507 | static struct attribute *rbd_bus_attrs[] = { | 514 | static struct attribute *rbd_bus_attrs[] = { |
508 | &bus_attr_add.attr, | 515 | &bus_attr_add.attr, |
509 | &bus_attr_remove.attr, | 516 | &bus_attr_remove.attr, |
510 | &bus_attr_add_single_major.attr, | 517 | &bus_attr_add_single_major.attr, |
511 | &bus_attr_remove_single_major.attr, | 518 | &bus_attr_remove_single_major.attr, |
519 | &bus_attr_supported_features.attr, | ||
512 | NULL, | 520 | NULL, |
513 | }; | 521 | }; |
514 | 522 | ||
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 1816c5e26581..88cd5dc8e238 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
@@ -48,6 +48,7 @@ struct ceph_options { | |||
48 | unsigned long mount_timeout; /* jiffies */ | 48 | unsigned long mount_timeout; /* jiffies */ |
49 | unsigned long osd_idle_ttl; /* jiffies */ | 49 | unsigned long osd_idle_ttl; /* jiffies */ |
50 | unsigned long osd_keepalive_timeout; /* jiffies */ | 50 | unsigned long osd_keepalive_timeout; /* jiffies */ |
51 | unsigned long osd_request_timeout; /* jiffies */ | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * any type that can't be simply compared or doesn't need need | 54 | * any type that can't be simply compared or doesn't need need |
@@ -68,6 +69,7 @@ struct ceph_options { | |||
68 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) | 69 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) |
69 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) | 70 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) |
70 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) | 71 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) |
72 | #define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ | ||
71 | 73 | ||
72 | #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) | 74 | #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) |
73 | #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) | 75 | #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) |
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2ea0c282f3dc..c125b5d9e13c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
@@ -189,6 +189,7 @@ struct ceph_osd_request { | |||
189 | 189 | ||
190 | /* internal */ | 190 | /* internal */ |
191 | unsigned long r_stamp; /* jiffies, send or check time */ | 191 | unsigned long r_stamp; /* jiffies, send or check time */ |
192 | unsigned long r_start_stamp; /* jiffies */ | ||
192 | int r_attempts; | 193 | int r_attempts; |
193 | struct ceph_eversion r_replay_version; /* aka reassert_version */ | 194 | struct ceph_eversion r_replay_version; /* aka reassert_version */ |
194 | u32 r_last_force_resend; | 195 | u32 r_last_force_resend; |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 464e88599b9d..108533859a53 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -230,6 +230,7 @@ enum { | |||
230 | Opt_osdkeepalivetimeout, | 230 | Opt_osdkeepalivetimeout, |
231 | Opt_mount_timeout, | 231 | Opt_mount_timeout, |
232 | Opt_osd_idle_ttl, | 232 | Opt_osd_idle_ttl, |
233 | Opt_osd_request_timeout, | ||
233 | Opt_last_int, | 234 | Opt_last_int, |
234 | /* int args above */ | 235 | /* int args above */ |
235 | Opt_fsid, | 236 | Opt_fsid, |
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = { | |||
256 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | 257 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, |
257 | {Opt_mount_timeout, "mount_timeout=%d"}, | 258 | {Opt_mount_timeout, "mount_timeout=%d"}, |
258 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | 259 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, |
260 | {Opt_osd_request_timeout, "osd_request_timeout=%d"}, | ||
259 | /* int args above */ | 261 | /* int args above */ |
260 | {Opt_fsid, "fsid=%s"}, | 262 | {Opt_fsid, "fsid=%s"}, |
261 | {Opt_name, "name=%s"}, | 263 | {Opt_name, "name=%s"}, |
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
361 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 363 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; |
362 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; | 364 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; |
363 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; | 365 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; |
366 | opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; | ||
364 | 367 | ||
365 | /* get mon ip(s) */ | 368 | /* get mon ip(s) */ |
366 | /* ip1[:port1][,ip2[:port2]...] */ | 369 | /* ip1[:port1][,ip2[:port2]...] */ |
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name, | |||
473 | } | 476 | } |
474 | opt->mount_timeout = msecs_to_jiffies(intval * 1000); | 477 | opt->mount_timeout = msecs_to_jiffies(intval * 1000); |
475 | break; | 478 | break; |
479 | case Opt_osd_request_timeout: | ||
480 | /* 0 is "wait forever" (i.e. infinite timeout) */ | ||
481 | if (intval < 0 || intval > INT_MAX / 1000) { | ||
482 | pr_err("osd_request_timeout out of range\n"); | ||
483 | err = -EINVAL; | ||
484 | goto out; | ||
485 | } | ||
486 | opt->osd_request_timeout = msecs_to_jiffies(intval * 1000); | ||
487 | break; | ||
476 | 488 | ||
477 | case Opt_share: | 489 | case Opt_share: |
478 | opt->flags &= ~CEPH_OPT_NOSHARE; | 490 | opt->flags &= ~CEPH_OPT_NOSHARE; |
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) | |||
557 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | 569 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) |
558 | seq_printf(m, "osdkeepalivetimeout=%d,", | 570 | seq_printf(m, "osdkeepalivetimeout=%d,", |
559 | jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); | 571 | jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); |
572 | if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT) | ||
573 | seq_printf(m, "osd_request_timeout=%d,", | ||
574 | jiffies_to_msecs(opt->osd_request_timeout) / 1000); | ||
560 | 575 | ||
561 | /* drop redundant comma */ | 576 | /* drop redundant comma */ |
562 | if (m->count != pos) | 577 | if (m->count != pos) |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b65bbf9f45eb..e15ea9e4c495 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req) | |||
1709 | 1709 | ||
1710 | req->r_flags |= CEPH_OSD_FLAG_ONDISK; | 1710 | req->r_flags |= CEPH_OSD_FLAG_ONDISK; |
1711 | atomic_inc(&req->r_osdc->num_requests); | 1711 | atomic_inc(&req->r_osdc->num_requests); |
1712 | |||
1713 | req->r_start_stamp = jiffies; | ||
1712 | } | 1714 | } |
1713 | 1715 | ||
1714 | static void submit_request(struct ceph_osd_request *req, bool wrlocked) | 1716 | static void submit_request(struct ceph_osd_request *req, bool wrlocked) |
@@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req) | |||
1789 | ceph_osdc_put_request(req); | 1791 | ceph_osdc_put_request(req); |
1790 | } | 1792 | } |
1791 | 1793 | ||
1794 | static void abort_request(struct ceph_osd_request *req, int err) | ||
1795 | { | ||
1796 | dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); | ||
1797 | |||
1798 | cancel_map_check(req); | ||
1799 | complete_request(req, err); | ||
1800 | } | ||
1801 | |||
1792 | static void check_pool_dne(struct ceph_osd_request *req) | 1802 | static void check_pool_dne(struct ceph_osd_request *req) |
1793 | { | 1803 | { |
1794 | struct ceph_osd_client *osdc = req->r_osdc; | 1804 | struct ceph_osd_client *osdc = req->r_osdc; |
@@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work) | |||
2487 | container_of(work, struct ceph_osd_client, timeout_work.work); | 2497 | container_of(work, struct ceph_osd_client, timeout_work.work); |
2488 | struct ceph_options *opts = osdc->client->options; | 2498 | struct ceph_options *opts = osdc->client->options; |
2489 | unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; | 2499 | unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; |
2500 | unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout; | ||
2490 | LIST_HEAD(slow_osds); | 2501 | LIST_HEAD(slow_osds); |
2491 | struct rb_node *n, *p; | 2502 | struct rb_node *n, *p; |
2492 | 2503 | ||
@@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work) | |||
2502 | struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); | 2513 | struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); |
2503 | bool found = false; | 2514 | bool found = false; |
2504 | 2515 | ||
2505 | for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { | 2516 | for (p = rb_first(&osd->o_requests); p; ) { |
2506 | struct ceph_osd_request *req = | 2517 | struct ceph_osd_request *req = |
2507 | rb_entry(p, struct ceph_osd_request, r_node); | 2518 | rb_entry(p, struct ceph_osd_request, r_node); |
2508 | 2519 | ||
2520 | p = rb_next(p); /* abort_request() */ | ||
2521 | |||
2509 | if (time_before(req->r_stamp, cutoff)) { | 2522 | if (time_before(req->r_stamp, cutoff)) { |
2510 | dout(" req %p tid %llu on osd%d is laggy\n", | 2523 | dout(" req %p tid %llu on osd%d is laggy\n", |
2511 | req, req->r_tid, osd->o_osd); | 2524 | req, req->r_tid, osd->o_osd); |
2512 | found = true; | 2525 | found = true; |
2513 | } | 2526 | } |
2527 | if (opts->osd_request_timeout && | ||
2528 | time_before(req->r_start_stamp, expiry_cutoff)) { | ||
2529 | pr_err_ratelimited("tid %llu on osd%d timeout\n", | ||
2530 | req->r_tid, osd->o_osd); | ||
2531 | abort_request(req, -ETIMEDOUT); | ||
2532 | } | ||
2514 | } | 2533 | } |
2515 | for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { | 2534 | for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { |
2516 | struct ceph_osd_linger_request *lreq = | 2535 | struct ceph_osd_linger_request *lreq = |
@@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work) | |||
2530 | list_move_tail(&osd->o_keepalive_item, &slow_osds); | 2549 | list_move_tail(&osd->o_keepalive_item, &slow_osds); |
2531 | } | 2550 | } |
2532 | 2551 | ||
2552 | if (opts->osd_request_timeout) { | ||
2553 | for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { | ||
2554 | struct ceph_osd_request *req = | ||
2555 | rb_entry(p, struct ceph_osd_request, r_node); | ||
2556 | |||
2557 | p = rb_next(p); /* abort_request() */ | ||
2558 | |||
2559 | if (time_before(req->r_start_stamp, expiry_cutoff)) { | ||
2560 | pr_err_ratelimited("tid %llu on osd%d timeout\n", | ||
2561 | req->r_tid, osdc->homeless_osd.o_osd); | ||
2562 | abort_request(req, -ETIMEDOUT); | ||
2563 | } | ||
2564 | } | ||
2565 | } | ||
2566 | |||
2533 | if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) | 2567 | if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) |
2534 | maybe_request_map(osdc); | 2568 | maybe_request_map(osdc); |
2535 | 2569 | ||
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 6824c0ec8373..ffe9e904d4d1 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
390 | dout("crush decode tunable chooseleaf_stable = %d\n", | 390 | dout("crush decode tunable chooseleaf_stable = %d\n", |
391 | c->chooseleaf_stable); | 391 | c->chooseleaf_stable); |
392 | 392 | ||
393 | crush_finalize(c); | ||
394 | |||
395 | done: | 393 | done: |
394 | crush_finalize(c); | ||
396 | dout("crush_decode success\n"); | 395 | dout("crush_decode success\n"); |
397 | return c; | 396 | return c; |
398 | 397 | ||
@@ -1380,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end, | |||
1380 | if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && | 1379 | if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && |
1381 | (xorstate & CEPH_OSD_EXISTS)) { | 1380 | (xorstate & CEPH_OSD_EXISTS)) { |
1382 | pr_info("osd%d does not exist\n", osd); | 1381 | pr_info("osd%d does not exist\n", osd); |
1383 | map->osd_weight[osd] = CEPH_OSD_IN; | ||
1384 | ret = set_primary_affinity(map, osd, | 1382 | ret = set_primary_affinity(map, osd, |
1385 | CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); | 1383 | CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); |
1386 | if (ret) | 1384 | if (ret) |