aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-03-10 14:05:47 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-03-10 14:05:47 -0500
commit24c534bb161b8c95f261b34cbe44bbd043e829fc (patch)
tree5039615326276e24f898171acdd8c012e6a6db73
parent2baf38095cd79bb18108bc6131aec84ea85ae368 (diff)
parent7cc5e38f2f0b0b58a22a4c18a56348dd99a71270 (diff)
Merge tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov: - a fix for the recently discovered misdirected requests bug present in jewel and later on the server side and all stable kernels - a fixup for -rc1 CRUSH changes - two usability enhancements: osd_request_timeout option and supported_features bus attribute. * tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client: libceph: osd_request_timeout option rbd: supported_features bus attribute libceph: don't set weight to IN when OSD is destroyed libceph: fix crush_decode() for older maps
-rw-r--r--drivers/block/rbd.c16
-rw-r--r--include/linux/ceph/libceph.h2
-rw-r--r--include/linux/ceph/osd_client.h1
-rw-r--r--net/ceph/ceph_common.c15
-rw-r--r--net/ceph/osd_client.c36
-rw-r--r--net/ceph/osdmap.c4
6 files changed, 66 insertions, 8 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4d6807723798..517838b65964 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v)
120 120
121/* Feature bits */ 121/* Feature bits */
122 122
123#define RBD_FEATURE_LAYERING (1<<0) 123#define RBD_FEATURE_LAYERING (1ULL<<0)
124#define RBD_FEATURE_STRIPINGV2 (1<<1) 124#define RBD_FEATURE_STRIPINGV2 (1ULL<<1)
125#define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) 125#define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2)
126#define RBD_FEATURE_DATA_POOL (1<<7) 126#define RBD_FEATURE_DATA_POOL (1ULL<<7)
127
127#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ 128#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \
128 RBD_FEATURE_STRIPINGV2 | \ 129 RBD_FEATURE_STRIPINGV2 | \
129 RBD_FEATURE_EXCLUSIVE_LOCK | \ 130 RBD_FEATURE_EXCLUSIVE_LOCK | \
@@ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
499 return is_lock_owner; 500 return is_lock_owner;
500} 501}
501 502
503static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
504{
505 return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
506}
507
502static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); 508static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
503static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); 509static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
504static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); 510static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
505static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); 511static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
512static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
506 513
507static struct attribute *rbd_bus_attrs[] = { 514static struct attribute *rbd_bus_attrs[] = {
508 &bus_attr_add.attr, 515 &bus_attr_add.attr,
509 &bus_attr_remove.attr, 516 &bus_attr_remove.attr,
510 &bus_attr_add_single_major.attr, 517 &bus_attr_add_single_major.attr,
511 &bus_attr_remove_single_major.attr, 518 &bus_attr_remove_single_major.attr,
519 &bus_attr_supported_features.attr,
512 NULL, 520 NULL,
513}; 521};
514 522
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e26581..88cd5dc8e238 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
48 unsigned long mount_timeout; /* jiffies */ 48 unsigned long mount_timeout; /* jiffies */
49 unsigned long osd_idle_ttl; /* jiffies */ 49 unsigned long osd_idle_ttl; /* jiffies */
50 unsigned long osd_keepalive_timeout; /* jiffies */ 50 unsigned long osd_keepalive_timeout; /* jiffies */
51 unsigned long osd_request_timeout; /* jiffies */
51 52
52 /* 53 /*
53 * any type that can't be simply compared or doesn't need need 54 * any type that can't be simply compared or doesn't need need
@@ -68,6 +69,7 @@ struct ceph_options {
68#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) 69#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
69#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) 70#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
70#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) 71#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
72#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */
71 73
72#define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) 74#define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
73#define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) 75#define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 2ea0c282f3dc..c125b5d9e13c 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -189,6 +189,7 @@ struct ceph_osd_request {
189 189
190 /* internal */ 190 /* internal */
191 unsigned long r_stamp; /* jiffies, send or check time */ 191 unsigned long r_stamp; /* jiffies, send or check time */
192 unsigned long r_start_stamp; /* jiffies */
192 int r_attempts; 193 int r_attempts;
193 struct ceph_eversion r_replay_version; /* aka reassert_version */ 194 struct ceph_eversion r_replay_version; /* aka reassert_version */
194 u32 r_last_force_resend; 195 u32 r_last_force_resend;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e88599b9d..108533859a53 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
230 Opt_osdkeepalivetimeout, 230 Opt_osdkeepalivetimeout,
231 Opt_mount_timeout, 231 Opt_mount_timeout,
232 Opt_osd_idle_ttl, 232 Opt_osd_idle_ttl,
233 Opt_osd_request_timeout,
233 Opt_last_int, 234 Opt_last_int,
234 /* int args above */ 235 /* int args above */
235 Opt_fsid, 236 Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
256 {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, 257 {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
257 {Opt_mount_timeout, "mount_timeout=%d"}, 258 {Opt_mount_timeout, "mount_timeout=%d"},
258 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, 259 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
260 {Opt_osd_request_timeout, "osd_request_timeout=%d"},
259 /* int args above */ 261 /* int args above */
260 {Opt_fsid, "fsid=%s"}, 262 {Opt_fsid, "fsid=%s"},
261 {Opt_name, "name=%s"}, 263 {Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
361 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 363 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
362 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 364 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
363 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 365 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
366 opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
364 367
365 /* get mon ip(s) */ 368 /* get mon ip(s) */
366 /* ip1[:port1][,ip2[:port2]...] */ 369 /* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
473 } 476 }
474 opt->mount_timeout = msecs_to_jiffies(intval * 1000); 477 opt->mount_timeout = msecs_to_jiffies(intval * 1000);
475 break; 478 break;
479 case Opt_osd_request_timeout:
480 /* 0 is "wait forever" (i.e. infinite timeout) */
481 if (intval < 0 || intval > INT_MAX / 1000) {
482 pr_err("osd_request_timeout out of range\n");
483 err = -EINVAL;
484 goto out;
485 }
486 opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
487 break;
476 488
477 case Opt_share: 489 case Opt_share:
478 opt->flags &= ~CEPH_OPT_NOSHARE; 490 opt->flags &= ~CEPH_OPT_NOSHARE;
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
557 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 569 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
558 seq_printf(m, "osdkeepalivetimeout=%d,", 570 seq_printf(m, "osdkeepalivetimeout=%d,",
559 jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); 571 jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
572 if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT)
573 seq_printf(m, "osd_request_timeout=%d,",
574 jiffies_to_msecs(opt->osd_request_timeout) / 1000);
560 575
561 /* drop redundant comma */ 576 /* drop redundant comma */
562 if (m->count != pos) 577 if (m->count != pos)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b65bbf9f45eb..e15ea9e4c495 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req)
1709 1709
1710 req->r_flags |= CEPH_OSD_FLAG_ONDISK; 1710 req->r_flags |= CEPH_OSD_FLAG_ONDISK;
1711 atomic_inc(&req->r_osdc->num_requests); 1711 atomic_inc(&req->r_osdc->num_requests);
1712
1713 req->r_start_stamp = jiffies;
1712} 1714}
1713 1715
1714static void submit_request(struct ceph_osd_request *req, bool wrlocked) 1716static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req)
1789 ceph_osdc_put_request(req); 1791 ceph_osdc_put_request(req);
1790} 1792}
1791 1793
1794static void abort_request(struct ceph_osd_request *req, int err)
1795{
1796 dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);
1797
1798 cancel_map_check(req);
1799 complete_request(req, err);
1800}
1801
1792static void check_pool_dne(struct ceph_osd_request *req) 1802static void check_pool_dne(struct ceph_osd_request *req)
1793{ 1803{
1794 struct ceph_osd_client *osdc = req->r_osdc; 1804 struct ceph_osd_client *osdc = req->r_osdc;
@@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work)
2487 container_of(work, struct ceph_osd_client, timeout_work.work); 2497 container_of(work, struct ceph_osd_client, timeout_work.work);
2488 struct ceph_options *opts = osdc->client->options; 2498 struct ceph_options *opts = osdc->client->options;
2489 unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; 2499 unsigned long cutoff = jiffies - opts->osd_keepalive_timeout;
2500 unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout;
2490 LIST_HEAD(slow_osds); 2501 LIST_HEAD(slow_osds);
2491 struct rb_node *n, *p; 2502 struct rb_node *n, *p;
2492 2503
@@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work)
2502 struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); 2513 struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
2503 bool found = false; 2514 bool found = false;
2504 2515
2505 for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { 2516 for (p = rb_first(&osd->o_requests); p; ) {
2506 struct ceph_osd_request *req = 2517 struct ceph_osd_request *req =
2507 rb_entry(p, struct ceph_osd_request, r_node); 2518 rb_entry(p, struct ceph_osd_request, r_node);
2508 2519
2520 p = rb_next(p); /* abort_request() */
2521
2509 if (time_before(req->r_stamp, cutoff)) { 2522 if (time_before(req->r_stamp, cutoff)) {
2510 dout(" req %p tid %llu on osd%d is laggy\n", 2523 dout(" req %p tid %llu on osd%d is laggy\n",
2511 req, req->r_tid, osd->o_osd); 2524 req, req->r_tid, osd->o_osd);
2512 found = true; 2525 found = true;
2513 } 2526 }
2527 if (opts->osd_request_timeout &&
2528 time_before(req->r_start_stamp, expiry_cutoff)) {
2529 pr_err_ratelimited("tid %llu on osd%d timeout\n",
2530 req->r_tid, osd->o_osd);
2531 abort_request(req, -ETIMEDOUT);
2532 }
2514 } 2533 }
2515 for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { 2534 for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) {
2516 struct ceph_osd_linger_request *lreq = 2535 struct ceph_osd_linger_request *lreq =
@@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work)
2530 list_move_tail(&osd->o_keepalive_item, &slow_osds); 2549 list_move_tail(&osd->o_keepalive_item, &slow_osds);
2531 } 2550 }
2532 2551
2552 if (opts->osd_request_timeout) {
2553 for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
2554 struct ceph_osd_request *req =
2555 rb_entry(p, struct ceph_osd_request, r_node);
2556
2557 p = rb_next(p); /* abort_request() */
2558
2559 if (time_before(req->r_start_stamp, expiry_cutoff)) {
2560 pr_err_ratelimited("tid %llu on osd%d timeout\n",
2561 req->r_tid, osdc->homeless_osd.o_osd);
2562 abort_request(req, -ETIMEDOUT);
2563 }
2564 }
2565 }
2566
2533 if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) 2567 if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
2534 maybe_request_map(osdc); 2568 maybe_request_map(osdc);
2535 2569
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 6824c0ec8373..ffe9e904d4d1 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
390 dout("crush decode tunable chooseleaf_stable = %d\n", 390 dout("crush decode tunable chooseleaf_stable = %d\n",
391 c->chooseleaf_stable); 391 c->chooseleaf_stable);
392 392
393 crush_finalize(c);
394
395done: 393done:
394 crush_finalize(c);
396 dout("crush_decode success\n"); 395 dout("crush_decode success\n");
397 return c; 396 return c;
398 397
@@ -1380,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end,
1380 if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && 1379 if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
1381 (xorstate & CEPH_OSD_EXISTS)) { 1380 (xorstate & CEPH_OSD_EXISTS)) {
1382 pr_info("osd%d does not exist\n", osd); 1381 pr_info("osd%d does not exist\n", osd);
1383 map->osd_weight[osd] = CEPH_OSD_IN;
1384 ret = set_primary_affinity(map, osd, 1382 ret = set_primary_affinity(map, osd,
1385 CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); 1383 CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
1386 if (ret) 1384 if (ret)