aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-08-04 13:15:11 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-08-04 13:15:11 -0400
commit c63716ab4d77f3df7d12260fc62cbf847c2a85d1 (patch)
tree e87e666aab9be7caa1fcfd41a8bca5607c3795a4 /net
parent a64c40e79fb20c15e42e184d7cde30f900d138eb (diff)
parent ae78dd8139ce93a528beb7f3914531b7a7be9e30 (diff)
Merge tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
 "A bunch of fixes and follow-ups for -rc1 Luminous patches: issues with
  ->reencode_message() and last minute RADOS semantic changes in v12.1.2"

* tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client:
  libceph: make RECOVERY_DELETES feature create a new interval
  libceph: upmap semantic changes
  crush: assume weight_set != null implies weight_set_size > 0
  libceph: fallback for when there isn't a pool-specific choose_arg
  libceph: don't call ->reencode_message() more than once per message
  libceph: make encode_request_*() work with r_mempool requests
Diffstat (limited to 'net')
-rw-r--r-- net/ceph/crush/mapper.c 2
-rw-r--r-- net/ceph/messenger.c 6
-rw-r--r-- net/ceph/osd_client.c 14
-rw-r--r-- net/ceph/osdmap.c 60
4 files changed, 45 insertions, 37 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 746b145bfd11..417df675c71b 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
306 const struct crush_choose_arg *arg, 306 const struct crush_choose_arg *arg,
307 int position) 307 int position)
308{ 308{
309 if (!arg || !arg->weight_set || arg->weight_set_size == 0) 309 if (!arg || !arg->weight_set)
310 return bucket->item_weights; 310 return bucket->item_weights;
311 311
312 if (position >= arg->weight_set_size) 312 if (position >= arg->weight_set_size)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b7cc615d42ef..a67298c7e0cd 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con)
1287 if (m->needs_out_seq) { 1287 if (m->needs_out_seq) {
1288 m->hdr.seq = cpu_to_le64(++con->out_seq); 1288 m->hdr.seq = cpu_to_le64(++con->out_seq);
1289 m->needs_out_seq = false; 1289 m->needs_out_seq = false;
1290 }
1291 1290
1292 if (con->ops->reencode_message) 1291 if (con->ops->reencode_message)
1293 con->ops->reencode_message(m); 1292 con->ops->reencode_message(m);
1293 }
1294 1294
1295 dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", 1295 dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
1296 m, con->out_seq, le16_to_cpu(m->hdr.type), 1296 m, con->out_seq, le16_to_cpu(m->hdr.type),
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 901bb8221366..dcfbdd74dfd1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
1337 bool legacy_change; 1337 bool legacy_change;
1338 bool split = false; 1338 bool split = false;
1339 bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); 1339 bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
1340 bool recovery_deletes = ceph_osdmap_flag(osdc,
1341 CEPH_OSDMAP_RECOVERY_DELETES);
1340 enum calc_target_result ct_res; 1342 enum calc_target_result ct_res;
1341 int ret; 1343 int ret;
1342 1344
@@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
1399 pi->pg_num, 1401 pi->pg_num,
1400 t->sort_bitwise, 1402 t->sort_bitwise,
1401 sort_bitwise, 1403 sort_bitwise,
1404 t->recovery_deletes,
1405 recovery_deletes,
1402 &last_pgid)) 1406 &last_pgid))
1403 force_resend = true; 1407 force_resend = true;
1404 1408
@@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
1421 t->pg_num = pi->pg_num; 1425 t->pg_num = pi->pg_num;
1422 t->pg_num_mask = pi->pg_num_mask; 1426 t->pg_num_mask = pi->pg_num_mask;
1423 t->sort_bitwise = sort_bitwise; 1427 t->sort_bitwise = sort_bitwise;
1428 t->recovery_deletes = recovery_deletes;
1424 1429
1425 t->osd = acting.primary; 1430 t->osd = acting.primary;
1426 } 1431 }
@@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
1918 } 1923 }
1919 1924
1920 ceph_encode_32(&p, req->r_attempts); /* retry_attempt */ 1925 ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
1921 BUG_ON(p != end - 8); /* space for features */ 1926 BUG_ON(p > end - 8); /* space for features */
1922 1927
1923 msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */ 1928 msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
1924 /* front_len is finalized in encode_request_finish() */ 1929 /* front_len is finalized in encode_request_finish() */
1930 msg->front.iov_len = p - msg->front.iov_base;
1931 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
1925 msg->hdr.data_len = cpu_to_le32(data_len); 1932 msg->hdr.data_len = cpu_to_le32(data_len);
1926 /* 1933 /*
1927 * The header "data_off" is a hint to the receiver allowing it 1934 * The header "data_off" is a hint to the receiver allowing it
@@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
1937static void encode_request_finish(struct ceph_msg *msg) 1944static void encode_request_finish(struct ceph_msg *msg)
1938{ 1945{
1939 void *p = msg->front.iov_base; 1946 void *p = msg->front.iov_base;
1947 void *const partial_end = p + msg->front.iov_len;
1940 void *const end = p + msg->front_alloc_len; 1948 void *const end = p + msg->front_alloc_len;
1941 1949
1942 if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) { 1950 if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
1943 /* luminous OSD -- encode features and be done */ 1951 /* luminous OSD -- encode features and be done */
1944 p = end - 8; 1952 p = partial_end;
1945 ceph_encode_64(&p, msg->con->peer_features); 1953 ceph_encode_64(&p, msg->con->peer_features);
1946 } else { 1954 } else {
1947 struct { 1955 struct {
@@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg)
1984 oid_len = p - oid; 1992 oid_len = p - oid;
1985 1993
1986 tail = p; 1994 tail = p;
1987 tail_len = (end - p) - 8; 1995 tail_len = partial_end - p;
1988 1996
1989 p = msg->front.iov_base; 1997 p = msg->front.iov_base;
1990 ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc)); 1998 ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 64ae9f89773a..f358d0bfa76b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
295 ret = decode_choose_arg(p, end, arg); 295 ret = decode_choose_arg(p, end, arg);
296 if (ret) 296 if (ret)
297 goto fail; 297 goto fail;
298
299 if (arg->ids_size &&
300 arg->ids_size != c->buckets[bucket_index]->size)
301 goto e_inval;
298 } 302 }
299 303
300 insert_choose_arg_map(&c->choose_args, arg_map); 304 insert_choose_arg_map(&c->choose_args, arg_map);
@@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
2078 u32 new_pg_num, 2082 u32 new_pg_num,
2079 bool old_sort_bitwise, 2083 bool old_sort_bitwise,
2080 bool new_sort_bitwise, 2084 bool new_sort_bitwise,
2085 bool old_recovery_deletes,
2086 bool new_recovery_deletes,
2081 const struct ceph_pg *pgid) 2087 const struct ceph_pg *pgid)
2082{ 2088{
2083 return !osds_equal(old_acting, new_acting) || 2089 return !osds_equal(old_acting, new_acting) ||
@@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
2085 old_size != new_size || 2091 old_size != new_size ||
2086 old_min_size != new_min_size || 2092 old_min_size != new_min_size ||
2087 ceph_pg_is_split(pgid, old_pg_num, new_pg_num) || 2093 ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
2088 old_sort_bitwise != new_sort_bitwise; 2094 old_sort_bitwise != new_sort_bitwise ||
2095 old_recovery_deletes != new_recovery_deletes;
2089} 2096}
2090 2097
2091static int calc_pg_rank(int osd, const struct ceph_osds *acting) 2098static int calc_pg_rank(int osd, const struct ceph_osds *acting)
@@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
2301 } 2308 }
2302} 2309}
2303 2310
2311/*
2312 * Magic value used for a "default" fallback choose_args, used if the
2313 * crush_choose_arg_map passed to do_crush() does not exist. If this
2314 * also doesn't exist, fall back to canonical weights.
2315 */
2316#define CEPH_DEFAULT_CHOOSE_ARGS -1
2317
2304static int do_crush(struct ceph_osdmap *map, int ruleno, int x, 2318static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
2305 int *result, int result_max, 2319 int *result, int result_max,
2306 const __u32 *weight, int weight_max, 2320 const __u32 *weight, int weight_max,
2307 u64 choose_args_index) 2321 s64 choose_args_index)
2308{ 2322{
2309 struct crush_choose_arg_map *arg_map; 2323 struct crush_choose_arg_map *arg_map;
2310 int r; 2324 int r;
@@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
2313 2327
2314 arg_map = lookup_choose_arg_map(&map->crush->choose_args, 2328 arg_map = lookup_choose_arg_map(&map->crush->choose_args,
2315 choose_args_index); 2329 choose_args_index);
2330 if (!arg_map)
2331 arg_map = lookup_choose_arg_map(&map->crush->choose_args,
2332 CEPH_DEFAULT_CHOOSE_ARGS);
2316 2333
2317 mutex_lock(&map->crush_workspace_mutex); 2334 mutex_lock(&map->crush_workspace_mutex);
2318 r = crush_do_rule(map->crush, ruleno, x, result, result_max, 2335 r = crush_do_rule(map->crush, ruleno, x, result, result_max,
@@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
2423 for (i = 0; i < pg->pg_upmap.len; i++) 2440 for (i = 0; i < pg->pg_upmap.len; i++)
2424 raw->osds[i] = pg->pg_upmap.osds[i]; 2441 raw->osds[i] = pg->pg_upmap.osds[i];
2425 raw->size = pg->pg_upmap.len; 2442 raw->size = pg->pg_upmap.len;
2426 return; 2443 /* check and apply pg_upmap_items, if any */
2427 } 2444 }
2428 2445
2429 pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); 2446 pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
2430 if (pg) { 2447 if (pg) {
2431 /* 2448 for (i = 0; i < raw->size; i++) {
2432 * Note: this approach does not allow a bidirectional swap, 2449 for (j = 0; j < pg->pg_upmap_items.len; j++) {
2433 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. 2450 int from = pg->pg_upmap_items.from_to[j][0];
2434 */ 2451 int to = pg->pg_upmap_items.from_to[j][1];
2435 for (i = 0; i < pg->pg_upmap_items.len; i++) { 2452
2436 int from = pg->pg_upmap_items.from_to[i][0]; 2453 if (from == raw->osds[i]) {
2437 int to = pg->pg_upmap_items.from_to[i][1]; 2454 if (!(to != CRUSH_ITEM_NONE &&
2438 int pos = -1; 2455 to < osdmap->max_osd &&
2439 bool exists = false; 2456 osdmap->osd_weight[to] == 0))
2440 2457 raw->osds[i] = to;
2441 /* make sure replacement doesn't already appear */
2442 for (j = 0; j < raw->size; j++) {
2443 int osd = raw->osds[j];
2444
2445 if (osd == to) {
2446 exists = true;
2447 break; 2458 break;
2448 } 2459 }
2449 /* ignore mapping if target is marked out */
2450 if (osd == from && pos < 0 &&
2451 !(to != CRUSH_ITEM_NONE &&
2452 to < osdmap->max_osd &&
2453 osdmap->osd_weight[to] == 0)) {
2454 pos = j;
2455 }
2456 }
2457 if (!exists && pos >= 0) {
2458 raw->osds[pos] = to;
2459 return;
2460 } 2460 }
2461 } 2461 }
2462 } 2462 }