diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-08-04 13:15:11 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-08-04 13:15:11 -0400 |
| commit | c63716ab4d77f3df7d12260fc62cbf847c2a85d1 (patch) | |
| tree | e87e666aab9be7caa1fcfd41a8bca5607c3795a4 /net | |
| parent | a64c40e79fb20c15e42e184d7cde30f900d138eb (diff) | |
| parent | ae78dd8139ce93a528beb7f3914531b7a7be9e30 (diff) | |
Merge tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
"A bunch of fixes and follow-ups for -rc1 Luminous patches: issues with
->reencode_message() and last minute RADOS semantic changes in
v12.1.2"
* tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client:
libceph: make RECOVERY_DELETES feature create a new interval
libceph: upmap semantic changes
crush: assume weight_set != null implies weight_set_size > 0
libceph: fallback for when there isn't a pool-specific choose_arg
libceph: don't call ->reencode_message() more than once per message
libceph: make encode_request_*() work with r_mempool requests
Diffstat (limited to 'net')
| -rw-r--r-- | net/ceph/crush/mapper.c | 2 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 6 | ||||
| -rw-r--r-- | net/ceph/osd_client.c | 14 | ||||
| -rw-r--r-- | net/ceph/osdmap.c | 60 |
4 files changed, 45 insertions, 37 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 746b145bfd11..417df675c71b 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
| @@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket, | |||
| 306 | const struct crush_choose_arg *arg, | 306 | const struct crush_choose_arg *arg, |
| 307 | int position) | 307 | int position) |
| 308 | { | 308 | { |
| 309 | if (!arg || !arg->weight_set || arg->weight_set_size == 0) | 309 | if (!arg || !arg->weight_set) |
| 310 | return bucket->item_weights; | 310 | return bucket->item_weights; |
| 311 | 311 | ||
| 312 | if (position >= arg->weight_set_size) | 312 | if (position >= arg->weight_set_size) |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b7cc615d42ef..a67298c7e0cd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 1287 | if (m->needs_out_seq) { | 1287 | if (m->needs_out_seq) { |
| 1288 | m->hdr.seq = cpu_to_le64(++con->out_seq); | 1288 | m->hdr.seq = cpu_to_le64(++con->out_seq); |
| 1289 | m->needs_out_seq = false; | 1289 | m->needs_out_seq = false; |
| 1290 | } | ||
| 1291 | 1290 | ||
| 1292 | if (con->ops->reencode_message) | 1291 | if (con->ops->reencode_message) |
| 1293 | con->ops->reencode_message(m); | 1292 | con->ops->reencode_message(m); |
| 1293 | } | ||
| 1294 | 1294 | ||
| 1295 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", | 1295 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", |
| 1296 | m, con->out_seq, le16_to_cpu(m->hdr.type), | 1296 | m, con->out_seq, le16_to_cpu(m->hdr.type), |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 901bb8221366..dcfbdd74dfd1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, | |||
| 1337 | bool legacy_change; | 1337 | bool legacy_change; |
| 1338 | bool split = false; | 1338 | bool split = false; |
| 1339 | bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); | 1339 | bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); |
| 1340 | bool recovery_deletes = ceph_osdmap_flag(osdc, | ||
| 1341 | CEPH_OSDMAP_RECOVERY_DELETES); | ||
| 1340 | enum calc_target_result ct_res; | 1342 | enum calc_target_result ct_res; |
| 1341 | int ret; | 1343 | int ret; |
| 1342 | 1344 | ||
| @@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, | |||
| 1399 | pi->pg_num, | 1401 | pi->pg_num, |
| 1400 | t->sort_bitwise, | 1402 | t->sort_bitwise, |
| 1401 | sort_bitwise, | 1403 | sort_bitwise, |
| 1404 | t->recovery_deletes, | ||
| 1405 | recovery_deletes, | ||
| 1402 | &last_pgid)) | 1406 | &last_pgid)) |
| 1403 | force_resend = true; | 1407 | force_resend = true; |
| 1404 | 1408 | ||
| @@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, | |||
| 1421 | t->pg_num = pi->pg_num; | 1425 | t->pg_num = pi->pg_num; |
| 1422 | t->pg_num_mask = pi->pg_num_mask; | 1426 | t->pg_num_mask = pi->pg_num_mask; |
| 1423 | t->sort_bitwise = sort_bitwise; | 1427 | t->sort_bitwise = sort_bitwise; |
| 1428 | t->recovery_deletes = recovery_deletes; | ||
| 1424 | 1429 | ||
| 1425 | t->osd = acting.primary; | 1430 | t->osd = acting.primary; |
| 1426 | } | 1431 | } |
| @@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req, | |||
| 1918 | } | 1923 | } |
| 1919 | 1924 | ||
| 1920 | ceph_encode_32(&p, req->r_attempts); /* retry_attempt */ | 1925 | ceph_encode_32(&p, req->r_attempts); /* retry_attempt */ |
| 1921 | BUG_ON(p != end - 8); /* space for features */ | 1926 | BUG_ON(p > end - 8); /* space for features */ |
| 1922 | 1927 | ||
| 1923 | msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */ | 1928 | msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */ |
| 1924 | /* front_len is finalized in encode_request_finish() */ | 1929 | /* front_len is finalized in encode_request_finish() */ |
| 1930 | msg->front.iov_len = p - msg->front.iov_base; | ||
| 1931 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | ||
| 1925 | msg->hdr.data_len = cpu_to_le32(data_len); | 1932 | msg->hdr.data_len = cpu_to_le32(data_len); |
| 1926 | /* | 1933 | /* |
| 1927 | * The header "data_off" is a hint to the receiver allowing it | 1934 | * The header "data_off" is a hint to the receiver allowing it |
| @@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req, | |||
| 1937 | static void encode_request_finish(struct ceph_msg *msg) | 1944 | static void encode_request_finish(struct ceph_msg *msg) |
| 1938 | { | 1945 | { |
| 1939 | void *p = msg->front.iov_base; | 1946 | void *p = msg->front.iov_base; |
| 1947 | void *const partial_end = p + msg->front.iov_len; | ||
| 1940 | void *const end = p + msg->front_alloc_len; | 1948 | void *const end = p + msg->front_alloc_len; |
| 1941 | 1949 | ||
| 1942 | if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) { | 1950 | if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) { |
| 1943 | /* luminous OSD -- encode features and be done */ | 1951 | /* luminous OSD -- encode features and be done */ |
| 1944 | p = end - 8; | 1952 | p = partial_end; |
| 1945 | ceph_encode_64(&p, msg->con->peer_features); | 1953 | ceph_encode_64(&p, msg->con->peer_features); |
| 1946 | } else { | 1954 | } else { |
| 1947 | struct { | 1955 | struct { |
| @@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg) | |||
| 1984 | oid_len = p - oid; | 1992 | oid_len = p - oid; |
| 1985 | 1993 | ||
| 1986 | tail = p; | 1994 | tail = p; |
| 1987 | tail_len = (end - p) - 8; | 1995 | tail_len = partial_end - p; |
| 1988 | 1996 | ||
| 1989 | p = msg->front.iov_base; | 1997 | p = msg->front.iov_base; |
| 1990 | ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc)); | 1998 | ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc)); |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 64ae9f89773a..f358d0bfa76b 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
| @@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c) | |||
| 295 | ret = decode_choose_arg(p, end, arg); | 295 | ret = decode_choose_arg(p, end, arg); |
| 296 | if (ret) | 296 | if (ret) |
| 297 | goto fail; | 297 | goto fail; |
| 298 | |||
| 299 | if (arg->ids_size && | ||
| 300 | arg->ids_size != c->buckets[bucket_index]->size) | ||
| 301 | goto e_inval; | ||
| 298 | } | 302 | } |
| 299 | 303 | ||
| 300 | insert_choose_arg_map(&c->choose_args, arg_map); | 304 | insert_choose_arg_map(&c->choose_args, arg_map); |
| @@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, | |||
| 2078 | u32 new_pg_num, | 2082 | u32 new_pg_num, |
| 2079 | bool old_sort_bitwise, | 2083 | bool old_sort_bitwise, |
| 2080 | bool new_sort_bitwise, | 2084 | bool new_sort_bitwise, |
| 2085 | bool old_recovery_deletes, | ||
| 2086 | bool new_recovery_deletes, | ||
| 2081 | const struct ceph_pg *pgid) | 2087 | const struct ceph_pg *pgid) |
| 2082 | { | 2088 | { |
| 2083 | return !osds_equal(old_acting, new_acting) || | 2089 | return !osds_equal(old_acting, new_acting) || |
| @@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, | |||
| 2085 | old_size != new_size || | 2091 | old_size != new_size || |
| 2086 | old_min_size != new_min_size || | 2092 | old_min_size != new_min_size || |
| 2087 | ceph_pg_is_split(pgid, old_pg_num, new_pg_num) || | 2093 | ceph_pg_is_split(pgid, old_pg_num, new_pg_num) || |
| 2088 | old_sort_bitwise != new_sort_bitwise; | 2094 | old_sort_bitwise != new_sort_bitwise || |
| 2095 | old_recovery_deletes != new_recovery_deletes; | ||
| 2089 | } | 2096 | } |
| 2090 | 2097 | ||
| 2091 | static int calc_pg_rank(int osd, const struct ceph_osds *acting) | 2098 | static int calc_pg_rank(int osd, const struct ceph_osds *acting) |
| @@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi, | |||
| 2301 | } | 2308 | } |
| 2302 | } | 2309 | } |
| 2303 | 2310 | ||
| 2311 | /* | ||
| 2312 | * Magic value used for a "default" fallback choose_args, used if the | ||
| 2313 | * crush_choose_arg_map passed to do_crush() does not exist. If this | ||
| 2314 | * also doesn't exist, fall back to canonical weights. | ||
| 2315 | */ | ||
| 2316 | #define CEPH_DEFAULT_CHOOSE_ARGS -1 | ||
| 2317 | |||
| 2304 | static int do_crush(struct ceph_osdmap *map, int ruleno, int x, | 2318 | static int do_crush(struct ceph_osdmap *map, int ruleno, int x, |
| 2305 | int *result, int result_max, | 2319 | int *result, int result_max, |
| 2306 | const __u32 *weight, int weight_max, | 2320 | const __u32 *weight, int weight_max, |
| 2307 | u64 choose_args_index) | 2321 | s64 choose_args_index) |
| 2308 | { | 2322 | { |
| 2309 | struct crush_choose_arg_map *arg_map; | 2323 | struct crush_choose_arg_map *arg_map; |
| 2310 | int r; | 2324 | int r; |
| @@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, | |||
| 2313 | 2327 | ||
| 2314 | arg_map = lookup_choose_arg_map(&map->crush->choose_args, | 2328 | arg_map = lookup_choose_arg_map(&map->crush->choose_args, |
| 2315 | choose_args_index); | 2329 | choose_args_index); |
| 2330 | if (!arg_map) | ||
| 2331 | arg_map = lookup_choose_arg_map(&map->crush->choose_args, | ||
| 2332 | CEPH_DEFAULT_CHOOSE_ARGS); | ||
| 2316 | 2333 | ||
| 2317 | mutex_lock(&map->crush_workspace_mutex); | 2334 | mutex_lock(&map->crush_workspace_mutex); |
| 2318 | r = crush_do_rule(map->crush, ruleno, x, result, result_max, | 2335 | r = crush_do_rule(map->crush, ruleno, x, result, result_max, |
| @@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap, | |||
| 2423 | for (i = 0; i < pg->pg_upmap.len; i++) | 2440 | for (i = 0; i < pg->pg_upmap.len; i++) |
| 2424 | raw->osds[i] = pg->pg_upmap.osds[i]; | 2441 | raw->osds[i] = pg->pg_upmap.osds[i]; |
| 2425 | raw->size = pg->pg_upmap.len; | 2442 | raw->size = pg->pg_upmap.len; |
| 2426 | return; | 2443 | /* check and apply pg_upmap_items, if any */ |
| 2427 | } | 2444 | } |
| 2428 | 2445 | ||
| 2429 | pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); | 2446 | pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); |
| 2430 | if (pg) { | 2447 | if (pg) { |
| 2431 | /* | 2448 | for (i = 0; i < raw->size; i++) { |
| 2432 | * Note: this approach does not allow a bidirectional swap, | 2449 | for (j = 0; j < pg->pg_upmap_items.len; j++) { |
| 2433 | * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. | 2450 | int from = pg->pg_upmap_items.from_to[j][0]; |
| 2434 | */ | 2451 | int to = pg->pg_upmap_items.from_to[j][1]; |
| 2435 | for (i = 0; i < pg->pg_upmap_items.len; i++) { | 2452 | |
| 2436 | int from = pg->pg_upmap_items.from_to[i][0]; | 2453 | if (from == raw->osds[i]) { |
| 2437 | int to = pg->pg_upmap_items.from_to[i][1]; | 2454 | if (!(to != CRUSH_ITEM_NONE && |
| 2438 | int pos = -1; | 2455 | to < osdmap->max_osd && |
| 2439 | bool exists = false; | 2456 | osdmap->osd_weight[to] == 0)) |
| 2440 | 2457 | raw->osds[i] = to; | |
| 2441 | /* make sure replacement doesn't already appear */ | ||
| 2442 | for (j = 0; j < raw->size; j++) { | ||
| 2443 | int osd = raw->osds[j]; | ||
| 2444 | |||
| 2445 | if (osd == to) { | ||
| 2446 | exists = true; | ||
| 2447 | break; | 2458 | break; |
| 2448 | } | 2459 | } |
| 2449 | /* ignore mapping if target is marked out */ | ||
| 2450 | if (osd == from && pos < 0 && | ||
| 2451 | !(to != CRUSH_ITEM_NONE && | ||
| 2452 | to < osdmap->max_osd && | ||
| 2453 | osdmap->osd_weight[to] == 0)) { | ||
| 2454 | pos = j; | ||
| 2455 | } | ||
| 2456 | } | ||
| 2457 | if (!exists && pos >= 0) { | ||
| 2458 | raw->osds[pos] = to; | ||
| 2459 | return; | ||
| 2460 | } | 2460 | } |
| 2461 | } | 2461 | } |
| 2462 | } | 2462 | } |
