about | summary | refs | log | tree | commit | diff | stats
path: root/net/ceph
diff options
context:
space:
mode:
author: Ilya Dryomov <idryomov@gmail.com> 2019-09-04 09:40:03 -0400
committer: Ilya Dryomov <idryomov@gmail.com> 2019-09-16 06:06:25 -0400
commit: cf73d882cc51c1f245a890cccb79952a260302d3 (patch)
tree: b6d86b83c52de7f8349a30a9a2074db755f4dc7d /net/ceph
parent: 10c12851a022662bf6085bd4384b4ebed4c447ce (diff)
libceph: use ceph_kvmalloc() for osdmap arrays
osdmap has a bunch of arrays that grow linearly with the number of
OSDs. osd_state, osd_weight and osd_primary_affinity take 4 bytes per
OSD. osd_addr takes 136 bytes per OSD because of sockaddr_storage.
The CRUSH workspace area also grows linearly with the number of OSDs.

Normally these arrays are allocated at client startup. The osdmap is
usually updated in small incrementals, but once in a while a full map
may need to be processed. For a cluster with 10000 OSDs, this means
a bunch of 40K allocations followed by a 1.3M allocation, all of which
are currently required to be physically contiguous. This results in
sporadic ENOMEM errors, hanging the client.

Go back to manually (re)allocating arrays and use ceph_kvmalloc() to
fall back to non-contiguous allocation when necessary.

Link: https://tracker.ceph.com/issues/40481
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/osdmap.c 69
1 file changed, 43 insertions, 26 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 90437906b7bc..4e0de14f80bb 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -973,11 +973,11 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
973 struct ceph_pg_pool_info, node); 973 struct ceph_pg_pool_info, node);
974 __remove_pg_pool(&map->pg_pools, pi); 974 __remove_pg_pool(&map->pg_pools, pi);
975 } 975 }
976 kfree(map->osd_state); 976 kvfree(map->osd_state);
977 kfree(map->osd_weight); 977 kvfree(map->osd_weight);
978 kfree(map->osd_addr); 978 kvfree(map->osd_addr);
979 kfree(map->osd_primary_affinity); 979 kvfree(map->osd_primary_affinity);
980 kfree(map->crush_workspace); 980 kvfree(map->crush_workspace);
981 kfree(map); 981 kfree(map);
982} 982}
983 983
@@ -986,28 +986,41 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
986 * 986 *
987 * The new elements are properly initialized. 987 * The new elements are properly initialized.
988 */ 988 */
989static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) 989static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
990{ 990{
991 u32 *state; 991 u32 *state;
992 u32 *weight; 992 u32 *weight;
993 struct ceph_entity_addr *addr; 993 struct ceph_entity_addr *addr;
994 u32 to_copy;
994 int i; 995 int i;
995 996
996 state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS); 997 dout("%s old %u new %u\n", __func__, map->max_osd, max);
997 if (!state) 998 if (max == map->max_osd)
998 return -ENOMEM; 999 return 0;
999 map->osd_state = state;
1000 1000
1001 weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS); 1001 state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
1002 if (!weight) 1002 weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
1003 addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
1004 if (!state || !weight || !addr) {
1005 kvfree(state);
1006 kvfree(weight);
1007 kvfree(addr);
1003 return -ENOMEM; 1008 return -ENOMEM;
1004 map->osd_weight = weight; 1009 }
1005 1010
1006 addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS); 1011 to_copy = min(map->max_osd, max);
1007 if (!addr) 1012 if (map->osd_state) {
1008 return -ENOMEM; 1013 memcpy(state, map->osd_state, to_copy * sizeof(*state));
1009 map->osd_addr = addr; 1014 memcpy(weight, map->osd_weight, to_copy * sizeof(*weight));
1015 memcpy(addr, map->osd_addr, to_copy * sizeof(*addr));
1016 kvfree(map->osd_state);
1017 kvfree(map->osd_weight);
1018 kvfree(map->osd_addr);
1019 }
1010 1020
1021 map->osd_state = state;
1022 map->osd_weight = weight;
1023 map->osd_addr = addr;
1011 for (i = map->max_osd; i < max; i++) { 1024 for (i = map->max_osd; i < max; i++) {
1012 map->osd_state[i] = 0; 1025 map->osd_state[i] = 0;
1013 map->osd_weight[i] = CEPH_OSD_OUT; 1026 map->osd_weight[i] = CEPH_OSD_OUT;
@@ -1017,12 +1030,16 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
1017 if (map->osd_primary_affinity) { 1030 if (map->osd_primary_affinity) {
1018 u32 *affinity; 1031 u32 *affinity;
1019 1032
1020 affinity = krealloc(map->osd_primary_affinity, 1033 affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
1021 max*sizeof(*affinity), GFP_NOFS); 1034 GFP_NOFS);
1022 if (!affinity) 1035 if (!affinity)
1023 return -ENOMEM; 1036 return -ENOMEM;
1024 map->osd_primary_affinity = affinity;
1025 1037
1038 memcpy(affinity, map->osd_primary_affinity,
1039 to_copy * sizeof(*affinity));
1040 kvfree(map->osd_primary_affinity);
1041
1042 map->osd_primary_affinity = affinity;
1026 for (i = map->max_osd; i < max; i++) 1043 for (i = map->max_osd; i < max; i++)
1027 map->osd_primary_affinity[i] = 1044 map->osd_primary_affinity[i] =
1028 CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; 1045 CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
@@ -1043,7 +1060,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
1043 1060
1044 work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); 1061 work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE);
1045 dout("%s work_size %zu bytes\n", __func__, work_size); 1062 dout("%s work_size %zu bytes\n", __func__, work_size);
1046 workspace = kmalloc(work_size, GFP_NOIO); 1063 workspace = ceph_kvmalloc(work_size, GFP_NOIO);
1047 if (!workspace) { 1064 if (!workspace) {
1048 crush_destroy(crush); 1065 crush_destroy(crush);
1049 return -ENOMEM; 1066 return -ENOMEM;
@@ -1052,7 +1069,7 @@ static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
1052 1069
1053 if (map->crush) 1070 if (map->crush)
1054 crush_destroy(map->crush); 1071 crush_destroy(map->crush);
1055 kfree(map->crush_workspace); 1072 kvfree(map->crush_workspace);
1056 map->crush = crush; 1073 map->crush = crush;
1057 map->crush_workspace = workspace; 1074 map->crush_workspace = workspace;
1058 return 0; 1075 return 0;
@@ -1298,9 +1315,9 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
1298 if (!map->osd_primary_affinity) { 1315 if (!map->osd_primary_affinity) {
1299 int i; 1316 int i;
1300 1317
1301 map->osd_primary_affinity = kmalloc_array(map->max_osd, 1318 map->osd_primary_affinity = ceph_kvmalloc(
1302 sizeof(u32), 1319 array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
1303 GFP_NOFS); 1320 GFP_NOFS);
1304 if (!map->osd_primary_affinity) 1321 if (!map->osd_primary_affinity)
1305 return -ENOMEM; 1322 return -ENOMEM;
1306 1323
@@ -1321,7 +1338,7 @@ static int decode_primary_affinity(void **p, void *end,
1321 1338
1322 ceph_decode_32_safe(p, end, len, e_inval); 1339 ceph_decode_32_safe(p, end, len, e_inval);
1323 if (len == 0) { 1340 if (len == 0) {
1324 kfree(map->osd_primary_affinity); 1341 kvfree(map->osd_primary_affinity);
1325 map->osd_primary_affinity = NULL; 1342 map->osd_primary_affinity = NULL;
1326 return 0; 1343 return 0;
1327 } 1344 }