 mm/percpu.c | 338
 1 file changed, 175 insertions(+), 163 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index fa70122dfdd0..0cd4bf61012c 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1075,165 +1075,6 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
         free_bootmem(__pa(ai), ai->__ai_size);
 }
 
-#if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
-        defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK))
-/**
- * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
- * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: minimum free size for dynamic allocation in bytes
- * @atom_size: allocation atom size
- * @cpu_distance_fn: callback to determine distance between cpus, optional
- *
- * This function determines grouping of units, their mappings to cpus
- * and other parameters considering needed percpu size, allocation
- * atom size and distances between CPUs.
- *
- * Groups are always mutliples of atom size and CPUs which are of
- * LOCAL_DISTANCE both ways are grouped together and share space for
- * units in the same group. The returned configuration is guaranteed
- * to have CPUs on different nodes on different groups and >=75% usage
- * of allocated virtual address space.
- *
- * RETURNS:
- * On success, pointer to the new allocation_info is returned. On
- * failure, ERR_PTR value is returned.
- */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-                                size_t reserved_size, size_t dyn_size,
-                                size_t atom_size,
-                                pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
-{
-        static int group_map[NR_CPUS] __initdata;
-        static int group_cnt[NR_CPUS] __initdata;
-        const size_t static_size = __per_cpu_end - __per_cpu_start;
-        int nr_groups = 1, nr_units = 0;
-        size_t size_sum, min_unit_size, alloc_size;
-        int upa, max_upa, uninitialized_var(best_upa);  /* units_per_alloc */
-        int last_allocs, group, unit;
-        unsigned int cpu, tcpu;
-        struct pcpu_alloc_info *ai;
-        unsigned int *cpu_map;
-
-        /* this function may be called multiple times */
-        memset(group_map, 0, sizeof(group_map));
-        memset(group_cnt, 0, sizeof(group_cnt));
-
-        /* calculate size_sum and ensure dyn_size is enough for early alloc */
-        size_sum = PFN_ALIGN(static_size + reserved_size +
-                             max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
-        dyn_size = size_sum - static_size - reserved_size;
-
-        /*
-         * Determine min_unit_size, alloc_size and max_upa such that
-         * alloc_size is multiple of atom_size and is the smallest
-         * which can accomodate 4k aligned segments which are equal to
-         * or larger than min_unit_size.
-         */
-        min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
-
-        alloc_size = roundup(min_unit_size, atom_size);
-        upa = alloc_size / min_unit_size;
-        while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-                upa--;
-        max_upa = upa;
-
-        /* group cpus according to their proximity */
-        for_each_possible_cpu(cpu) {
-                group = 0;
-        next_group:
-                for_each_possible_cpu(tcpu) {
-                        if (cpu == tcpu)
-                                break;
-                        if (group_map[tcpu] == group && cpu_distance_fn &&
-                            (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-                             cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-                                group++;
-                                nr_groups = max(nr_groups, group + 1);
-                                goto next_group;
-                        }
-                }
-                group_map[cpu] = group;
-                group_cnt[group]++;
-        }
-
-        /*
-         * Expand unit size until address space usage goes over 75%
-         * and then as much as possible without using more address
-         * space.
-         */
-        last_allocs = INT_MAX;
-        for (upa = max_upa; upa; upa--) {
-                int allocs = 0, wasted = 0;
-
-                if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-                        continue;
-
-                for (group = 0; group < nr_groups; group++) {
-                        int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
-                        allocs += this_allocs;
-                        wasted += this_allocs * upa - group_cnt[group];
-                }
-
-                /*
-                 * Don't accept if wastage is over 1/3. The
-                 * greater-than comparison ensures upa==1 always
-                 * passes the following check.
-                 */
-                if (wasted > num_possible_cpus() / 3)
-                        continue;
-
-                /* and then don't consume more memory */
-                if (allocs > last_allocs)
-                        break;
-                last_allocs = allocs;
-                best_upa = upa;
-        }
-        upa = best_upa;
-
-        /* allocate and fill alloc_info */
-        for (group = 0; group < nr_groups; group++)
-                nr_units += roundup(group_cnt[group], upa);
-
-        ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
-        if (!ai)
-                return ERR_PTR(-ENOMEM);
-        cpu_map = ai->groups[0].cpu_map;
-
-        for (group = 0; group < nr_groups; group++) {
-                ai->groups[group].cpu_map = cpu_map;
-                cpu_map += roundup(group_cnt[group], upa);
-        }
-
-        ai->static_size = static_size;
-        ai->reserved_size = reserved_size;
-        ai->dyn_size = dyn_size;
-        ai->unit_size = alloc_size / upa;
-        ai->atom_size = atom_size;
-        ai->alloc_size = alloc_size;
-
-        for (group = 0, unit = 0; group_cnt[group]; group++) {
-                struct pcpu_group_info *gi = &ai->groups[group];
-
-                /*
-                 * Initialize base_offset as if all groups are located
-                 * back-to-back. The caller should update this to
-                 * reflect actual allocation.
-                 */
-                gi->base_offset = unit * ai->unit_size;
-
-                for_each_possible_cpu(cpu)
-                        if (group_map[cpu] == group)
-                                gi->cpu_map[gi->nr_units++] = cpu;
-                gi->nr_units = roundup(gi->nr_units, upa);
-                unit += gi->nr_units;
-        }
-        BUG_ON(unit != nr_units);
-
-        return ai;
-}
-#endif  /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-           CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */
-
 /**
  * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
  * @lvl: loglevel
@@ -1532,8 +1373,180 @@ static int __init percpu_alloc_setup(char *str)
 }
 early_param("percpu_alloc", percpu_alloc_setup);
 
+/*
+ * pcpu_embed_first_chunk() is used by the generic percpu setup.
+ * Build it if needed by the arch config or the generic setup is going
+ * to be used.
+ */
 #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
         !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
+#define BUILD_EMBED_FIRST_CHUNK
+#endif
+
+/* build pcpu_page_first_chunk() iff needed by the arch config */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
+#define BUILD_PAGE_FIRST_CHUNK
+#endif
+
+/* pcpu_build_alloc_info() is used by both embed and page first chunk */
+#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
+/**
+ * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: minimum free size for dynamic allocation in bytes
+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ *
+ * This function determines grouping of units, their mappings to cpus
+ * and other parameters considering needed percpu size, allocation
+ * atom size and distances between CPUs.
+ *
+ * Groups are always mutliples of atom size and CPUs which are of
+ * LOCAL_DISTANCE both ways are grouped together and share space for
+ * units in the same group. The returned configuration is guaranteed
+ * to have CPUs on different nodes on different groups and >=75% usage
+ * of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, pointer to the new allocation_info is returned. On
+ * failure, ERR_PTR value is returned.
+ */
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+                                size_t reserved_size, size_t dyn_size,
+                                size_t atom_size,
+                                pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+        static int group_map[NR_CPUS] __initdata;
+        static int group_cnt[NR_CPUS] __initdata;
+        const size_t static_size = __per_cpu_end - __per_cpu_start;
+        int nr_groups = 1, nr_units = 0;
+        size_t size_sum, min_unit_size, alloc_size;
+        int upa, max_upa, uninitialized_var(best_upa);  /* units_per_alloc */
+        int last_allocs, group, unit;
+        unsigned int cpu, tcpu;
+        struct pcpu_alloc_info *ai;
+        unsigned int *cpu_map;
+
+        /* this function may be called multiple times */
+        memset(group_map, 0, sizeof(group_map));
+        memset(group_cnt, 0, sizeof(group_cnt));
+
+        /* calculate size_sum and ensure dyn_size is enough for early alloc */
+        size_sum = PFN_ALIGN(static_size + reserved_size +
+                             max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+        dyn_size = size_sum - static_size - reserved_size;
+
+        /*
+         * Determine min_unit_size, alloc_size and max_upa such that
+         * alloc_size is multiple of atom_size and is the smallest
+         * which can accomodate 4k aligned segments which are equal to
+         * or larger than min_unit_size.
+         */
+        min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+        alloc_size = roundup(min_unit_size, atom_size);
+        upa = alloc_size / min_unit_size;
+        while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+                upa--;
+        max_upa = upa;
+
+        /* group cpus according to their proximity */
+        for_each_possible_cpu(cpu) {
+                group = 0;
+        next_group:
+                for_each_possible_cpu(tcpu) {
+                        if (cpu == tcpu)
+                                break;
+                        if (group_map[tcpu] == group && cpu_distance_fn &&
+                            (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+                             cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+                                group++;
+                                nr_groups = max(nr_groups, group + 1);
+                                goto next_group;
+                        }
+                }
+                group_map[cpu] = group;
+                group_cnt[group]++;
+        }
+
+        /*
+         * Expand unit size until address space usage goes over 75%
+         * and then as much as possible without using more address
+         * space.
+         */
+        last_allocs = INT_MAX;
+        for (upa = max_upa; upa; upa--) {
+                int allocs = 0, wasted = 0;
+
+                if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+                        continue;
+
+                for (group = 0; group < nr_groups; group++) {
+                        int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+                        allocs += this_allocs;
+                        wasted += this_allocs * upa - group_cnt[group];
+                }
+
+                /*
+                 * Don't accept if wastage is over 1/3. The
+                 * greater-than comparison ensures upa==1 always
+                 * passes the following check.
+                 */
+                if (wasted > num_possible_cpus() / 3)
+                        continue;
+
+                /* and then don't consume more memory */
+                if (allocs > last_allocs)
+                        break;
+                last_allocs = allocs;
+                best_upa = upa;
+        }
+        upa = best_upa;
+
+        /* allocate and fill alloc_info */
+        for (group = 0; group < nr_groups; group++)
+                nr_units += roundup(group_cnt[group], upa);
+
+        ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
+        if (!ai)
+                return ERR_PTR(-ENOMEM);
+        cpu_map = ai->groups[0].cpu_map;
+
+        for (group = 0; group < nr_groups; group++) {
+                ai->groups[group].cpu_map = cpu_map;
+                cpu_map += roundup(group_cnt[group], upa);
+        }
+
+        ai->static_size = static_size;
+        ai->reserved_size = reserved_size;
+        ai->dyn_size = dyn_size;
+        ai->unit_size = alloc_size / upa;
+        ai->atom_size = atom_size;
+        ai->alloc_size = alloc_size;
+
+        for (group = 0, unit = 0; group_cnt[group]; group++) {
+                struct pcpu_group_info *gi = &ai->groups[group];
+
+                /*
+                 * Initialize base_offset as if all groups are located
+                 * back-to-back. The caller should update this to
+                 * reflect actual allocation.
+                 */
+                gi->base_offset = unit * ai->unit_size;
+
+                for_each_possible_cpu(cpu)
+                        if (group_map[cpu] == group)
+                                gi->cpu_map[gi->nr_units++] = cpu;
+                gi->nr_units = roundup(gi->nr_units, upa);
+                unit += gi->nr_units;
+        }
+        BUG_ON(unit != nr_units);
+
+        return ai;
+}
+#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
+
+#if defined(BUILD_EMBED_FIRST_CHUNK)
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1662,10 +1675,9 @@ out_free:
         free_bootmem(__pa(areas), areas_size);
         return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-          !CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* BUILD_EMBED_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#ifdef BUILD_PAGE_FIRST_CHUNK
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1773,7 +1785,7 @@ out_free_ar:
         pcpu_free_alloc_info(ai);
         return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
+#endif /* BUILD_PAGE_FIRST_CHUNK */
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 /*
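
A note on the grouping step inside pcpu_build_alloc_info(), which the diff above only moves: two CPUs land in the same unit group only when their distance is LOCAL_DISTANCE in both directions; otherwise a new group is opened. The following standalone C sketch re-implements just that step for illustration. The 4-CPU distance table, NR_CPUS and LOCAL_DISTANCE values are invented for the demo and are not taken from the patch.

/*
 * Sketch of the distance-based CPU grouping in pcpu_build_alloc_info():
 * a CPU joins a group only if every CPU already in that group is within
 * LOCAL_DISTANCE of it (both ways); otherwise a new group is opened.
 */
#include <stdio.h>

#define NR_CPUS        4	/* illustration only */
#define LOCAL_DISTANCE 10	/* illustration only */

/* hypothetical NUMA distances: CPUs {0,1} on node 0, {2,3} on node 1 */
static const int distance[NR_CPUS][NR_CPUS] = {
        { 10, 10, 20, 20 },
        { 10, 10, 20, 20 },
        { 20, 20, 10, 10 },
        { 20, 20, 10, 10 },
};

int main(void)
{
        int group_map[NR_CPUS] = { 0 };
        int group_cnt[NR_CPUS] = { 0 };
        int nr_groups = 1;

        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                int group = 0;
        next_group:
                for (int tcpu = 0; tcpu < cpu; tcpu++) {
                        /* a distant CPU already sits in this group: try the next group */
                        if (group_map[tcpu] == group &&
                            (distance[cpu][tcpu] > LOCAL_DISTANCE ||
                             distance[tcpu][cpu] > LOCAL_DISTANCE)) {
                                group++;
                                if (group + 1 > nr_groups)
                                        nr_groups = group + 1;
                                goto next_group;
                        }
                }
                group_map[cpu] = group;
                group_cnt[group]++;
        }

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d -> group %d\n", cpu, group_map[cpu]);
        printf("%d groups\n", nr_groups);
        return 0;
}

With the assumed table, CPUs 0 and 1 share group 0 and CPUs 2 and 3 share group 1, which is exactly the per-node grouping the kernel-doc comment describes.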
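The other half of the moved function picks units_per_alloc (upa): starting from the largest value that keeps each unit page aligned, it shrinks upa until the number of wasted units is at most a third of num_possible_cpus() (which is where the ">=75% usage" guarantee in the comment comes from), then keeps shrinking only while that does not increase the number of allocations. Below is a minimal standalone sketch of that loop; the 2MB atom, ~88KB minimum unit size and the group populations of 6 and 2 CPUs are made-up inputs, and PAGE_SIZE, PAGE_MASK and DIV_ROUND_UP are redefined locally so the snippet builds outside the kernel tree.

/*
 * Sketch of the units_per_alloc (upa) selection heuristic in
 * pcpu_build_alloc_info(), run on invented inputs.
 */
#include <limits.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE      4096UL
#define PAGE_MASK      (~(PAGE_SIZE - 1))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        /* hypothetical inputs: 2MB allocation atom, 88KB minimum unit */
        size_t atom_size = 2UL << 20;
        size_t min_unit_size = 88UL << 10;
        size_t alloc_size = ((min_unit_size + atom_size - 1) / atom_size) * atom_size;

        /* hypothetical grouping result: two nodes with 6 and 2 CPUs */
        int group_cnt[] = { 6, 2 };
        int nr_groups = 2, nr_cpus = 8;

        /* largest upa for which each unit stays page aligned */
        int upa = (int)(alloc_size / min_unit_size);
        while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
                upa--;
        int max_upa = upa;

        int last_allocs = INT_MAX, best_upa = 0;
        for (upa = max_upa; upa; upa--) {
                int allocs = 0, wasted = 0;

                if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
                        continue;	/* unit size would not be page aligned */

                for (int g = 0; g < nr_groups; g++) {
                        int this_allocs = DIV_ROUND_UP(group_cnt[g], upa);
                        allocs += this_allocs;
                        wasted += this_allocs * upa - group_cnt[g];
                }

                if (wasted > nr_cpus / 3)
                        continue;	/* too many padding units wasted */
                if (allocs > last_allocs)
                        break;		/* shrinking further costs extra allocations */
                last_allocs = allocs;
                best_upa = upa;
        }

        printf("alloc_size=%zu best_upa=%d unit_size=%zu\n",
               alloc_size, best_upa, alloc_size / best_upa);
        return 0;
}

With these assumed inputs the sketch settles on upa = 2, i.e. 1MB units packed two per 2MB atom: four allocations cover the 6+2 CPUs with no wasted units, whereas larger upa values were rejected for wasting more than a third of the units.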