 mm/percpu.c | 338 ++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 175 insertions(+), 163 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index fa70122dfdd0..0cd4bf61012c 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1075,165 +1075,6 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 	free_bootmem(__pa(ai), ai->__ai_size);
 }
 
-#if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
-	defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK))
-/**
- * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
- * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: minimum free size for dynamic allocation in bytes
- * @atom_size: allocation atom size
- * @cpu_distance_fn: callback to determine distance between cpus, optional
- *
- * This function determines grouping of units, their mappings to cpus
- * and other parameters considering needed percpu size, allocation
- * atom size and distances between CPUs.
- *
- * Groups are always mutliples of atom size and CPUs which are of
- * LOCAL_DISTANCE both ways are grouped together and share space for
- * units in the same group. The returned configuration is guaranteed
- * to have CPUs on different nodes on different groups and >=75% usage
- * of allocated virtual address space.
- *
- * RETURNS:
- * On success, pointer to the new allocation_info is returned. On
- * failure, ERR_PTR value is returned.
- */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, size_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
-{
-	static int group_map[NR_CPUS] __initdata;
-	static int group_cnt[NR_CPUS] __initdata;
-	const size_t static_size = __per_cpu_end - __per_cpu_start;
-	int nr_groups = 1, nr_units = 0;
-	size_t size_sum, min_unit_size, alloc_size;
-	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
-	int last_allocs, group, unit;
-	unsigned int cpu, tcpu;
-	struct pcpu_alloc_info *ai;
-	unsigned int *cpu_map;
-
-	/* this function may be called multiple times */
-	memset(group_map, 0, sizeof(group_map));
-	memset(group_cnt, 0, sizeof(group_cnt));
-
-	/* calculate size_sum and ensure dyn_size is enough for early alloc */
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
-	dyn_size = size_sum - static_size - reserved_size;
-
-	/*
-	 * Determine min_unit_size, alloc_size and max_upa such that
-	 * alloc_size is multiple of atom_size and is the smallest
-	 * which can accomodate 4k aligned segments which are equal to
-	 * or larger than min_unit_size.
-	 */
-	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
-
-	alloc_size = roundup(min_unit_size, atom_size);
-	upa = alloc_size / min_unit_size;
-	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-		upa--;
-	max_upa = upa;
-
-	/* group cpus according to their proximity */
-	for_each_possible_cpu(cpu) {
-		group = 0;
-	next_group:
-		for_each_possible_cpu(tcpu) {
-			if (cpu == tcpu)
-				break;
-			if (group_map[tcpu] == group && cpu_distance_fn &&
-			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-				group++;
-				nr_groups = max(nr_groups, group + 1);
-				goto next_group;
-			}
-		}
-		group_map[cpu] = group;
-		group_cnt[group]++;
-	}
-
-	/*
-	 * Expand unit size until address space usage goes over 75%
-	 * and then as much as possible without using more address
-	 * space.
-	 */
-	last_allocs = INT_MAX;
-	for (upa = max_upa; upa; upa--) {
-		int allocs = 0, wasted = 0;
-
-		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-			continue;
-
-		for (group = 0; group < nr_groups; group++) {
-			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
-			allocs += this_allocs;
-			wasted += this_allocs * upa - group_cnt[group];
-		}
-
-		/*
-		 * Don't accept if wastage is over 1/3. The
-		 * greater-than comparison ensures upa==1 always
-		 * passes the following check.
-		 */
-		if (wasted > num_possible_cpus() / 3)
-			continue;
-
-		/* and then don't consume more memory */
-		if (allocs > last_allocs)
-			break;
-		last_allocs = allocs;
-		best_upa = upa;
-	}
-	upa = best_upa;
-
-	/* allocate and fill alloc_info */
-	for (group = 0; group < nr_groups; group++)
-		nr_units += roundup(group_cnt[group], upa);
-
-	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
-	if (!ai)
-		return ERR_PTR(-ENOMEM);
-	cpu_map = ai->groups[0].cpu_map;
-
-	for (group = 0; group < nr_groups; group++) {
-		ai->groups[group].cpu_map = cpu_map;
-		cpu_map += roundup(group_cnt[group], upa);
-	}
-
-	ai->static_size = static_size;
-	ai->reserved_size = reserved_size;
-	ai->dyn_size = dyn_size;
-	ai->unit_size = alloc_size / upa;
-	ai->atom_size = atom_size;
-	ai->alloc_size = alloc_size;
-
-	for (group = 0, unit = 0; group_cnt[group]; group++) {
-		struct pcpu_group_info *gi = &ai->groups[group];
-
-		/*
-		 * Initialize base_offset as if all groups are located
-		 * back-to-back. The caller should update this to
-		 * reflect actual allocation.
-		 */
-		gi->base_offset = unit * ai->unit_size;
-
-		for_each_possible_cpu(cpu)
-			if (group_map[cpu] == group)
-				gi->cpu_map[gi->nr_units++] = cpu;
-		gi->nr_units = roundup(gi->nr_units, upa);
-		unit += gi->nr_units;
-	}
-	BUG_ON(unit != nr_units);
-
-	return ai;
-}
-#endif /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-	  CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */
-
 /**
  * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
  * @lvl: loglevel
@@ -1532,8 +1373,180 @@ static int __init percpu_alloc_setup(char *str)
 }
 early_param("percpu_alloc", percpu_alloc_setup);
 
+/*
+ * pcpu_embed_first_chunk() is used by the generic percpu setup.
+ * Build it if needed by the arch config or the generic setup is going
+ * to be used.
+ */
 #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
 	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
+#define BUILD_EMBED_FIRST_CHUNK
+#endif
+
+/* build pcpu_page_first_chunk() iff needed by the arch config */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
+#define BUILD_PAGE_FIRST_CHUNK
+#endif
+
+/* pcpu_build_alloc_info() is used by both embed and page first chunk */
+#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
+/**
+ * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: minimum free size for dynamic allocation in bytes
+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ *
+ * This function determines grouping of units, their mappings to cpus
+ * and other parameters considering needed percpu size, allocation
+ * atom size and distances between CPUs.
+ *
+ * Groups are always mutliples of atom size and CPUs which are of
+ * LOCAL_DISTANCE both ways are grouped together and share space for
+ * units in the same group. The returned configuration is guaranteed
+ * to have CPUs on different nodes on different groups and >=75% usage
+ * of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, pointer to the new allocation_info is returned. On
+ * failure, ERR_PTR value is returned.
+ */
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
+				size_t atom_size,
+				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+	static int group_map[NR_CPUS] __initdata;
+	static int group_cnt[NR_CPUS] __initdata;
+	const size_t static_size = __per_cpu_end - __per_cpu_start;
+	int nr_groups = 1, nr_units = 0;
+	size_t size_sum, min_unit_size, alloc_size;
+	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
+	int last_allocs, group, unit;
+	unsigned int cpu, tcpu;
+	struct pcpu_alloc_info *ai;
+	unsigned int *cpu_map;
+
+	/* this function may be called multiple times */
+	memset(group_map, 0, sizeof(group_map));
+	memset(group_cnt, 0, sizeof(group_cnt));
+
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+	dyn_size = size_sum - static_size - reserved_size;
+
+	/*
+	 * Determine min_unit_size, alloc_size and max_upa such that
+	 * alloc_size is multiple of atom_size and is the smallest
+	 * which can accomodate 4k aligned segments which are equal to
+	 * or larger than min_unit_size.
+	 */
+	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+	alloc_size = roundup(min_unit_size, atom_size);
+	upa = alloc_size / min_unit_size;
+	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+		upa--;
+	max_upa = upa;
+
+	/* group cpus according to their proximity */
+	for_each_possible_cpu(cpu) {
+		group = 0;
+	next_group:
+		for_each_possible_cpu(tcpu) {
+			if (cpu == tcpu)
+				break;
+			if (group_map[tcpu] == group && cpu_distance_fn &&
+			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+				group++;
+				nr_groups = max(nr_groups, group + 1);
+				goto next_group;
+			}
+		}
+		group_map[cpu] = group;
+		group_cnt[group]++;
+	}
+
+	/*
+	 * Expand unit size until address space usage goes over 75%
+	 * and then as much as possible without using more address
+	 * space.
+	 */
+	last_allocs = INT_MAX;
+	for (upa = max_upa; upa; upa--) {
+		int allocs = 0, wasted = 0;
+
+		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+			continue;
+
+		for (group = 0; group < nr_groups; group++) {
+			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+			allocs += this_allocs;
+			wasted += this_allocs * upa - group_cnt[group];
+		}
+
+		/*
+		 * Don't accept if wastage is over 1/3. The
+		 * greater-than comparison ensures upa==1 always
+		 * passes the following check.
+		 */
+		if (wasted > num_possible_cpus() / 3)
+			continue;
+
+		/* and then don't consume more memory */
+		if (allocs > last_allocs)
+			break;
+		last_allocs = allocs;
+		best_upa = upa;
+	}
+	upa = best_upa;
+
+	/* allocate and fill alloc_info */
+	for (group = 0; group < nr_groups; group++)
+		nr_units += roundup(group_cnt[group], upa);
+
+	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
+	if (!ai)
+		return ERR_PTR(-ENOMEM);
+	cpu_map = ai->groups[0].cpu_map;
+
+	for (group = 0; group < nr_groups; group++) {
+		ai->groups[group].cpu_map = cpu_map;
+		cpu_map += roundup(group_cnt[group], upa);
+	}
+
+	ai->static_size = static_size;
+	ai->reserved_size = reserved_size;
+	ai->dyn_size = dyn_size;
+	ai->unit_size = alloc_size / upa;
+	ai->atom_size = atom_size;
+	ai->alloc_size = alloc_size;
+
+	for (group = 0, unit = 0; group_cnt[group]; group++) {
+		struct pcpu_group_info *gi = &ai->groups[group];
+
+		/*
+		 * Initialize base_offset as if all groups are located
+		 * back-to-back. The caller should update this to
+		 * reflect actual allocation.
+		 */
+		gi->base_offset = unit * ai->unit_size;
+
+		for_each_possible_cpu(cpu)
+			if (group_map[cpu] == group)
+				gi->cpu_map[gi->nr_units++] = cpu;
+		gi->nr_units = roundup(gi->nr_units, upa);
+		unit += gi->nr_units;
+	}
+	BUG_ON(unit != nr_units);
+
+	return ai;
+}
+#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
+
+#if defined(BUILD_EMBED_FIRST_CHUNK)
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
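
The hunk above carries pcpu_build_alloc_info() over verbatim; its first pass walks the possible CPUs and keeps two CPUs in the same group only when cpu_distance_fn() reports LOCAL_DISTANCE in both directions. The stand-alone C program below is a rough sketch of just that pass outside the kernel, using an invented 4-CPU, two-node distance matrix; the NR_CPUS and LOCAL_DISTANCE values and the distance[][] table are illustrative assumptions, not taken from the patch.

/*
 * Sketch only, not kernel code: replay the proximity-grouping pass of
 * pcpu_build_alloc_info() on a made-up topology.  CPUs 0-1 sit on one
 * node and CPUs 2-3 on another (local distance 10, remote 20), so the
 * loop should end up with two groups of two CPUs each.
 */
#include <stdio.h>

#define NR_CPUS		4
#define LOCAL_DISTANCE	10

static const int distance[NR_CPUS][NR_CPUS] = {
	{ 10, 10, 20, 20 },
	{ 10, 10, 20, 20 },
	{ 20, 20, 10, 10 },
	{ 20, 20, 10, 10 },
};

int main(void)
{
	int group_map[NR_CPUS] = { 0 };
	int group_cnt[NR_CPUS] = { 0 };
	int nr_groups = 1;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		int group = 0;
next_group:
		for (int tcpu = 0; tcpu < NR_CPUS; tcpu++) {
			if (cpu == tcpu)
				break;
			/* same test as the kernel: both directions must be local */
			if (group_map[tcpu] == group &&
			    (distance[cpu][tcpu] > LOCAL_DISTANCE ||
			     distance[tcpu][cpu] > LOCAL_DISTANCE)) {
				group++;
				if (group + 1 > nr_groups)
					nr_groups = group + 1;
				goto next_group;
			}
		}
		group_map[cpu] = group;
		group_cnt[group]++;
	}

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%d -> group %d (%d cpus in group)\n",
		       cpu, group_map[cpu], group_cnt[group_map[cpu]]);
	printf("nr_groups = %d\n", nr_groups);
	return 0;
}

Compiled and run as is, this prints two groups of two CPUs, which is what the kernel loop computes for such a topology when a cpu_distance_fn is supplied.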
@@ -1662,10 +1675,9 @@ out_free:
 	free_bootmem(__pa(areas), areas_size);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-	  !CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* BUILD_EMBED_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#ifdef BUILD_PAGE_FIRST_CHUNK
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1773,7 +1785,7 @@ out_free_ar:
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
+#endif /* BUILD_PAGE_FIRST_CHUNK */
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 /*
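
After grouping, pcpu_build_alloc_info() sizes the units: it rounds the minimum unit size up to a whole multiple of atom_size to get alloc_size, then scans units_per_alloc (upa) downward from the largest page-aligned value, skipping candidates that would leave more than num_possible_cpus()/3 unit slots unused and stopping once the number of allocations would grow. The stand-alone sketch below runs that heuristic on assumed inputs: a 2 MB atom_size, a page-aligned 44 KB per-CPU footprint, 4 KB pages and the two groups of two CPUs from the previous sketch. The helper name roundup_to() and all of the numbers are illustrative, not taken from the patch.

/*
 * Sketch only, not kernel code: the units_per_alloc selection of
 * pcpu_build_alloc_info() replayed on invented sizes.
 */
#include <stdio.h>
#include <limits.h>

#define PAGE_SIZE	4096UL
#define NR_GROUPS	2

static unsigned long roundup_to(unsigned long x, unsigned long a)
{
	return ((x + a - 1) / a) * a;		/* like the kernel's roundup() */
}

int main(void)
{
	const int group_cnt[NR_GROUPS] = { 2, 2 };	/* CPUs per group */
	const int nr_cpus = 4;
	unsigned long min_unit_size = 45056;		/* assumed 44 KB needed per CPU */
	unsigned long atom_size = 2UL << 20;		/* assumed 2 MB allocation atom */
	unsigned long alloc_size = roundup_to(min_unit_size, atom_size);
	int upa, max_upa, best_upa = 1, last_allocs = INT_MAX;

	/* largest units-per-alloc that keeps every unit page aligned */
	upa = alloc_size / min_unit_size;
	while (alloc_size % upa || (alloc_size / upa) % PAGE_SIZE)
		upa--;
	max_upa = upa;

	/* grow the unit (shrink upa) while wastage stays within 1/3 of the CPUs */
	for (upa = max_upa; upa; upa--) {
		int allocs = 0, wasted = 0;

		if (alloc_size % upa || (alloc_size / upa) % PAGE_SIZE)
			continue;
		for (int g = 0; g < NR_GROUPS; g++) {
			int this_allocs = (group_cnt[g] + upa - 1) / upa;

			allocs += this_allocs;
			wasted += this_allocs * upa - group_cnt[g];
		}
		if (wasted > nr_cpus / 3)
			continue;
		if (allocs > last_allocs)
			break;
		last_allocs = allocs;
		best_upa = upa;
	}

	printf("alloc_size=%lu max_upa=%d best_upa=%d unit_size=%lu\n",
	       alloc_size, max_upa, best_upa, alloc_size / best_upa);
	return 0;
}

With these inputs the loop settles on best_upa == 2: two CPUs share each 2 MB allocation and every unit spans 1 MB, which is the kind of trade-off the kernel heuristic makes when large allocation atoms are in use.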