diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/percpu.c | 339 |
1 files changed, 172 insertions, 167 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 3316e3aac7ee..2b9c4b2a2fc0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1231,6 +1231,178 @@ void free_percpu(void *ptr) | |||
1231 | } | 1231 | } |
1232 | EXPORT_SYMBOL_GPL(free_percpu); | 1232 | EXPORT_SYMBOL_GPL(free_percpu); |
1233 | 1233 | ||
1234 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, | |
1235 | size_t reserved_size, | |
1236 | ssize_t *dyn_sizep) | |
1237 | { | |
1238 | size_t size_sum; /* PFN_ALIGN()ed static + reserved + dynamic size, returned to the caller */ | |
1239 | |||
1240 | size_sum = PFN_ALIGN(static_size + reserved_size + | |
1241 | (*dyn_sizep >= 0 ? *dyn_sizep : 0)); /* a negative *dyn_sizep contributes nothing to the sum */ | |
1242 | if (*dyn_sizep != 0) /* zero is left untouched; any other value is recomputed */ | |
1243 | *dyn_sizep = size_sum - static_size - reserved_size; /* whatever remains after static and reserved */ | |
1244 | |||
1245 | return size_sum; | |
1246 | } | |
1247 | |||
1248 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | ||
1249 | /** | |
1250 | * pcpu_lpage_build_unit_map - build unit_map for large page remapping | |
1251 | * @reserved_size: the size of reserved percpu area in bytes | |
1252 | * @dyn_sizep: in/out parameter for dynamic size, -1 for auto | |
1253 | * @unit_sizep: out parameter for unit size | |
1254 | * @unit_map: cpu -> unit map to be filled, indexed by cpu number | |
1255 | * @cpu_distance_fn: callback to determine distance between cpus | |
1256 | * | |
1257 | * This function builds the cpu -> unit map and determines other parameters | |
1258 | * considering needed percpu size, large page size (@lpage_size) and distances | |
1259 | * between CPUs in NUMA. | |
1260 | * | |
1261 | * CPUs which are of LOCAL_DISTANCE both ways are grouped together and | |
1262 | * may share units in the same large page. The returned configuration | |
1263 | * is guaranteed to have CPUs on different nodes on different large | |
1264 | * pages and >=75% usage of allocated virtual address space. | |
1265 | * | |
1266 | * RETURNS: | |
1267 | * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and | |
1268 | * returns the number of units to be allocated. -errno on failure. | |
1269 | */ | |
1270 | int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | |
1271 | size_t *unit_sizep, size_t lpage_size, | |
1272 | int *unit_map, | |
1273 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | |
1274 | { | |
1275 | static int group_map[NR_CPUS] __initdata; | |
1276 | static int group_cnt[NR_CPUS] __initdata; | |
1277 | const size_t static_size = __per_cpu_end - __per_cpu_start; | |
1278 | int group_cnt_max = 0; | |
1279 | size_t size_sum, min_unit_size, alloc_size; | |
1280 | int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | |
1281 | int last_allocs; | |
1282 | unsigned int cpu, tcpu; | |
1283 | int group, unit; | |
1284 | |||
1285 | /* | |
1286 | * Determine min_unit_size, alloc_size and max_upa such that | |
1287 | * alloc_size is a multiple of lpage_size and is the smallest | |
1288 | * which can accommodate page aligned segments which are equal to | |
1289 | * or larger than min_unit_size. | |
1290 | */ | |
1291 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); | |
1292 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | |
1293 | |||
1294 | alloc_size = roundup(min_unit_size, lpage_size); | |
1295 | upa = alloc_size / min_unit_size; | |
1296 | while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1297 | upa--; | |
1298 | max_upa = upa; | |
1299 | |||
1300 | /* group cpus by proximity: each cpu takes the first group with no earlier member farther than LOCAL_DISTANCE in either direction */ | |
1301 | for_each_possible_cpu(cpu) { | |
1302 | group = 0; | |
1303 | next_group: | |
1304 | for_each_possible_cpu(tcpu) { | |
1305 | if (cpu == tcpu) | |
1306 | break; | |
1307 | if (group_map[tcpu] == group && | |
1308 | (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | |
1309 | cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | |
1310 | group++; | |
1311 | goto next_group; | |
1312 | } | |
1313 | } | |
1314 | group_map[cpu] = group; | |
1315 | group_cnt[group]++; | |
1316 | group_cnt_max = max(group_cnt_max, group_cnt[group]); | |
1317 | } | |
1318 | |||
1319 | /* | |
1320 | * Expand unit size until address space usage goes over 75% | |
1321 | * and then as much as possible without using more address | |
1322 | * space. | |
1323 | */ | |
1324 | last_allocs = INT_MAX; | |
1325 | for (upa = max_upa; upa; upa--) { | |
1326 | int allocs = 0, wasted = 0; | |
1327 | |||
1328 | if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1329 | continue; | |
1330 | |||
1331 | for (group = 0; group_cnt[group]; group++) { | |
1332 | int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | |
1333 | allocs += this_allocs; | |
1334 | wasted += this_allocs * upa - group_cnt[group]; | |
1335 | } | |
1336 | |||
1337 | /* | |
1338 | * Don't accept if wastage is over 25%. The | |
1339 | * greater-than comparison ensures upa==1 always | |
1340 | * passes the following check. | |
1341 | */ | |
1342 | if (wasted > num_possible_cpus() / 3) | |
1343 | continue; | |
1344 | |||
1345 | /* stop as soon as a smaller upa would need more allocations than the last accepted one */ | |
1346 | if (allocs > last_allocs) | |
1347 | break; | |
1348 | last_allocs = allocs; | |
1349 | best_upa = upa; | |
1350 | } | |
1351 | *unit_sizep = alloc_size / best_upa; | |
1352 | |||
1353 | /* assign consecutive units within each group; every group starts on a best_upa boundary */ | |
1354 | unit = 0; | |
1355 | for (group = 0; group_cnt[group]; group++) { | |
1356 | for_each_possible_cpu(cpu) | |
1357 | if (group_map[cpu] == group) | |
1358 | unit_map[cpu] = unit++; | |
1359 | unit = roundup(unit, best_upa); | |
1360 | } | |
1361 | |||
1362 | return unit; /* total unit count, already rounded up to a multiple of best_upa */ | |
1363 | } | |
1364 | |||
1365 | static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | |
1366 | unsigned int *cpup); /* forward declaration; the definition appears later in this file */ | |
1367 | |||
1368 | static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | |
1369 | size_t reserved_size, size_t dyn_size, | |
1370 | size_t unit_size, size_t lpage_size, | |
1371 | const int *unit_map, int nr_units) | |
1372 | { | |
1373 | int width = 1, v = nr_units; | |
1374 | char empty_str[] = "--------"; | |
1375 | int upl, lpl; /* units per large page, large pages per output line */ | |
1376 | unsigned int cpu; | |
1377 | int lpage, unit; | |
1378 | |||
1379 | while (v /= 10) /* width ends up as the number of decimal digits in nr_units */ | |
1380 | width++; | |
1381 | empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; /* trim the placeholder to width dashes, 8 at most */ | |
1382 | |||
1383 | upl = max_t(int, lpage_size / unit_size, 1); | |
1384 | lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); | |
1385 | |||
1386 | printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, | |
1387 | static_size, reserved_size, dyn_size, unit_size, lpage_size); | |
1388 | |||
1389 | for (lpage = 0, unit = 0; unit < nr_units; unit++) { | |
1390 | if (!(unit % upl)) { | |
1391 | if (!(lpage++ % lpl)) { | |
1392 | printk("\n"); | |
1393 | printk("%spcpu-lpage: ", lvl); | |
1394 | } else | |
1395 | printk("| "); | |
1396 | } | |
1397 | if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) | |
1398 | printk("%0*d ", width, cpu); | |
1399 | else | |
1400 | printk("%s ", empty_str); | |
1401 | } | |
1402 | printk("\n"); | |
1403 | } | |
1404 | #endif | ||
1405 | |||
1234 | /** | 1406 | /** |
1235 | * pcpu_setup_first_chunk - initialize the first percpu chunk | 1407 | * pcpu_setup_first_chunk - initialize the first percpu chunk |
1236 | * @static_size: the size of static percpu area in bytes | 1408 | * @static_size: the size of static percpu area in bytes |
@@ -1441,20 +1613,6 @@ static int __init percpu_alloc_setup(char *str) | |||
1441 | } | 1613 | } |
1442 | early_param("percpu_alloc", percpu_alloc_setup); | 1614 | early_param("percpu_alloc", percpu_alloc_setup); |
1443 | 1615 | ||
1444 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, | ||
1445 | size_t reserved_size, | ||
1446 | ssize_t *dyn_sizep) | ||
1447 | { | ||
1448 | size_t size_sum; | ||
1449 | |||
1450 | size_sum = PFN_ALIGN(static_size + reserved_size + | ||
1451 | (*dyn_sizep >= 0 ? *dyn_sizep : 0)); | ||
1452 | if (*dyn_sizep != 0) | ||
1453 | *dyn_sizep = size_sum - static_size - reserved_size; | ||
1454 | |||
1455 | return size_sum; | ||
1456 | } | ||
1457 | |||
1458 | #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ | 1616 | #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ |
1459 | !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) | 1617 | !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) |
1460 | /** | 1618 | /** |
@@ -1637,122 +1795,6 @@ out_free_ar: | |||
1637 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ | 1795 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ |
1638 | 1796 | ||
1639 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | 1797 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK |
1640 | /** | ||
1641 | * pcpu_lpage_build_unit_map - build unit_map for large page remapping | ||
1642 | * @reserved_size: the size of reserved percpu area in bytes | ||
1643 | * @dyn_sizep: in/out parameter for dynamic size, -1 for auto | ||
1644 | * @unit_sizep: out parameter for unit size | ||
1645 | * @unit_map: unit_map to be filled | ||
1646 | * @cpu_distance_fn: callback to determine distance between cpus | ||
1647 | * | ||
1648 | * This function builds cpu -> unit map and determine other parameters | ||
1649 | * considering needed percpu size, large page size and distances | ||
1650 | * between CPUs in NUMA. | ||
1651 | * | ||
1652 | * CPUs which are of LOCAL_DISTANCE both ways are grouped together and | ||
1653 | * may share units in the same large page. The returned configuration | ||
1654 | * is guaranteed to have CPUs on different nodes on different large | ||
1655 | * pages and >=75% usage of allocated virtual address space. | ||
1656 | * | ||
1657 | * RETURNS: | ||
1658 | * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and | ||
1659 | * returns the number of units to be allocated. -errno on failure. | ||
1660 | */ | ||
1661 | int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | ||
1662 | size_t *unit_sizep, size_t lpage_size, | ||
1663 | int *unit_map, | ||
1664 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | ||
1665 | { | ||
1666 | static int group_map[NR_CPUS] __initdata; | ||
1667 | static int group_cnt[NR_CPUS] __initdata; | ||
1668 | const size_t static_size = __per_cpu_end - __per_cpu_start; | ||
1669 | int group_cnt_max = 0; | ||
1670 | size_t size_sum, min_unit_size, alloc_size; | ||
1671 | int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | ||
1672 | int last_allocs; | ||
1673 | unsigned int cpu, tcpu; | ||
1674 | int group, unit; | ||
1675 | |||
1676 | /* | ||
1677 | * Determine min_unit_size, alloc_size and max_upa such that | ||
1678 | * alloc_size is multiple of lpage_size and is the smallest | ||
1679 | * which can accomodate 4k aligned segments which are equal to | ||
1680 | * or larger than min_unit_size. | ||
1681 | */ | ||
1682 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); | ||
1683 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | ||
1684 | |||
1685 | alloc_size = roundup(min_unit_size, lpage_size); | ||
1686 | upa = alloc_size / min_unit_size; | ||
1687 | while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | ||
1688 | upa--; | ||
1689 | max_upa = upa; | ||
1690 | |||
1691 | /* group cpus according to their proximity */ | ||
1692 | for_each_possible_cpu(cpu) { | ||
1693 | group = 0; | ||
1694 | next_group: | ||
1695 | for_each_possible_cpu(tcpu) { | ||
1696 | if (cpu == tcpu) | ||
1697 | break; | ||
1698 | if (group_map[tcpu] == group && | ||
1699 | (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | ||
1700 | cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | ||
1701 | group++; | ||
1702 | goto next_group; | ||
1703 | } | ||
1704 | } | ||
1705 | group_map[cpu] = group; | ||
1706 | group_cnt[group]++; | ||
1707 | group_cnt_max = max(group_cnt_max, group_cnt[group]); | ||
1708 | } | ||
1709 | |||
1710 | /* | ||
1711 | * Expand unit size until address space usage goes over 75% | ||
1712 | * and then as much as possible without using more address | ||
1713 | * space. | ||
1714 | */ | ||
1715 | last_allocs = INT_MAX; | ||
1716 | for (upa = max_upa; upa; upa--) { | ||
1717 | int allocs = 0, wasted = 0; | ||
1718 | |||
1719 | if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | ||
1720 | continue; | ||
1721 | |||
1722 | for (group = 0; group_cnt[group]; group++) { | ||
1723 | int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | ||
1724 | allocs += this_allocs; | ||
1725 | wasted += this_allocs * upa - group_cnt[group]; | ||
1726 | } | ||
1727 | |||
1728 | /* | ||
1729 | * Don't accept if wastage is over 25%. The | ||
1730 | * greater-than comparison ensures upa==1 always | ||
1731 | * passes the following check. | ||
1732 | */ | ||
1733 | if (wasted > num_possible_cpus() / 3) | ||
1734 | continue; | ||
1735 | |||
1736 | /* and then don't consume more memory */ | ||
1737 | if (allocs > last_allocs) | ||
1738 | break; | ||
1739 | last_allocs = allocs; | ||
1740 | best_upa = upa; | ||
1741 | } | ||
1742 | *unit_sizep = alloc_size / best_upa; | ||
1743 | |||
1744 | /* assign units to cpus accordingly */ | ||
1745 | unit = 0; | ||
1746 | for (group = 0; group_cnt[group]; group++) { | ||
1747 | for_each_possible_cpu(cpu) | ||
1748 | if (group_map[cpu] == group) | ||
1749 | unit_map[cpu] = unit++; | ||
1750 | unit = roundup(unit, best_upa); | ||
1751 | } | ||
1752 | |||
1753 | return unit; /* unit contains aligned number of units */ | ||
1754 | } | ||
1755 | |||
1756 | struct pcpul_ent { | 1798 | struct pcpul_ent { |
1757 | void *ptr; | 1799 | void *ptr; |
1758 | void *map_addr; | 1800 | void *map_addr; |
@@ -1778,43 +1820,6 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | |||
1778 | return false; | 1820 | return false; |
1779 | } | 1821 | } |
1780 | 1822 | ||
1781 | static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | ||
1782 | size_t reserved_size, size_t dyn_size, | ||
1783 | size_t unit_size, size_t lpage_size, | ||
1784 | const int *unit_map, int nr_units) | ||
1785 | { | ||
1786 | int width = 1, v = nr_units; | ||
1787 | char empty_str[] = "--------"; | ||
1788 | int upl, lpl; /* units per lpage, lpage per line */ | ||
1789 | unsigned int cpu; | ||
1790 | int lpage, unit; | ||
1791 | |||
1792 | while (v /= 10) | ||
1793 | width++; | ||
1794 | empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; | ||
1795 | |||
1796 | upl = max_t(int, lpage_size / unit_size, 1); | ||
1797 | lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); | ||
1798 | |||
1799 | printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, | ||
1800 | static_size, reserved_size, dyn_size, unit_size, lpage_size); | ||
1801 | |||
1802 | for (lpage = 0, unit = 0; unit < nr_units; unit++) { | ||
1803 | if (!(unit % upl)) { | ||
1804 | if (!(lpage++ % lpl)) { | ||
1805 | printk("\n"); | ||
1806 | printk("%spcpu-lpage: ", lvl); | ||
1807 | } else | ||
1808 | printk("| "); | ||
1809 | } | ||
1810 | if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) | ||
1811 | printk("%0*d ", width, cpu); | ||
1812 | else | ||
1813 | printk("%s ", empty_str); | ||
1814 | } | ||
1815 | printk("\n"); | ||
1816 | } | ||
1817 | |||
1818 | /** | 1823 | /** |
1819 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page | 1824 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page |
1820 | * @reserved_size: the size of reserved percpu area in bytes | 1825 | * @reserved_size: the size of reserved percpu area in bytes |