aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/percpu.c339
1 files changed, 172 insertions, 167 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 3316e3aac7ee..2b9c4b2a2fc0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1231,6 +1231,178 @@ void free_percpu(void *ptr)
1231} 1231}
1232EXPORT_SYMBOL_GPL(free_percpu); 1232EXPORT_SYMBOL_GPL(free_percpu);
1233 1233
/**
 * pcpu_calc_fc_sizes - compute the aligned total size of the first chunk
 * @static_size: the size of static percpu area in bytes
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_sizep: in/out parameter for dynamic size, negative for auto
 *
 * Sums the static, reserved and (non-negative) dynamic sizes and passes
 * the result through PFN_ALIGN().  Unless the caller asked for exactly
 * zero dynamic bytes, *@dyn_sizep is rewritten to whatever remains of
 * the aligned total after the static and reserved areas.
 *
 * RETURNS:
 * The PFN_ALIGN()ed sum of the three areas.
 */
static inline size_t pcpu_calc_fc_sizes(size_t static_size,
					size_t reserved_size,
					ssize_t *dyn_sizep)
{
	const size_t fixed_size = static_size + reserved_size;
	size_t total = fixed_size;

	/* a negative request means "auto" and adds nothing up front */
	if (*dyn_sizep >= 0)
		total += *dyn_sizep;
	total = PFN_ALIGN(total);

	/* hand the alignment slack to the dynamic area unless it is disabled */
	if (*dyn_sizep != 0)
		*dyn_sizep = total - fixed_size;

	return total;
}
1247
1248#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
1249/**
1250 * pcpu_lpage_build_unit_map - build unit_map for large page remapping
1251 * @reserved_size: the size of reserved percpu area in bytes
1252 * @dyn_sizep: in/out parameter for dynamic size, -1 for auto
1253 * @unit_sizep: out parameter for unit size
1254 * @unit_map: unit_map to be filled
1255 * @cpu_distance_fn: callback to determine distance between cpus
1256 *
1257 * This function builds cpu -> unit map and determine other parameters
1258 * considering needed percpu size, large page size and distances
1259 * between CPUs in NUMA.
1260 *
1261 * CPUs which are of LOCAL_DISTANCE both ways are grouped together and
1262 * may share units in the same large page. The returned configuration
1263 * is guaranteed to have CPUs on different nodes on different large
1264 * pages and >=75% usage of allocated virtual address space.
1265 *
1266 * RETURNS:
1267 * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and
1268 * returns the number of units to be allocated. -errno on failure.
1269 */
1270int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep,
1271 size_t *unit_sizep, size_t lpage_size,
1272 int *unit_map,
1273 pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
1274{
1275 static int group_map[NR_CPUS] __initdata;
1276 static int group_cnt[NR_CPUS] __initdata;
1277 const size_t static_size = __per_cpu_end - __per_cpu_start;
1278 int group_cnt_max = 0;
1279 size_t size_sum, min_unit_size, alloc_size;
1280 int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
1281 int last_allocs;
1282 unsigned int cpu, tcpu;
1283 int group, unit;
1284
1285 /*
1286 * Determine min_unit_size, alloc_size and max_upa such that
1287 * alloc_size is multiple of lpage_size and is the smallest
1288 * which can accomodate 4k aligned segments which are equal to
1289 * or larger than min_unit_size.
1290 */
1291 size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep);
1292 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
1293
1294 alloc_size = roundup(min_unit_size, lpage_size);
1295 upa = alloc_size / min_unit_size;
1296 while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
1297 upa--;
1298 max_upa = upa;
1299
1300 /* group cpus according to their proximity */
1301 for_each_possible_cpu(cpu) {
1302 group = 0;
1303 next_group:
1304 for_each_possible_cpu(tcpu) {
1305 if (cpu == tcpu)
1306 break;
1307 if (group_map[tcpu] == group &&
1308 (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
1309 cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
1310 group++;
1311 goto next_group;
1312 }
1313 }
1314 group_map[cpu] = group;
1315 group_cnt[group]++;
1316 group_cnt_max = max(group_cnt_max, group_cnt[group]);
1317 }
1318
1319 /*
1320 * Expand unit size until address space usage goes over 75%
1321 * and then as much as possible without using more address
1322 * space.
1323 */
1324 last_allocs = INT_MAX;
1325 for (upa = max_upa; upa; upa--) {
1326 int allocs = 0, wasted = 0;
1327
1328 if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
1329 continue;
1330
1331 for (group = 0; group_cnt[group]; group++) {
1332 int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
1333 allocs += this_allocs;
1334 wasted += this_allocs * upa - group_cnt[group];
1335 }
1336
1337 /*
1338 * Don't accept if wastage is over 25%. The
1339 * greater-than comparison ensures upa==1 always
1340 * passes the following check.
1341 */
1342 if (wasted > num_possible_cpus() / 3)
1343 continue;
1344
1345 /* and then don't consume more memory */
1346 if (allocs > last_allocs)
1347 break;
1348 last_allocs = allocs;
1349 best_upa = upa;
1350 }
1351 *unit_sizep = alloc_size / best_upa;
1352
1353 /* assign units to cpus accordingly */
1354 unit = 0;
1355 for (group = 0; group_cnt[group]; group++) {
1356 for_each_possible_cpu(cpu)
1357 if (group_map[cpu] == group)
1358 unit_map[cpu] = unit++;
1359 unit = roundup(unit, best_upa);
1360 }
1361
1362 return unit; /* unit contains aligned number of units */
1363}
1364
1365static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map,
1366 unsigned int *cpup);
1367
1368static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size,
1369 size_t reserved_size, size_t dyn_size,
1370 size_t unit_size, size_t lpage_size,
1371 const int *unit_map, int nr_units)
1372{
1373 int width = 1, v = nr_units;
1374 char empty_str[] = "--------";
1375 int upl, lpl; /* units per lpage, lpage per line */
1376 unsigned int cpu;
1377 int lpage, unit;
1378
1379 while (v /= 10)
1380 width++;
1381 empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0';
1382
1383 upl = max_t(int, lpage_size / unit_size, 1);
1384 lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1));
1385
1386 printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl,
1387 static_size, reserved_size, dyn_size, unit_size, lpage_size);
1388
1389 for (lpage = 0, unit = 0; unit < nr_units; unit++) {
1390 if (!(unit % upl)) {
1391 if (!(lpage++ % lpl)) {
1392 printk("\n");
1393 printk("%spcpu-lpage: ", lvl);
1394 } else
1395 printk("| ");
1396 }
1397 if (pcpul_unit_to_cpu(unit, unit_map, &cpu))
1398 printk("%0*d ", width, cpu);
1399 else
1400 printk("%s ", empty_str);
1401 }
1402 printk("\n");
1403}
1404#endif
1405
1234/** 1406/**
1235 * pcpu_setup_first_chunk - initialize the first percpu chunk 1407 * pcpu_setup_first_chunk - initialize the first percpu chunk
1236 * @static_size: the size of static percpu area in bytes 1408 * @static_size: the size of static percpu area in bytes
@@ -1441,20 +1613,6 @@ static int __init percpu_alloc_setup(char *str)
1441} 1613}
1442early_param("percpu_alloc", percpu_alloc_setup); 1614early_param("percpu_alloc", percpu_alloc_setup);
1443 1615
/**
 * pcpu_calc_fc_sizes - compute the aligned total size of the first chunk
 * @static_size: the size of static percpu area in bytes
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_sizep: in/out parameter for dynamic size, negative for auto
 *
 * Sums the static, reserved and (non-negative) dynamic sizes and passes
 * the result through PFN_ALIGN().  Unless the caller asked for exactly
 * zero dynamic bytes, *@dyn_sizep is rewritten to whatever remains of
 * the aligned total after the static and reserved areas.
 *
 * RETURNS:
 * The PFN_ALIGN()ed sum of the three areas.
 */
static inline size_t pcpu_calc_fc_sizes(size_t static_size,
					size_t reserved_size,
					ssize_t *dyn_sizep)
{
	const size_t fixed_size = static_size + reserved_size;
	size_t total = fixed_size;

	/* a negative request means "auto" and adds nothing up front */
	if (*dyn_sizep >= 0)
		total += *dyn_sizep;
	total = PFN_ALIGN(total);

	/* hand the alignment slack to the dynamic area unless it is disabled */
	if (*dyn_sizep != 0)
		*dyn_sizep = total - fixed_size;

	return total;
}
1457
1458#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ 1616#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
1459 !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) 1617 !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
1460/** 1618/**
@@ -1637,122 +1795,6 @@ out_free_ar:
1637#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ 1795#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
1638 1796
1639#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK 1797#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
1640/**
1641 * pcpu_lpage_build_unit_map - build unit_map for large page remapping
1642 * @reserved_size: the size of reserved percpu area in bytes
1643 * @dyn_sizep: in/out parameter for dynamic size, -1 for auto
1644 * @unit_sizep: out parameter for unit size
1645 * @unit_map: unit_map to be filled
1646 * @cpu_distance_fn: callback to determine distance between cpus
1647 *
1648 * This function builds cpu -> unit map and determine other parameters
1649 * considering needed percpu size, large page size and distances
1650 * between CPUs in NUMA.
1651 *
1652 * CPUs which are of LOCAL_DISTANCE both ways are grouped together and
1653 * may share units in the same large page. The returned configuration
1654 * is guaranteed to have CPUs on different nodes on different large
1655 * pages and >=75% usage of allocated virtual address space.
1656 *
1657 * RETURNS:
1658 * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and
1659 * returns the number of units to be allocated. -errno on failure.
1660 */
1661int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep,
1662 size_t *unit_sizep, size_t lpage_size,
1663 int *unit_map,
1664 pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
1665{
1666 static int group_map[NR_CPUS] __initdata;
1667 static int group_cnt[NR_CPUS] __initdata;
1668 const size_t static_size = __per_cpu_end - __per_cpu_start;
1669 int group_cnt_max = 0;
1670 size_t size_sum, min_unit_size, alloc_size;
1671 int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
1672 int last_allocs;
1673 unsigned int cpu, tcpu;
1674 int group, unit;
1675
1676 /*
1677 * Determine min_unit_size, alloc_size and max_upa such that
1678 * alloc_size is multiple of lpage_size and is the smallest
1679 * which can accomodate 4k aligned segments which are equal to
1680 * or larger than min_unit_size.
1681 */
1682 size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep);
1683 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
1684
1685 alloc_size = roundup(min_unit_size, lpage_size);
1686 upa = alloc_size / min_unit_size;
1687 while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
1688 upa--;
1689 max_upa = upa;
1690
1691 /* group cpus according to their proximity */
1692 for_each_possible_cpu(cpu) {
1693 group = 0;
1694 next_group:
1695 for_each_possible_cpu(tcpu) {
1696 if (cpu == tcpu)
1697 break;
1698 if (group_map[tcpu] == group &&
1699 (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
1700 cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
1701 group++;
1702 goto next_group;
1703 }
1704 }
1705 group_map[cpu] = group;
1706 group_cnt[group]++;
1707 group_cnt_max = max(group_cnt_max, group_cnt[group]);
1708 }
1709
1710 /*
1711 * Expand unit size until address space usage goes over 75%
1712 * and then as much as possible without using more address
1713 * space.
1714 */
1715 last_allocs = INT_MAX;
1716 for (upa = max_upa; upa; upa--) {
1717 int allocs = 0, wasted = 0;
1718
1719 if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
1720 continue;
1721
1722 for (group = 0; group_cnt[group]; group++) {
1723 int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
1724 allocs += this_allocs;
1725 wasted += this_allocs * upa - group_cnt[group];
1726 }
1727
1728 /*
1729 * Don't accept if wastage is over 25%. The
1730 * greater-than comparison ensures upa==1 always
1731 * passes the following check.
1732 */
1733 if (wasted > num_possible_cpus() / 3)
1734 continue;
1735
1736 /* and then don't consume more memory */
1737 if (allocs > last_allocs)
1738 break;
1739 last_allocs = allocs;
1740 best_upa = upa;
1741 }
1742 *unit_sizep = alloc_size / best_upa;
1743
1744 /* assign units to cpus accordingly */
1745 unit = 0;
1746 for (group = 0; group_cnt[group]; group++) {
1747 for_each_possible_cpu(cpu)
1748 if (group_map[cpu] == group)
1749 unit_map[cpu] = unit++;
1750 unit = roundup(unit, best_upa);
1751 }
1752
1753 return unit; /* unit contains aligned number of units */
1754}
1755
1756struct pcpul_ent { 1798struct pcpul_ent {
1757 void *ptr; 1799 void *ptr;
1758 void *map_addr; 1800 void *map_addr;
@@ -1778,43 +1820,6 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map,
1778 return false; 1820 return false;
1779} 1821}
1780 1822
1781static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size,
1782 size_t reserved_size, size_t dyn_size,
1783 size_t unit_size, size_t lpage_size,
1784 const int *unit_map, int nr_units)
1785{
1786 int width = 1, v = nr_units;
1787 char empty_str[] = "--------";
1788 int upl, lpl; /* units per lpage, lpage per line */
1789 unsigned int cpu;
1790 int lpage, unit;
1791
1792 while (v /= 10)
1793 width++;
1794 empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0';
1795
1796 upl = max_t(int, lpage_size / unit_size, 1);
1797 lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1));
1798
1799 printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl,
1800 static_size, reserved_size, dyn_size, unit_size, lpage_size);
1801
1802 for (lpage = 0, unit = 0; unit < nr_units; unit++) {
1803 if (!(unit % upl)) {
1804 if (!(lpage++ % lpl)) {
1805 printk("\n");
1806 printk("%spcpu-lpage: ", lvl);
1807 } else
1808 printk("| ");
1809 }
1810 if (pcpul_unit_to_cpu(unit, unit_map, &cpu))
1811 printk("%0*d ", width, cpu);
1812 else
1813 printk("%s ", empty_str);
1814 }
1815 printk("\n");
1816}
1817
1818/** 1823/**
1819 * pcpu_lpage_first_chunk - remap the first percpu chunk using large page 1824 * pcpu_lpage_first_chunk - remap the first percpu chunk using large page
1820 * @reserved_size: the size of reserved percpu area in bytes 1825 * @reserved_size: the size of reserved percpu area in bytes