aboutsummaryrefslogtreecommitdiffstats
path: root/mm/percpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/percpu.c')
-rw-r--r-- mm/percpu.c 209
1 files changed, 205 insertions, 4 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index f3fe7bc7378f..17db527ee2e2 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1190,6 +1190,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
1190 return pcpu_unit_size; 1190 return pcpu_unit_size;
1191} 1191}
1192 1192
/*
 * Compute the page-aligned total size of the first chunk.
 *
 * The total covers @static_size, @reserved_size and the requested
 * dynamic size; a negative *@dyn_sizep contributes nothing to the sum.
 * Unless the caller asked for exactly zero dynamic space, *@dyn_sizep
 * is rewritten to whatever is left of the aligned total once the static
 * and reserved parts are taken out.
 *
 * Returns the page-aligned total.
 */
static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size,
				 ssize_t *dyn_sizep)
{
	ssize_t dyn_request = *dyn_sizep;
	size_t total = static_size + reserved_size;

	if (dyn_request > 0)
		total += dyn_request;
	total = PFN_ALIGN(total);

	/* an explicit request of 0 is left untouched */
	if (dyn_request)
		*dyn_sizep = total - static_size - reserved_size;

	return total;
}
1205
1193/* 1206/*
1194 * Embedding first chunk setup helper. 1207 * Embedding first chunk setup helper.
1195 */ 1208 */
@@ -1241,10 +1254,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
1241 unsigned int cpu; 1254 unsigned int cpu;
1242 1255
1243 /* determine parameters and allocate */ 1256 /* determine parameters and allocate */
1244 pcpue_size = PFN_ALIGN(static_size + reserved_size + 1257 pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
1245 (dyn_size >= 0 ? dyn_size : 0));
1246 if (dyn_size != 0)
1247 dyn_size = pcpue_size - static_size - reserved_size;
1248 1258
1249 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); 1259 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
1250 chunk_size = pcpue_unit_size * num_possible_cpus(); 1260 chunk_size = pcpue_unit_size * num_possible_cpus();
@@ -1391,6 +1401,197 @@ out_free_ar:
1391} 1401}
1392 1402
1393/* 1403/*
1404 * Large page remapping first chunk setup helper
1405 */
1406#ifdef CONFIG_NEED_MULTIPLE_NODES
1407struct pcpul_ent {
1408 unsigned int cpu;
1409 void *ptr;
1410};
1411
1412static size_t pcpul_size;
1413static size_t pcpul_unit_size;
1414static struct pcpul_ent *pcpul_map;
1415static struct vm_struct pcpul_vm;
1416
1417static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
1418{
1419 size_t off = (size_t)pageno << PAGE_SHIFT;
1420
1421 if (off >= pcpul_size)
1422 return NULL;
1423
1424 return virt_to_page(pcpul_map[cpu].ptr + off);
1425}
1426
1427/**
1428 * pcpu_lpage_first_chunk - remap the first percpu chunk using large page
1429 * @static_size: the size of static percpu area in bytes
1430 * @reserved_size: the size of reserved percpu area in bytes
1431 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
1432 * @lpage_size: the size of a large page
1433 * @alloc_fn: function to allocate percpu lpage, always called with lpage_size
1434 * @free_fn: function to free percpu memory, @size <= lpage_size
1435 * @map_fn: function to map percpu lpage, always called with lpage_size
1436 *
1437 * This allocator uses large page as unit. A large page is allocated
1438 * for each cpu and each is remapped into vmalloc area using large
1439 * page mapping. As large page can be quite large, only part of it is
1440 * used for the first chunk. Unused part is returned to the bootmem
1441 * allocator.
1442 *
1443 * So, the large pages are mapped twice - once to the physical mapping
1444 * and to the vmalloc area for the first percpu chunk. The double
1445 * mapping does add one more large TLB entry pressure but still is
1446 * much better than only using 4k mappings while still being NUMA
1447 * friendly.
1448 *
1449 * RETURNS:
1450 * The determined pcpu_unit_size which can be used to initialize
1451 * percpu access on success, -errno on failure.
1452 */
1453ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size,
1454 ssize_t dyn_size, size_t lpage_size,
1455 pcpu_fc_alloc_fn_t alloc_fn,
1456 pcpu_fc_free_fn_t free_fn,
1457 pcpu_fc_map_fn_t map_fn)
1458{
1459 size_t size_sum;
1460 size_t map_size;
1461 unsigned int cpu;
1462 int i, j;
1463 ssize_t ret;
1464
1465 /*
1466 * Currently supports only single page. Supporting multiple
1467 * pages won't be too difficult if it ever becomes necessary.
1468 */
1469 size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
1470
1471 pcpul_unit_size = lpage_size;
1472 pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
1473 if (pcpul_size > pcpul_unit_size) {
1474 pr_warning("PERCPU: static data is larger than large page, "
1475 "can't use large page\n");
1476 return -EINVAL;
1477 }
1478
1479 /* allocate pointer array and alloc large pages */
1480 map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
1481 pcpul_map = alloc_bootmem(map_size);
1482
1483 for_each_possible_cpu(cpu) {
1484 void *ptr;
1485
1486 ptr = alloc_fn(cpu, lpage_size);
1487 if (!ptr) {
1488 pr_warning("PERCPU: failed to allocate large page "
1489 "for cpu%u\n", cpu);
1490 goto enomem;
1491 }
1492
1493 /*
1494 * Only use pcpul_size bytes and give back the rest.
1495 *
1496 * Ingo: The lpage_size up-rounding bootmem is needed
1497 * to make sure the partial lpage is still fully RAM -
1498 * it's not well-specified to have a incompatible area
1499 * (unmapped RAM, device memory, etc.) in that hole.
1500 */
1501 free_fn(ptr + pcpul_size, lpage_size - pcpul_size);
1502
1503 pcpul_map[cpu].cpu = cpu;
1504 pcpul_map[cpu].ptr = ptr;
1505
1506 memcpy(ptr, __per_cpu_load, static_size);
1507 }
1508
1509 /* allocate address and map */
1510 pcpul_vm.flags = VM_ALLOC;
1511 pcpul_vm.size = num_possible_cpus() * pcpul_unit_size;
1512 vm_area_register_early(&pcpul_vm, pcpul_unit_size);
1513
1514 for_each_possible_cpu(cpu)
1515 map_fn(pcpul_map[cpu].ptr, pcpul_unit_size,
1516 pcpul_vm.addr + cpu * pcpul_unit_size);
1517
1518 /* we're ready, commit */
1519 pr_info("PERCPU: Remapped at %p with large pages, static data "
1520 "%zu bytes\n", pcpul_vm.addr, static_size);
1521
1522 ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
1523 reserved_size, dyn_size, pcpul_unit_size,
1524 pcpul_vm.addr, NULL);
1525
1526 /* sort pcpul_map array for pcpu_lpage_remapped() */
1527 for (i = 0; i < num_possible_cpus() - 1; i++)
1528 for (j = i + 1; j < num_possible_cpus(); j++)
1529 if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
1530 struct pcpul_ent tmp = pcpul_map[i];
1531 pcpul_map[i] = pcpul_map[j];
1532 pcpul_map[j] = tmp;
1533 }
1534
1535 return ret;
1536
1537enomem:
1538 for_each_possible_cpu(cpu)
1539 if (pcpul_map[cpu].ptr)
1540 free_fn(pcpul_map[cpu].ptr, pcpul_size);
1541 free_bootmem(__pa(pcpul_map), map_size);
1542 return -ENOMEM;
1543}
1544
1545/**
1546 * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
1547 * @kaddr: the kernel address in question
1548 *
1549 * Determine whether @kaddr falls in the pcpul recycled area. This is
1550 * used by pageattr to detect VM aliases and break up the pcpu large
1551 * page mapping such that the same physical page is not mapped under
1552 * different attributes.
1553 *
1554 * The recycled area is always at the tail of a partially used large
1555 * page.
1556 *
1557 * RETURNS:
1558 * Address of corresponding remapped pcpu address if match is found;
1559 * otherwise, NULL.
1560 */
1561void *pcpu_lpage_remapped(void *kaddr)
1562{
1563 unsigned long unit_mask = pcpul_unit_size - 1;
1564 void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask);
1565 unsigned long offset = (unsigned long)kaddr & unit_mask;
1566 int left = 0, right = num_possible_cpus() - 1;
1567 int pos;
1568
1569 /* pcpul in use at all? */
1570 if (!pcpul_map)
1571 return NULL;
1572
1573 /* okay, perform binary search */
1574 while (left <= right) {
1575 pos = (left + right) / 2;
1576
1577 if (pcpul_map[pos].ptr < lpage_addr)
1578 left = pos + 1;
1579 else if (pcpul_map[pos].ptr > lpage_addr)
1580 right = pos - 1;
1581 else {
1582 /* it shouldn't be in the area for the first chunk */
1583 WARN_ON(offset < pcpul_size);
1584
1585 return pcpul_vm.addr +
1586 pcpul_map[pos].cpu * pcpul_unit_size + offset;
1587 }
1588 }
1589
1590 return NULL;
1591}
1592#endif
1593
1594/*
1394 * Generic percpu area setup. 1595 * Generic percpu area setup.
1395 * 1596 *
1396 * The embedding helper is used because its behavior closely resembles 1597 * The embedding helper is used because its behavior closely resembles