Diffstat (limited to 'mm/percpu.c')
 -rw-r--r--	mm/percpu.c	113
 1 files changed, 85 insertions, 28 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index cc9c4c64606d..c2826d05505c 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1747,15 +1747,25 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
  * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ * @alloc_fn: function to allocate percpu page
+ * @free_fn: function to free percpu page
  *
  * This is a helper to ease setting up embedded first percpu chunk and
  * can be called where pcpu_setup_first_chunk() is expected.
  *
  * If this function is used to setup the first chunk, it is allocated
- * as a contiguous area using bootmem allocator and used as-is without
- * being mapped into vmalloc area. This enables the first chunk to
- * piggy back on the linear physical mapping which often uses larger
- * page size.
+ * by calling @alloc_fn and used as-is without being mapped into
+ * vmalloc area. Allocations are always whole multiples of @atom_size
+ * aligned to @atom_size.
+ *
+ * This enables the first chunk to piggy back on the linear physical
+ * mapping which often uses larger page size. Please note that this
+ * can result in very sparse cpu->unit mapping on NUMA machines thus
+ * requiring large vmalloc address space. Don't use this allocator if
+ * vmalloc space is not orders of magnitude larger than distances
+ * between node memory addresses (ie. 32bit NUMA machines).
  *
  * When @dyn_size is positive, dynamic area might be larger than
  * specified to fill page alignment. When @dyn_size is auto,
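
For reference, the three new parameters use the first-chunk callback types declared in include/linux/percpu.h. As a sketch (paraphrased rather than quoted from this patch; consult the header in the matching tree for the authoritative declarations), they look like:

	typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
					     size_t align);
	typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
	typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from,
						unsigned int to);

@alloc_fn is expected to return memory local to @cpu where possible, and @cpu_distance_fn reports a relative distance between two CPUs that pcpu_build_alloc_info() uses to group nearby CPUs into the same allocation group.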
@@ -1763,53 +1773,88 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * and reserved areas.
  *
  * If the needed size is smaller than the minimum or specified unit
- * size, the leftover is returned to the bootmem allocator.
+ * size, the leftover is returned using @free_fn.
  *
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size)
+int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+				  size_t atom_size,
+				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
+				  pcpu_fc_alloc_fn_t alloc_fn,
+				  pcpu_fc_free_fn_t free_fn)
 {
+	void *base = (void *)ULONG_MAX;
+	void **areas = NULL;
 	struct pcpu_alloc_info *ai;
-	size_t size_sum, chunk_size;
-	void *base;
-	int unit;
-	int rc;
+	size_t size_sum, areas_size;
+	int group, i, rc;
 
-	ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL);
+	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
+				   cpu_distance_fn);
 	if (IS_ERR(ai))
 		return PTR_ERR(ai);
-	BUG_ON(ai->nr_groups != 1);
-	BUG_ON(ai->groups[0].nr_units != num_possible_cpus());
 
 	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
-	chunk_size = ai->unit_size * num_possible_cpus();
+	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
 
-	base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
-				       __pa(MAX_DMA_ADDRESS));
-	if (!base) {
-		pr_warning("PERCPU: failed to allocate %zu bytes for "
-			   "embedding\n", chunk_size);
+	areas = alloc_bootmem_nopanic(areas_size);
+	if (!areas) {
 		rc = -ENOMEM;
-		goto out_free_ai;
+		goto out_free;
 	}
 
-	/* return the leftover and copy */
-	for (unit = 0; unit < num_possible_cpus(); unit++) {
-		void *ptr = base + unit * ai->unit_size;
+	/* allocate, copy and determine base address */
+	for (group = 0; group < ai->nr_groups; group++) {
+		struct pcpu_group_info *gi = &ai->groups[group];
+		unsigned int cpu = NR_CPUS;
+		void *ptr;
+
+		for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
+			cpu = gi->cpu_map[i];
+		BUG_ON(cpu == NR_CPUS);
+
+		/* allocate space for the whole group */
+		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
+		if (!ptr) {
+			rc = -ENOMEM;
+			goto out_free_areas;
+		}
+		areas[group] = ptr;
 
-		free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum);
-		memcpy(ptr, __per_cpu_load, ai->static_size);
+		base = min(ptr, base);
+
+		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
+			if (gi->cpu_map[i] == NR_CPUS) {
+				/* unused unit, free whole */
+				free_fn(ptr, ai->unit_size);
+				continue;
+			}
+			/* copy and return the unused part */
+			memcpy(ptr, __per_cpu_load, ai->static_size);
+			free_fn(ptr + size_sum, ai->unit_size - size_sum);
+		}
 	}
 
-	/* we're ready, commit */
+	/* base address is now known, determine group base offsets */
+	for (group = 0; group < ai->nr_groups; group++)
+		ai->groups[group].base_offset = areas[group] - base;
+
 	pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
 		PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
 		ai->dyn_size, ai->unit_size);
 
 	rc = pcpu_setup_first_chunk(ai, base);
-out_free_ai:
+	goto out_free;
+
+out_free_areas:
+	for (group = 0; group < ai->nr_groups; group++)
+		free_fn(areas[group],
+			ai->groups[group].nr_units * ai->unit_size);
+out_free:
 	pcpu_free_alloc_info(ai);
+	if (areas)
+		free_bootmem(__pa(areas), areas_size);
 	return rc;
 }
 #endif	/* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
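
The reason allocation is routed through @alloc_fn is that a NUMA architecture can satisfy each group's allocation from the node its CPUs live on; note that pcpu_embed_first_chunk() calls @alloc_fn once per group with the whole group's size, passing one CPU from that group, which is what makes node-local placement possible. A minimal sketch of such a pair, assuming x86-style early_cpu_to_node() and the bootmem allocators of this era (my_fc_alloc and my_fc_free are hypothetical names, not part of this patch):

	static void * __init my_fc_alloc(unsigned int cpu, size_t size,
					 size_t align)
	{
		const unsigned long goal = __pa(MAX_DMA_ADDRESS);
		int node = early_cpu_to_node(cpu);	/* assumed helper */

		/* fall back to a plain allocation when the node is unusable */
		if (node == NUMA_NO_NODE || !node_online(node) ||
		    !NODE_DATA(node))
			return __alloc_bootmem_nopanic(size, align, goal);
		return __alloc_bootmem_node_nopanic(NODE_DATA(node), size,
						    align, goal);
	}

	static void __init my_fc_free(void *ptr, size_t size)
	{
		free_bootmem(__pa(ptr), size);
	}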
@@ -2177,6 +2222,17 @@ void *pcpu_lpage_remapped(void *kaddr)
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
+static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
+				       size_t align)
+{
+	return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
+}
+
+static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
+{
+	free_bootmem(__pa(ptr), size);
+}
+
 void __init setup_per_cpu_areas(void)
 {
 	unsigned long delta;
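
The generic setup_per_cpu_areas() below passes NULL for @cpu_distance_fn, so all possible CPUs land in a single group. A NUMA architecture would instead supply a callback along these lines (my_cpu_distance is a hypothetical name; LOCAL_DISTANCE and REMOTE_DISTANCE come from linux/topology.h, and early_cpu_to_node() is again assumed):

	static int __init my_cpu_distance(unsigned int from, unsigned int to)
	{
		/* same node: near; different nodes: far */
		if (early_cpu_to_node(from) == early_cpu_to_node(to))
			return LOCAL_DISTANCE;
		return REMOTE_DISTANCE;
	}

CPUs whose pairwise distance is LOCAL_DISTANCE end up sharing a group, which is what lets each group's embedded allocation stay on one node.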
@@ -2188,7 +2244,8 @@ void __init setup_per_cpu_areas(void)
 	 * what the legacy allocator did.
 	 */
 	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-				    PERCPU_DYNAMIC_RESERVE);
+				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
+				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
 	if (rc < 0)
 		panic("Failed to initialized percpu areas.");
 
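
To make the trimming in the per-unit loop concrete: suppose ai->unit_size is 64KB and size_sum (static + reserved + dynamic) comes to 44KB. Each unit backed by a real CPU keeps its first 44KB and hands the trailing 20KB back through @free_fn; a unit whose cpu_map slot is NR_CPUS is a hole in a sparse group, so the whole 64KB is returned. The numbers here are illustrative only.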