 Documentation/kernel-parameters.txt |  10 +-
 arch/x86/mm/pageattr.c              |  20 +--
 include/linux/percpu.h              |  16 --
 mm/percpu.c                         | 241 -------
 4 files changed, 6 insertions(+), 281 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dee9ce2e6cfa..e710093e3d32 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			See arch/parisc/kernel/pdc_chassis.c
 
 	percpu_alloc=	Select which percpu first chunk allocator to use.
-			Currently supported values are "embed", "page" and
-			"lpage".  Archs may support subset or none of the
-			selections.  See comments in mm/percpu.c for details
-			on each allocator.  This parameter is primarily for
-			debugging and performance comparison.
+			Currently supported values are "embed" and "page".
+			Archs may support subset or none of the selections.
+			See comments in mm/percpu.c for details on each
+			allocator.  This parameter is primarily for debugging
+			and performance comparison.
 
 	pf.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
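
Usage note: after this hunk only two allocator names remain selectable at boot. For example, a command line containing

    percpu_alloc=page

forces the page-based first chunk allocator on archs that support it, while the removed "lpage" value now falls through to the "unknown allocator" warning in percpu_alloc_setup() further down in this patch.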
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dce282f65700..f53cfc7f963d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr, remapped;
+	unsigned long vaddr;
 	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
@@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	}
 #endif
 
-	/*
-	 * If the PMD page was partially used for per-cpu remapping,
-	 * the recycled area needs to be split and modified.  Because
-	 * the area is always proper subset of a PMD page
-	 * cpa->numpages is guaranteed to be 1 for these areas, so
-	 * there's no need to loop over and check for further remaps.
-	 */
-	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
-	if (remapped) {
-		WARN_ON(cpa->numpages > 1);
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &remapped;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 25359932740e..878836ca999c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -82,7 +82,6 @@ enum pcpu_fc {
 	PCPU_FC_AUTO,
 	PCPU_FC_EMBED,
 	PCPU_FC_PAGE,
-	PCPU_FC_LPAGE,
 
 	PCPU_FC_NR,
 };
@@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
 typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
-typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);
 
 extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
							      int nr_units);
@@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-					 pcpu_fc_alloc_fn_t alloc_fn,
-					 pcpu_fc_free_fn_t free_fn,
-					 pcpu_fc_map_fn_t map_fn);
-
-extern void *pcpu_lpage_remapped(void *kaddr);
-#else
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-#endif
-
 /*
  * Use this to get to a cpu's version of the per-cpu object
  * dynamically allocated. Non-atomic access to the current CPU's
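
Side note on the removed #else branch: it used the common kernel header idiom of providing a static inline stub when a feature is configured out, so callers such as cpa_process_alias() needed no #ifdef of their own. A minimal compilable sketch of the pattern, with hypothetical CONFIG_FEATURE_FOO and foo_lookup() names:

    #include <stdio.h>

    /* stand-in for a real CONFIG_* symbol; toggle to exercise both paths */
    /* #define CONFIG_FEATURE_FOO */

    #ifdef CONFIG_FEATURE_FOO
    extern void *foo_lookup(void *kaddr);  /* real implementation elsewhere */
    #else
    static inline void *foo_lookup(void *kaddr)
    {
        (void)kaddr;
        return NULL;  /* feature compiled out: lookup never matches */
    }
    #endif

    int main(void)
    {
        printf("%p\n", foo_lookup((void *)0x1234));
        return 0;
    }

Because the stub returned NULL, the pre-patch pageattr code could call pcpu_lpage_remapped() unconditionally; with the feature gone entirely, both the stub and its caller are deleted here.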
diff --git a/mm/percpu.c b/mm/percpu.c
index c2826d05505c..77933928107d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
 	[PCPU_FC_AUTO] = "auto",
 	[PCPU_FC_EMBED] = "embed",
 	[PCPU_FC_PAGE] = "page",
-	[PCPU_FC_LPAGE] = "lpage",
 };
 
 enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;
@@ -1730,10 +1729,6 @@ static int __init percpu_alloc_setup(char *str)
 	else if (!strcmp(str, "page"))
 		pcpu_chosen_fc = PCPU_FC_PAGE;
 #endif
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-	else if (!strcmp(str, "lpage"))
-		pcpu_chosen_fc = PCPU_FC_LPAGE;
-#endif
 	else
 		pr_warning("PERCPU: unknown allocator %s specified\n", str);
 
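
Note: percpu_alloc= takes effect this early because the handler is registered as an early parameter. Just after this function (unchanged by the patch), mm/percpu.c registers it roughly as:

    early_param("percpu_alloc", percpu_alloc_setup);

so the string is parsed during early boot, before the first chunk allocator is chosen and run.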
@@ -1970,242 +1965,6 @@ out_free_ar:
 }
 #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-struct pcpul_ent {
-	void *ptr;
-	void *map_addr;
-};
-
-static size_t pcpul_size;
-static size_t pcpul_lpage_size;
-static int pcpul_nr_lpages;
-static struct pcpul_ent *pcpul_map;
-
-static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai,
-				     unsigned int *cpup)
-{
-	int group, cunit;
-
-	for (group = 0, cunit = 0; group < ai->nr_groups; group++) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		if (unit < cunit + gi->nr_units) {
-			if (cpup)
-				*cpup = gi->cpu_map[unit - cunit];
-			return true;
-		}
-		cunit += gi->nr_units;
-	}
-
-	return false;
-}
-
-static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai)
-{
-	int group, unit, i;
-
-	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		for (i = 0; i < gi->nr_units; i++)
-			if (gi->cpu_map[i] == cpu)
-				return unit + i;
-	}
-	BUG();
-}
-
-/**
- * pcpu_lpage_first_chunk - remap the first percpu chunk using large page
- * @ai: pcpu_alloc_info
- * @alloc_fn: function to allocate percpu lpage, always called with lpage_size
- * @free_fn: function to free percpu memory, @size <= lpage_size
- * @map_fn: function to map percpu lpage, always called with lpage_size
- *
- * This allocator uses large page to build and map the first chunk.
- * Unlike other helpers, the caller should provide fully initialized
- * @ai.  This can be done using pcpu_build_alloc_info().  This two
- * stage initialization is to allow arch code to evaluate the
- * parameters before committing to it.
- *
- * Large pages are allocated as directed by @unit_map and other
- * parameters and mapped to vmalloc space.  Unused holes are returned
- * to the page allocator.  Note that these holes end up being actively
- * mapped twice - once to the physical mapping and to the vmalloc area
- * for the first percpu chunk.  Depending on architecture, this might
- * cause problem when changing page attributes of the returned area.
- * These double mapped areas can be detected using
- * pcpu_lpage_remapped().
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-				  pcpu_fc_alloc_fn_t alloc_fn,
-				  pcpu_fc_free_fn_t free_fn,
-				  pcpu_fc_map_fn_t map_fn)
-{
-	static struct vm_struct vm;
-	const size_t lpage_size = ai->atom_size;
-	size_t chunk_size, map_size;
-	unsigned int cpu;
-	int i, j, unit, nr_units, rc;
-
-	nr_units = 0;
-	for (i = 0; i < ai->nr_groups; i++)
-		nr_units += ai->groups[i].nr_units;
-
-	chunk_size = ai->unit_size * nr_units;
-	BUG_ON(chunk_size % lpage_size);
-
-	pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size;
-	pcpul_lpage_size = lpage_size;
-	pcpul_nr_lpages = chunk_size / lpage_size;
-
-	/* allocate pointer array and alloc large pages */
-	map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]);
-	pcpul_map = alloc_bootmem(map_size);
-
-	/* allocate all pages */
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		size_t offset = i * lpage_size;
-		int first_unit = offset / ai->unit_size;
-		int last_unit = (offset + lpage_size - 1) / ai->unit_size;
-		void *ptr;
-
-		/* find out which cpu is mapped to this unit */
-		for (unit = first_unit; unit <= last_unit; unit++)
-			if (pcpul_unit_to_cpu(unit, ai, &cpu))
-				goto found;
-		continue;
-	found:
-		ptr = alloc_fn(cpu, lpage_size, lpage_size);
-		if (!ptr) {
-			pr_warning("PERCPU: failed to allocate large page "
-				   "for cpu%u\n", cpu);
-			goto enomem;
-		}
-
-		pcpul_map[i].ptr = ptr;
-	}
-
-	/* return unused holes */
-	for (unit = 0; unit < nr_units; unit++) {
-		size_t start = unit * ai->unit_size;
-		size_t end = start + ai->unit_size;
-		size_t off, next;
-
-		/* don't free used part of occupied unit */
-		if (pcpul_unit_to_cpu(unit, ai, NULL))
-			start += pcpul_size;
-
-		/* unit can span more than one page, punch the holes */
-		for (off = start; off < end; off = next) {
-			void *ptr = pcpul_map[off / lpage_size].ptr;
-			next = min(roundup(off + 1, lpage_size), end);
-			if (ptr)
-				free_fn(ptr + off % lpage_size, next - off);
-		}
-	}
-
-	/* allocate address, map and copy */
-	vm.flags = VM_ALLOC;
-	vm.size = chunk_size;
-	vm_area_register_early(&vm, ai->unit_size);
-
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		if (!pcpul_map[i].ptr)
-			continue;
-		pcpul_map[i].map_addr = vm.addr + i * lpage_size;
-		map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr);
-	}
-
-	for_each_possible_cpu(cpu)
-		memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size,
-		       __per_cpu_load, ai->static_size);
-
-	/* we're ready, commit */
-	pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n",
-		vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size,
-		ai->unit_size);
-
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
-
-	/*
-	 * Sort pcpul_map array for pcpu_lpage_remapped().  Unmapped
-	 * lpages are pushed to the end and trimmed.
-	 */
-	for (i = 0; i < pcpul_nr_lpages - 1; i++)
-		for (j = i + 1; j < pcpul_nr_lpages; j++) {
-			struct pcpul_ent tmp;
-
-			if (!pcpul_map[j].ptr)
-				continue;
-			if (pcpul_map[i].ptr &&
-			    pcpul_map[i].ptr < pcpul_map[j].ptr)
-				continue;
-
-			tmp = pcpul_map[i];
-			pcpul_map[i] = pcpul_map[j];
-			pcpul_map[j] = tmp;
-		}
-
-	while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr)
-		pcpul_nr_lpages--;
-
-	return rc;
-
-enomem:
-	for (i = 0; i < pcpul_nr_lpages; i++)
-		if (pcpul_map[i].ptr)
-			free_fn(pcpul_map[i].ptr, lpage_size);
-	free_bootmem(__pa(pcpul_map), map_size);
-	return -ENOMEM;
-}
-
-/**
- * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
- * @kaddr: the kernel address in question
- *
- * Determine whether @kaddr falls in the pcpul recycled area.  This is
- * used by pageattr to detect VM aliases and break up the pcpu large
- * page mapping such that the same physical page is not mapped under
- * different attributes.
- *
- * The recycled area is always at the tail of a partially used large
- * page.
- *
- * RETURNS:
- * Address of corresponding remapped pcpu address if match is found;
- * otherwise, NULL.
- */
-void *pcpu_lpage_remapped(void *kaddr)
-{
-	unsigned long lpage_mask = pcpul_lpage_size - 1;
-	void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask);
-	unsigned long offset = (unsigned long)kaddr & lpage_mask;
-	int left = 0, right = pcpul_nr_lpages - 1;
-	int pos;
-
-	/* pcpul in use at all? */
-	if (!pcpul_map)
-		return NULL;
-
-	/* okay, perform binary search */
-	while (left <= right) {
-		pos = (left + right) / 2;
-
-		if (pcpul_map[pos].ptr < lpage_addr)
-			left = pos + 1;
-		else if (pcpul_map[pos].ptr > lpage_addr)
-			right = pos - 1;
-		else
-			return pcpul_map[pos].map_addr + offset;
-	}
-
-	return NULL;
-}
-#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */
-
 /*
  * Generic percpu area setup.
  *
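
For readers studying the removed code: pcpu_lpage_remapped() was a binary search over pcpul_map, sorted by the physical-side pointer (the sort pass at the end of pcpu_lpage_first_chunk() existed precisely to make this valid), translating a kernel address into its vmalloc-side alias at the same offset within the large page. A standalone userspace sketch of that lookup, with illustrative addresses and an assumed 2MB large page size:

    #include <stdint.h>
    #include <stdio.h>

    #define LPAGE_SIZE (2UL << 20)  /* assume 2MB large pages */

    struct ent { uintptr_t ptr; uintptr_t map_addr; };

    /* map must be sorted by .ptr, as the removed sort loop guaranteed */
    static uintptr_t lpage_remapped(const struct ent *map, int n, uintptr_t kaddr)
    {
        uintptr_t lpage_addr = kaddr & ~(LPAGE_SIZE - 1); /* containing lpage */
        uintptr_t offset = kaddr & (LPAGE_SIZE - 1);      /* offset within it */
        int left = 0, right = n - 1;

        while (left <= right) {
            int pos = (left + right) / 2;

            if (map[pos].ptr < lpage_addr)
                left = pos + 1;
            else if (map[pos].ptr > lpage_addr)
                right = pos - 1;
            else
                return map[pos].map_addr + offset; /* alias at same offset */
        }
        return 0; /* not a recycled pcpu area: no alias */
    }

    int main(void)
    {
        const struct ent map[] = {
            { 0x00200000, 0xf0200000 },  /* illustrative address pairs */
            { 0x00600000, 0xf0400000 },
        };

        printf("%#lx\n", (unsigned long)lpage_remapped(map, 2, 0x00600123));
        printf("%#lx\n", (unsigned long)lpage_remapped(map, 2, 0x00400123));
        return 0;
    }

The first lookup hits (printing 0xf0400123); the second misses and returns 0, the analogue of the kernel function's NULL.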
