author		Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
committer	Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
commit		e933a73f48e3b2d40cfa56d81e2646f194b5a66a
tree		e828fbdac9ff888a3e8e3d750e14f132abd7ffa0
parent		4518e6a0c038b98be4c480e6f4481e8676bd15dd
percpu: kill lpage first chunk allocator
With x86 converted to the embedding allocator, lpage no longer has any
users. Kill it along with the cpa handling code.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Beulich <JBeulich@novell.com>
-rw-r--r--	Documentation/kernel-parameters.txt	 10
-rw-r--r--	arch/x86/mm/pageattr.c			 20
-rw-r--r--	include/linux/percpu.h			 16
-rw-r--r--	mm/percpu.c				241
4 files changed, 6 insertions, 281 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dee9ce2e6cfa..e710093e3d32 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			See arch/parisc/kernel/pdc_chassis.c
 
 	percpu_alloc=	Select which percpu first chunk allocator to use.
-			Currently supported values are "embed", "page" and
-			"lpage". Archs may support subset or none of the
-			selections. See comments in mm/percpu.c for details
-			on each allocator. This parameter is primarily for
-			debugging and performance comparison.
+			Currently supported values are "embed" and "page".
+			Archs may support subset or none of the selections.
+			See comments in mm/percpu.c for details on each
+			allocator. This parameter is primarily for debugging
+			and performance comparison.
 
 	pf.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
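With "lpage" gone, the only selectors the command line accepts are the two
documented above. For example, an illustrative boot line forcing the
page-based allocator would look like:

        ... root=/dev/sda1 percpu_alloc=page ...

Any other value now falls through to the "unknown allocator" warning in
percpu_alloc_setup() (see the mm/percpu.c hunk below).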
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dce282f65700..f53cfc7f963d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr, remapped;
+	unsigned long vaddr;
 	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
@@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	}
 #endif
 
-	/*
-	 * If the PMD page was partially used for per-cpu remapping,
-	 * the recycled area needs to be split and modified. Because
-	 * the area is always proper subset of a PMD page
-	 * cpa->numpages is guaranteed to be 1 for these areas, so
-	 * there's no need to loop over and check for further remaps.
-	 */
-	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
-	if (remapped) {
-		WARN_ON(cpa->numpages > 1);
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &remapped;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
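The block deleted above was the only caller of pcpu_lpage_remapped(); the
clone-and-retry pattern it used is the same one cpa_process_alias() still
applies to its remaining aliases. A condensed sketch of that pattern, with
the hypothetical helper name change_attr_at_alias() (the types and calls are
the ones visible in the hunk above):

static int change_attr_at_alias(struct cpa_data *cpa, unsigned long alias_addr)
{
        struct cpa_data alias_cpa = *cpa;       /* clone the original request */

        /* retarget the clone at the alias address, dropping array flags */
        alias_cpa.vaddr = &alias_addr;
        alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);

        /* re-run the attribute change against the alias mapping */
        return __change_page_attr_set_clr(&alias_cpa, 0);
}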
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 25359932740e..878836ca999c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -82,7 +82,6 @@ enum pcpu_fc {
 	PCPU_FC_AUTO,
 	PCPU_FC_EMBED,
 	PCPU_FC_PAGE,
-	PCPU_FC_LPAGE,
 
 	PCPU_FC_NR,
 };
@@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
 typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
-typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);
 
 extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 							      int nr_units);
@@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
 					pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-					 pcpu_fc_alloc_fn_t alloc_fn,
-					 pcpu_fc_free_fn_t free_fn,
-					 pcpu_fc_map_fn_t map_fn);
-
-extern void *pcpu_lpage_remapped(void *kaddr);
-#else
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-#endif
-
 /*
  * Use this to get to a cpu's version of the per-cpu object
  * dynamically allocated. Non-atomic access to the current CPU's
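After this patch the surviving first chunk entry points keep the signatures
shown above. A minimal sketch of how an arch might wire up the page
allocator, assuming bootmem is still available at this point; the callback
bodies are illustrative stand-ins, not taken from any real arch:

static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
        /* cpu is ignored here; a NUMA-aware arch would allocate on its node */
        return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_fc_free(void *ptr, size_t size)
{
        free_bootmem(__pa(ptr), size);
}

static void __init pcpu_populate_pte(unsigned long addr)
{
        /* an arch would make sure page tables cover addr here */
}

void __init setup_per_cpu_areas(void)
{
        if (pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_fc_alloc,
                                  pcpu_fc_free, pcpu_populate_pte) < 0)
                panic("PERCPU: failed to initialize first chunk");
}

Real arch code additionally consults pcpu_chosen_fc to honor percpu_alloc=
and falls back between the embed and page allocators; lpage is simply no
longer one of the choices.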
diff --git a/mm/percpu.c b/mm/percpu.c
index c2826d05505c..77933928107d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
 	[PCPU_FC_AUTO] = "auto",
 	[PCPU_FC_EMBED] = "embed",
 	[PCPU_FC_PAGE] = "page",
-	[PCPU_FC_LPAGE] = "lpage",
 };
 
 enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;
@@ -1730,10 +1729,6 @@ static int __init percpu_alloc_setup(char *str)
 	else if (!strcmp(str, "page"))
 		pcpu_chosen_fc = PCPU_FC_PAGE;
 #endif
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-	else if (!strcmp(str, "lpage"))
-		pcpu_chosen_fc = PCPU_FC_LPAGE;
-#endif
 	else
 		pr_warning("PERCPU: unknown allocator %s specified\n", str);
 
@@ -1970,242 +1965,6 @@ out_free_ar:
 }
 #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-struct pcpul_ent {
-	void *ptr;
-	void *map_addr;
-};
-
-static size_t pcpul_size;
-static size_t pcpul_lpage_size;
-static int pcpul_nr_lpages;
-static struct pcpul_ent *pcpul_map;
-
-static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai,
-				     unsigned int *cpup)
-{
-	int group, cunit;
-
-	for (group = 0, cunit = 0; group < ai->nr_groups; group++) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		if (unit < cunit + gi->nr_units) {
-			if (cpup)
-				*cpup = gi->cpu_map[unit - cunit];
-			return true;
-		}
-		cunit += gi->nr_units;
-	}
-
-	return false;
-}
-
-static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai)
-{
-	int group, unit, i;
-
-	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		for (i = 0; i < gi->nr_units; i++)
-			if (gi->cpu_map[i] == cpu)
-				return unit + i;
-	}
-	BUG();
-}
-
-/**
- * pcpu_lpage_first_chunk - remap the first percpu chunk using large page
- * @ai: pcpu_alloc_info
- * @alloc_fn: function to allocate percpu lpage, always called with lpage_size
- * @free_fn: function to free percpu memory, @size <= lpage_size
- * @map_fn: function to map percpu lpage, always called with lpage_size
- *
- * This allocator uses large page to build and map the first chunk.
- * Unlike other helpers, the caller should provide fully initialized
- * @ai. This can be done using pcpu_build_alloc_info(). This two
- * stage initialization is to allow arch code to evaluate the
- * parameters before committing to it.
- *
- * Large pages are allocated as directed by @unit_map and other
- * parameters and mapped to vmalloc space. Unused holes are returned
- * to the page allocator. Note that these holes end up being actively
- * mapped twice - once to the physical mapping and to the vmalloc area
- * for the first percpu chunk. Depending on architecture, this might
- * cause problem when changing page attributes of the returned area.
- * These double mapped areas can be detected using
- * pcpu_lpage_remapped().
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-				  pcpu_fc_alloc_fn_t alloc_fn,
-				  pcpu_fc_free_fn_t free_fn,
-				  pcpu_fc_map_fn_t map_fn)
-{
-	static struct vm_struct vm;
-	const size_t lpage_size = ai->atom_size;
-	size_t chunk_size, map_size;
-	unsigned int cpu;
-	int i, j, unit, nr_units, rc;
-
-	nr_units = 0;
-	for (i = 0; i < ai->nr_groups; i++)
-		nr_units += ai->groups[i].nr_units;
-
-	chunk_size = ai->unit_size * nr_units;
-	BUG_ON(chunk_size % lpage_size);
-
-	pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size;
-	pcpul_lpage_size = lpage_size;
-	pcpul_nr_lpages = chunk_size / lpage_size;
-
-	/* allocate pointer array and alloc large pages */
-	map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]);
-	pcpul_map = alloc_bootmem(map_size);
-
-	/* allocate all pages */
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		size_t offset = i * lpage_size;
-		int first_unit = offset / ai->unit_size;
-		int last_unit = (offset + lpage_size - 1) / ai->unit_size;
-		void *ptr;
-
-		/* find out which cpu is mapped to this unit */
-		for (unit = first_unit; unit <= last_unit; unit++)
-			if (pcpul_unit_to_cpu(unit, ai, &cpu))
-				goto found;
-		continue;
-	found:
-		ptr = alloc_fn(cpu, lpage_size, lpage_size);
-		if (!ptr) {
-			pr_warning("PERCPU: failed to allocate large page "
-				   "for cpu%u\n", cpu);
-			goto enomem;
-		}
-
-		pcpul_map[i].ptr = ptr;
-	}
-
-	/* return unused holes */
-	for (unit = 0; unit < nr_units; unit++) {
-		size_t start = unit * ai->unit_size;
-		size_t end = start + ai->unit_size;
-		size_t off, next;
-
-		/* don't free used part of occupied unit */
-		if (pcpul_unit_to_cpu(unit, ai, NULL))
-			start += pcpul_size;
-
-		/* unit can span more than one page, punch the holes */
-		for (off = start; off < end; off = next) {
-			void *ptr = pcpul_map[off / lpage_size].ptr;
-			next = min(roundup(off + 1, lpage_size), end);
-			if (ptr)
-				free_fn(ptr + off % lpage_size, next - off);
-		}
-	}
-
-	/* allocate address, map and copy */
-	vm.flags = VM_ALLOC;
-	vm.size = chunk_size;
-	vm_area_register_early(&vm, ai->unit_size);
-
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		if (!pcpul_map[i].ptr)
-			continue;
-		pcpul_map[i].map_addr = vm.addr + i * lpage_size;
-		map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr);
-	}
-
-	for_each_possible_cpu(cpu)
-		memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size,
-		       __per_cpu_load, ai->static_size);
-
-	/* we're ready, commit */
-	pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n",
-		vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size,
-		ai->unit_size);
-
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
-
-	/*
-	 * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped
-	 * lpages are pushed to the end and trimmed.
-	 */
-	for (i = 0; i < pcpul_nr_lpages - 1; i++)
-		for (j = i + 1; j < pcpul_nr_lpages; j++) {
-			struct pcpul_ent tmp;
-
-			if (!pcpul_map[j].ptr)
-				continue;
-			if (pcpul_map[i].ptr &&
-			    pcpul_map[i].ptr < pcpul_map[j].ptr)
-				continue;
-
-			tmp = pcpul_map[i];
-			pcpul_map[i] = pcpul_map[j];
-			pcpul_map[j] = tmp;
-		}
-
-	while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr)
-		pcpul_nr_lpages--;
-
-	return rc;
-
-enomem:
-	for (i = 0; i < pcpul_nr_lpages; i++)
-		if (pcpul_map[i].ptr)
-			free_fn(pcpul_map[i].ptr, lpage_size);
-	free_bootmem(__pa(pcpul_map), map_size);
-	return -ENOMEM;
-}
-
-/**
- * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
- * @kaddr: the kernel address in question
- *
- * Determine whether @kaddr falls in the pcpul recycled area. This is
- * used by pageattr to detect VM aliases and break up the pcpu large
- * page mapping such that the same physical page is not mapped under
- * different attributes.
- *
- * The recycled area is always at the tail of a partially used large
- * page.
- *
- * RETURNS:
- * Address of corresponding remapped pcpu address if match is found;
- * otherwise, NULL.
- */
-void *pcpu_lpage_remapped(void *kaddr)
-{
-	unsigned long lpage_mask = pcpul_lpage_size - 1;
-	void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask);
-	unsigned long offset = (unsigned long)kaddr & lpage_mask;
-	int left = 0, right = pcpul_nr_lpages - 1;
-	int pos;
-
-	/* pcpul in use at all? */
-	if (!pcpul_map)
-		return NULL;
-
-	/* okay, perform binary search */
-	while (left <= right) {
-		pos = (left + right) / 2;
-
-		if (pcpul_map[pos].ptr < lpage_addr)
-			left = pos + 1;
-		else if (pcpul_map[pos].ptr > lpage_addr)
-			right = pos - 1;
-		else
-			return pcpul_map[pos].map_addr + offset;
-	}
-
-	return NULL;
-}
-#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */
-
 /*
  * Generic percpu area setup.
  *
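One last note on the removed lookup: pcpu_lpage_remapped() masked the
incoming address down to an lpage boundary and binary searched pcpul_map,
relying on the sort pass in pcpu_lpage_first_chunk() having ordered the
entries by ->ptr. A self-contained illustration of that lookup, with
hypothetical types mirroring the deleted struct pcpul_ent:

#include <stddef.h>
#include <stdint.h>

struct ent {                    /* mirrors the deleted struct pcpul_ent */
        char *ptr;              /* linear address of the backing lpage */
        char *map_addr;         /* where it was remapped in vmalloc space */
};

/* map[] must be sorted by ->ptr; lpage_size must be a power of two */
static void *lookup_remapped(struct ent *map, int nr, size_t lpage_size,
                             void *kaddr)
{
        uintptr_t mask = lpage_size - 1;
        char *lpage_addr = (char *)((uintptr_t)kaddr & ~mask);
        size_t offset = (uintptr_t)kaddr & mask;
        int left = 0, right = nr - 1;

        while (left <= right) {
                int pos = (left + right) / 2;

                if (map[pos].ptr < lpage_addr)
                        left = pos + 1;
                else if (map[pos].ptr > lpage_addr)
                        right = pos - 1;
                else
                        return map[pos].map_addr + offset;
        }
        return NULL;            /* kaddr is not in any remapped lpage */
}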