 arch/x86/kernel/setup_percpu.c | 86 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 4a17c96f4f6c..fd4c399675df 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -43,6 +43,35 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
 EXPORT_SYMBOL(__per_cpu_offset);
 
 /**
+ * pcpu_need_numa - determine whether percpu allocation needs to consider NUMA
+ *
+ * If NUMA is not configured or there is only one NUMA node available,
+ * there is no reason to consider NUMA.  This function determines
+ * whether percpu allocation should consider NUMA or not.
+ *
+ * RETURNS:
+ * true if NUMA should be considered; otherwise, false.
+ */
+static bool __init pcpu_need_numa(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	pg_data_t *last = NULL;
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int node = early_cpu_to_node(cpu);
+
+		if (node_online(node) && NODE_DATA(node) &&
+		    last && last != NODE_DATA(node))
+			return true;
+
+		last = NODE_DATA(node);
+	}
+#endif
+	return false;
+}
+
+/**
  * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
  * @cpu: cpu to allocate for
  * @size: size allocation in bytes
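pcpu_need_numa() boils down to a single pass: map each possible CPU to its node and return true as soon as a second distinct node shows up. Below is a minimal standalone C sketch of that same scan; cpu_node[] is a hypothetical stand-in for early_cpu_to_node(), and the node_online()/NODE_DATA() validity checks are elided.

/*
 * Sketch of the "more than one distinct node" test in pcpu_need_numa().
 * cpu_node[] is a hypothetical CPU-to-node table, not a kernel API.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool need_numa(const int *cpu_node, size_t nr_cpus)
{
	int last = -1;	/* no node seen yet */
	size_t cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int node = cpu_node[cpu];

		/* a second distinct node means NUMA matters */
		if (last != -1 && node != last)
			return true;
		last = node;
	}
	return false;
}

int main(void)
{
	int uma[]  = { 0, 0, 0, 0 };	/* all CPUs on node 0 */
	int numa[] = { 0, 0, 1, 1 };	/* CPUs split across two nodes */

	printf("uma:  %d\n", need_numa(uma, 4));	/* prints 0 */
	printf("numa: %d\n", need_numa(numa, 4));	/* prints 1 */
	return 0;
}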
@@ -82,6 +111,59 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 }
 
 /*
+ * Embedding allocator
+ *
+ * The first chunk is sized to just contain the static area plus
+ * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using
+ * the bootmem allocator, then used as-is without being mapped into
+ * the vmalloc area.  This lets the first chunk piggyback on the
+ * linear physical PMD mapping and doesn't add any additional
+ * pressure on the TLB.
+ */
+static void *pcpue_ptr __initdata;
+static size_t pcpue_unit_size __initdata;
+
+static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
+{
+	return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size
+			    + ((size_t)pageno << PAGE_SHIFT));
+}
+
+static ssize_t __init setup_pcpu_embed(size_t static_size)
+{
+	unsigned int cpu;
+
+	/*
+	 * If large page isn't supported, there's no benefit in doing
+	 * this.  Also, embedding allocation doesn't play well with
+	 * NUMA.
+	 */
+	if (!cpu_has_pse || pcpu_need_numa())
+		return -EINVAL;
+
+	/* allocate and copy */
+	pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE);
+	pcpue_unit_size = max(pcpue_unit_size, PCPU_MIN_UNIT_SIZE);
+	pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
+				       PAGE_SIZE);
+	if (!pcpue_ptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load,
+		       static_size);
+
+	/* we're ready, commit */
+	pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
+		pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size);
+
+	return pcpu_setup_first_chunk(pcpue_get_page, static_size,
+				      pcpue_unit_size,
+				      pcpue_unit_size - static_size, pcpue_ptr,
+				      NULL);
+}
+
+/*
  * 4k page allocator
  *
  * This is the basic allocator.  Static percpu area is allocated
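The embedding allocator's address map is one contiguous bootmem block holding one unit per possible CPU, with page `pageno` of CPU `cpu` living at base + cpu * unit_size + pageno * PAGE_SIZE, exactly the arithmetic in pcpue_get_page(). Below is a self-contained userspace sketch of that layout; the sizes are illustrative stand-ins, not the kernel's real PERCPU_DYNAMIC_RESERVE or PCPU_MIN_UNIT_SIZE values.

/*
 * Sketch of the embedded first-chunk layout: one contiguous block,
 * one unit per CPU, static data copied to the head of each unit.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE	4096UL
#define NR_CPUS		4
#define STATIC_SIZE	(3 * PAGE_SIZE)	/* pretend .data..percpu size */
#define DYN_RESERVE	(5 * PAGE_SIZE)	/* stands in for PERCPU_DYNAMIC_RESERVE */
#define UNIT_SIZE	(STATIC_SIZE + DYN_RESERVE)

/* same arithmetic as pcpue_get_page(), returning a byte address */
static char *unit_page(char *base, unsigned int cpu, int pageno)
{
	return base + cpu * UNIT_SIZE + (size_t)pageno * PAGE_SIZE;
}

int main(void)
{
	/* one contiguous allocation covering every CPU's unit */
	char *base = aligned_alloc(PAGE_SIZE, NR_CPUS * UNIT_SIZE);
	unsigned int cpu;

	assert(base);

	/* copy "static data" to the head of each unit, as the
	 * for_each_possible_cpu() memcpy from __per_cpu_load does */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		memset(unit_page(base, cpu, 0), cpu, STATIC_SIZE);

	printf("cpu 1, page 0 sits at offset %zu (= 1 * unit size)\n",
	       (size_t)(unit_page(base, 1, 0) - base));
	free(base);
	return 0;
}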
@@ -178,7 +260,9 @@ void __init setup_per_cpu_areas(void)
 		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
 
 	/* allocate percpu area */
-	ret = setup_pcpu_4k(static_size);
+	ret = setup_pcpu_embed(static_size);
+	if (ret < 0)
+		ret = setup_pcpu_4k(static_size);
 	if (ret < 0)
 		panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
 		      static_size, ret);
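The last hunk turns setup_per_cpu_areas() into a first-fit fallback chain: prefer the embedding allocator, which returns -EINVAL when PSE is unavailable or NUMA is in play and -ENOMEM on allocation failure, and only then fall back to the 4k allocator. A sketch of that pattern follows; setup_embed() and setup_4k() are hypothetical stand-ins for the kernel functions.

/* Sketch of the try-preferred-then-fall-back allocator chain. */
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

static long setup_embed(size_t static_size)
{
	(void)static_size;
	return -EINVAL;		/* pretend NUMA forced a refusal */
}

static long setup_4k(size_t static_size)
{
	(void)static_size;
	return 0;		/* the always-available fallback */
}

static long setup_percpu(size_t static_size)
{
	long ret;

	ret = setup_embed(static_size);	/* preferred: no extra TLB pressure */
	if (ret < 0)
		ret = setup_4k(static_size);	/* basic 4k allocator */
	return ret;
}

int main(void)
{
	if (setup_percpu(4096) < 0) {
		fprintf(stderr, "cannot allocate static percpu area\n");
		return 1;
	}
	puts("percpu area set up via fallback chain");
	return 0;
}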