author | Yinghai Lu <yinghai@kernel.org> | 2009-03-11 23:07:39 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-03-12 21:52:19 -0400 |
commit | 0d890355bff25e1dc03a577a90ed80741489ca54 (patch) | |
tree | 0cb740303fd6d94108e2e18086f77716fec32586 /arch/x86/kernel/cpu/mtrr/cleanup.c | |
parent | c1ab7e93c6ddf8a068719b97b7e26c3d8eba7c32 (diff) |
x86: separate mtrr cleanup/mtrr_e820 trim to separate file
Impact: cleanup
mtrr main.c is too big, separate mtrr cleanup and mtrr e820 trim
code to another file.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <49B87C7B.80809@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/mtrr/cleanup.c')
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/cleanup.c | 1089 |
1 files changed, 1089 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
new file mode 100644
index 000000000000..58b58bbf7eb3
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -0,0 +1,1089 @@
1 | /* MTRR (Memory Type Range Register) cleanup | ||
2 | |||
3 | Copyright (C) 2009 Yinghai Lu | ||
4 | |||
5 | This library is free software; you can redistribute it and/or | ||
6 | modify it under the terms of the GNU Library General Public | ||
7 | License as published by the Free Software Foundation; either | ||
8 | version 2 of the License, or (at your option) any later version. | ||
9 | |||
10 | This library is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Library General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Library General Public | ||
16 | License along with this library; if not, write to the Free | ||
17 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | */ | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/pci.h> | ||
23 | #include <linux/smp.h> | ||
24 | #include <linux/cpu.h> | ||
25 | #include <linux/mutex.h> | ||
26 | #include <linux/sort.h> | ||
27 | |||
28 | #include <asm/e820.h> | ||
29 | #include <asm/mtrr.h> | ||
30 | #include <asm/uaccess.h> | ||
31 | #include <asm/processor.h> | ||
32 | #include <asm/msr.h> | ||
33 | #include <asm/kvm_para.h> | ||
34 | #include "mtrr.h" | ||
35 | |||
36 | /* should be related to MTRR_VAR_RANGES nums */ | ||
37 | #define RANGE_NUM 256 | ||
38 | |||
39 | struct res_range { | ||
40 | unsigned long start; | ||
41 | unsigned long end; | ||
42 | }; | ||
43 | |||
44 | static int __init | ||
45 | add_range(struct res_range *range, int nr_range, unsigned long start, | ||
46 | unsigned long end) | ||
47 | { | ||
48 | /* out of slots */ | ||
49 | if (nr_range >= RANGE_NUM) | ||
50 | return nr_range; | ||
51 | |||
52 | range[nr_range].start = start; | ||
53 | range[nr_range].end = end; | ||
54 | |||
55 | nr_range++; | ||
56 | |||
57 | return nr_range; | ||
58 | } | ||
59 | |||
60 | static int __init | ||
61 | add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, | ||
62 | unsigned long end) | ||
63 | { | ||
64 | int i; | ||
65 | |||
66 | /* try to merge it with old one */ | ||
67 | for (i = 0; i < nr_range; i++) { | ||
68 | unsigned long final_start, final_end; | ||
69 | unsigned long common_start, common_end; | ||
70 | |||
71 | if (!range[i].end) | ||
72 | continue; | ||
73 | |||
74 | common_start = max(range[i].start, start); | ||
75 | common_end = min(range[i].end, end); | ||
76 | if (common_start > common_end + 1) | ||
77 | continue; | ||
78 | |||
79 | final_start = min(range[i].start, start); | ||
80 | final_end = max(range[i].end, end); | ||
81 | |||
82 | range[i].start = final_start; | ||
83 | range[i].end = final_end; | ||
84 | return nr_range; | ||
85 | } | ||
86 | |||
87 | /* need to add that */ | ||
88 | return add_range(range, nr_range, start, end); | ||
89 | } | ||
90 | |||
91 | static void __init | ||
92 | subtract_range(struct res_range *range, unsigned long start, unsigned long end) | ||
93 | { | ||
94 | int i, j; | ||
95 | |||
96 | for (j = 0; j < RANGE_NUM; j++) { | ||
97 | if (!range[j].end) | ||
98 | continue; | ||
99 | |||
100 | if (start <= range[j].start && end >= range[j].end) { | ||
101 | range[j].start = 0; | ||
102 | range[j].end = 0; | ||
103 | continue; | ||
104 | } | ||
105 | |||
106 | if (start <= range[j].start && end < range[j].end && | ||
107 | range[j].start < end + 1) { | ||
108 | range[j].start = end + 1; | ||
109 | continue; | ||
110 | } | ||
111 | |||
112 | |||
113 | if (start > range[j].start && end >= range[j].end && | ||
114 | range[j].end > start - 1) { | ||
115 | range[j].end = start - 1; | ||
116 | continue; | ||
117 | } | ||
118 | |||
119 | if (start > range[j].start && end < range[j].end) { | ||
120 | /* find the new spare */ | ||
121 | for (i = 0; i < RANGE_NUM; i++) { | ||
122 | if (range[i].end == 0) | ||
123 | break; | ||
124 | } | ||
125 | if (i < RANGE_NUM) { | ||
126 | range[i].end = range[j].end; | ||
127 | range[i].start = end + 1; | ||
128 | } else { | ||
129 | printk(KERN_ERR "run out of slots in ranges\n"); | ||
130 | } | ||
131 | range[j].end = start - 1; | ||
132 | continue; | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static int __init cmp_range(const void *x1, const void *x2) | ||
138 | { | ||
139 | const struct res_range *r1 = x1; | ||
140 | const struct res_range *r2 = x2; | ||
141 | long start1, start2; | ||
142 | |||
143 | start1 = r1->start; | ||
144 | start2 = r2->start; | ||
145 | |||
146 | return start1 - start2; | ||
147 | } | ||
148 | |||
149 | struct var_mtrr_range_state { | ||
150 | unsigned long base_pfn; | ||
151 | unsigned long size_pfn; | ||
152 | mtrr_type type; | ||
153 | }; | ||
154 | |||
155 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | ||
156 | static int __initdata debug_print; | ||
157 | |||
158 | static int __init | ||
159 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | ||
160 | unsigned long extra_remove_base, | ||
161 | unsigned long extra_remove_size) | ||
162 | { | ||
163 | unsigned long i, base, size; | ||
164 | mtrr_type type; | ||
165 | |||
166 | for (i = 0; i < num_var_ranges; i++) { | ||
167 | type = range_state[i].type; | ||
168 | if (type != MTRR_TYPE_WRBACK) | ||
169 | continue; | ||
170 | base = range_state[i].base_pfn; | ||
171 | size = range_state[i].size_pfn; | ||
172 | nr_range = add_range_with_merge(range, nr_range, base, | ||
173 | base + size - 1); | ||
174 | } | ||
175 | if (debug_print) { | ||
176 | printk(KERN_DEBUG "After WB checking\n"); | ||
177 | for (i = 0; i < nr_range; i++) | ||
178 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | ||
179 | range[i].start, range[i].end + 1); | ||
180 | } | ||
181 | |||
182 | /* take out UC ranges */ | ||
183 | for (i = 0; i < num_var_ranges; i++) { | ||
184 | type = range_state[i].type; | ||
185 | if (type != MTRR_TYPE_UNCACHABLE && | ||
186 | type != MTRR_TYPE_WRPROT) | ||
187 | continue; | ||
188 | size = range_state[i].size_pfn; | ||
189 | if (!size) | ||
190 | continue; | ||
191 | base = range_state[i].base_pfn; | ||
192 | subtract_range(range, base, base + size - 1); | ||
193 | } | ||
194 | if (extra_remove_size) | ||
195 | subtract_range(range, extra_remove_base, | ||
196 | extra_remove_base + extra_remove_size - 1); | ||
197 | |||
198 | /* get new range num */ | ||
199 | nr_range = 0; | ||
200 | for (i = 0; i < RANGE_NUM; i++) { | ||
201 | if (!range[i].end) | ||
202 | continue; | ||
203 | nr_range++; | ||
204 | } | ||
205 | if (debug_print) { | ||
206 | printk(KERN_DEBUG "After UC checking\n"); | ||
207 | for (i = 0; i < nr_range; i++) | ||
208 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | ||
209 | range[i].start, range[i].end + 1); | ||
210 | } | ||
211 | |||
212 | /* sort the ranges */ | ||
213 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
214 | if (debug_print) { | ||
215 | printk(KERN_DEBUG "After sorting\n"); | ||
216 | for (i = 0; i < nr_range; i++) | ||
217 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | ||
218 | range[i].start, range[i].end + 1); | ||
219 | } | ||
220 | |||
221 | /* clear those that are not used */ | ||
222 | for (i = nr_range; i < RANGE_NUM; i++) | ||
223 | memset(&range[i], 0, sizeof(range[i])); | ||
224 | |||
225 | return nr_range; | ||
226 | } | ||
227 | |||
228 | static struct res_range __initdata range[RANGE_NUM]; | ||
229 | static int __initdata nr_range; | ||
230 | |||
231 | #ifdef CONFIG_MTRR_SANITIZER | ||
232 | |||
233 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | ||
234 | { | ||
235 | unsigned long sum; | ||
236 | int i; | ||
237 | |||
238 | sum = 0; | ||
239 | for (i = 0; i < nr_range; i++) | ||
240 | sum += range[i].end + 1 - range[i].start; | ||
241 | |||
242 | return sum; | ||
243 | } | ||
244 | |||
245 | static int enable_mtrr_cleanup __initdata = | ||
246 | CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; | ||
247 | |||
248 | static int __init disable_mtrr_cleanup_setup(char *str) | ||
249 | { | ||
250 | enable_mtrr_cleanup = 0; | ||
251 | return 0; | ||
252 | } | ||
253 | early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); | ||
254 | |||
255 | static int __init enable_mtrr_cleanup_setup(char *str) | ||
256 | { | ||
257 | enable_mtrr_cleanup = 1; | ||
258 | return 0; | ||
259 | } | ||
260 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); | ||
261 | |||
262 | static int __init mtrr_cleanup_debug_setup(char *str) | ||
263 | { | ||
264 | debug_print = 1; | ||
265 | return 0; | ||
266 | } | ||
267 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); | ||
268 | |||
269 | struct var_mtrr_state { | ||
270 | unsigned long range_startk; | ||
271 | unsigned long range_sizek; | ||
272 | unsigned long chunk_sizek; | ||
273 | unsigned long gran_sizek; | ||
274 | unsigned int reg; | ||
275 | }; | ||
276 | |||
277 | static void __init | ||
278 | set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | ||
279 | unsigned char type, unsigned int address_bits) | ||
280 | { | ||
281 | u32 base_lo, base_hi, mask_lo, mask_hi; | ||
282 | u64 base, mask; | ||
283 | |||
284 | if (!sizek) { | ||
285 | fill_mtrr_var_range(reg, 0, 0, 0, 0); | ||
286 | return; | ||
287 | } | ||
288 | |||
289 | mask = (1ULL << address_bits) - 1; | ||
290 | mask &= ~((((u64)sizek) << 10) - 1); | ||
291 | |||
292 | base = ((u64)basek) << 10; | ||
293 | |||
294 | base |= type; | ||
295 | mask |= 0x800; | ||
296 | |||
297 | base_lo = base & ((1ULL<<32) - 1); | ||
298 | base_hi = base >> 32; | ||
299 | |||
300 | mask_lo = mask & ((1ULL<<32) - 1); | ||
301 | mask_hi = mask >> 32; | ||
302 | |||
303 | fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); | ||
304 | } | ||
305 | |||
306 | static void __init | ||
307 | save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | ||
308 | unsigned char type) | ||
309 | { | ||
310 | range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); | ||
311 | range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); | ||
312 | range_state[reg].type = type; | ||
313 | } | ||
314 | |||
315 | static void __init | ||
316 | set_var_mtrr_all(unsigned int address_bits) | ||
317 | { | ||
318 | unsigned long basek, sizek; | ||
319 | unsigned char type; | ||
320 | unsigned int reg; | ||
321 | |||
322 | for (reg = 0; reg < num_var_ranges; reg++) { | ||
323 | basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); | ||
324 | sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); | ||
325 | type = range_state[reg].type; | ||
326 | |||
327 | set_var_mtrr(reg, basek, sizek, type, address_bits); | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) | ||
332 | { | ||
333 | char factor; | ||
334 | unsigned long base = sizek; | ||
335 | |||
336 | if (base & ((1<<10) - 1)) { | ||
337 | /* not MB alignment */ | ||
338 | factor = 'K'; | ||
339 | } else if (base & ((1<<20) - 1)) { | ||
340 | factor = 'M'; | ||
341 | base >>= 10; | ||
342 | } else { | ||
343 | factor = 'G'; | ||
344 | base >>= 20; | ||
345 | } | ||
346 | |||
347 | *factorp = factor; | ||
348 | |||
349 | return base; | ||
350 | } | ||
351 | |||
352 | static unsigned int __init | ||
353 | range_to_mtrr(unsigned int reg, unsigned long range_startk, | ||
354 | unsigned long range_sizek, unsigned char type) | ||
355 | { | ||
356 | if (!range_sizek || (reg >= num_var_ranges)) | ||
357 | return reg; | ||
358 | |||
359 | while (range_sizek) { | ||
360 | unsigned long max_align, align; | ||
361 | unsigned long sizek; | ||
362 | |||
363 | /* Compute the maximum size I can make a range */ | ||
364 | if (range_startk) | ||
365 | max_align = ffs(range_startk) - 1; | ||
366 | else | ||
367 | max_align = 32; | ||
368 | align = fls(range_sizek) - 1; | ||
369 | if (align > max_align) | ||
370 | align = max_align; | ||
371 | |||
372 | sizek = 1 << align; | ||
373 | if (debug_print) { | ||
374 | char start_factor = 'K', size_factor = 'K'; | ||
375 | unsigned long start_base, size_base; | ||
376 | |||
377 | start_base = to_size_factor(range_startk, | ||
378 | &start_factor), | ||
379 | size_base = to_size_factor(sizek, &size_factor), | ||
380 | |||
381 | printk(KERN_DEBUG "Setting variable MTRR %d, " | ||
382 | "base: %ld%cB, range: %ld%cB, type %s\n", | ||
383 | reg, start_base, start_factor, | ||
384 | size_base, size_factor, | ||
385 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
386 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") | ||
387 | ); | ||
388 | } | ||
389 | save_var_mtrr(reg++, range_startk, sizek, type); | ||
390 | range_startk += sizek; | ||
391 | range_sizek -= sizek; | ||
392 | if (reg >= num_var_ranges) | ||
393 | break; | ||
394 | } | ||
395 | return reg; | ||
396 | } | ||
397 | |||
398 | static unsigned __init | ||
399 | range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | ||
400 | unsigned long sizek) | ||
401 | { | ||
402 | unsigned long hole_basek, hole_sizek; | ||
403 | unsigned long second_basek, second_sizek; | ||
404 | unsigned long range0_basek, range0_sizek; | ||
405 | unsigned long range_basek, range_sizek; | ||
406 | unsigned long chunk_sizek; | ||
407 | unsigned long gran_sizek; | ||
408 | |||
409 | hole_basek = 0; | ||
410 | hole_sizek = 0; | ||
411 | second_basek = 0; | ||
412 | second_sizek = 0; | ||
413 | chunk_sizek = state->chunk_sizek; | ||
414 | gran_sizek = state->gran_sizek; | ||
415 | |||
416 | /* align to gran size, prevent small blocks from using up MTRRs */ | ||
417 | range_basek = ALIGN(state->range_startk, gran_sizek); | ||
418 | if ((range_basek > basek) && basek) | ||
419 | return second_sizek; | ||
420 | state->range_sizek -= (range_basek - state->range_startk); | ||
421 | range_sizek = ALIGN(state->range_sizek, gran_sizek); | ||
422 | |||
423 | while (range_sizek > state->range_sizek) { | ||
424 | range_sizek -= gran_sizek; | ||
425 | if (!range_sizek) | ||
426 | return 0; | ||
427 | } | ||
428 | state->range_sizek = range_sizek; | ||
429 | |||
430 | /* try to append some small hole */ | ||
431 | range0_basek = state->range_startk; | ||
432 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | ||
433 | |||
434 | /* no increase */ | ||
435 | if (range0_sizek == state->range_sizek) { | ||
436 | if (debug_print) | ||
437 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | ||
438 | range0_basek<<10, | ||
439 | (range0_basek + state->range_sizek)<<10); | ||
440 | state->reg = range_to_mtrr(state->reg, range0_basek, | ||
441 | state->range_sizek, MTRR_TYPE_WRBACK); | ||
442 | return 0; | ||
443 | } | ||
444 | |||
445 | /* only cut back, when it is not the last */ | ||
446 | if (sizek) { | ||
447 | while (range0_basek + range0_sizek > (basek + sizek)) { | ||
448 | if (range0_sizek >= chunk_sizek) | ||
449 | range0_sizek -= chunk_sizek; | ||
450 | else | ||
451 | range0_sizek = 0; | ||
452 | |||
453 | if (!range0_sizek) | ||
454 | break; | ||
455 | } | ||
456 | } | ||
457 | |||
458 | second_try: | ||
459 | range_basek = range0_basek + range0_sizek; | ||
460 | |||
461 | /* one hole in the middle */ | ||
462 | if (range_basek > basek && range_basek <= (basek + sizek)) | ||
463 | second_sizek = range_basek - basek; | ||
464 | |||
465 | if (range0_sizek > state->range_sizek) { | ||
466 | |||
467 | /* one hole in middle or at end */ | ||
468 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; | ||
469 | |||
470 | /* hole size should be less than half of range0 size */ | ||
471 | if (hole_sizek >= (range0_sizek >> 1) && | ||
472 | range0_sizek >= chunk_sizek) { | ||
473 | range0_sizek -= chunk_sizek; | ||
474 | second_sizek = 0; | ||
475 | hole_sizek = 0; | ||
476 | |||
477 | goto second_try; | ||
478 | } | ||
479 | } | ||
480 | |||
481 | if (range0_sizek) { | ||
482 | if (debug_print) | ||
483 | printk(KERN_DEBUG "range0: %016lx - %016lx\n", | ||
484 | range0_basek<<10, | ||
485 | (range0_basek + range0_sizek)<<10); | ||
486 | state->reg = range_to_mtrr(state->reg, range0_basek, | ||
487 | range0_sizek, MTRR_TYPE_WRBACK); | ||
488 | } | ||
489 | |||
490 | if (range0_sizek < state->range_sizek) { | ||
491 | /* need to handle left over */ | ||
492 | range_sizek = state->range_sizek - range0_sizek; | ||
493 | |||
494 | if (debug_print) | ||
495 | printk(KERN_DEBUG "range: %016lx - %016lx\n", | ||
496 | range_basek<<10, | ||
497 | (range_basek + range_sizek)<<10); | ||
498 | state->reg = range_to_mtrr(state->reg, range_basek, | ||
499 | range_sizek, MTRR_TYPE_WRBACK); | ||
500 | } | ||
501 | |||
502 | if (hole_sizek) { | ||
503 | hole_basek = range_basek - hole_sizek - second_sizek; | ||
504 | if (debug_print) | ||
505 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | ||
506 | hole_basek<<10, | ||
507 | (hole_basek + hole_sizek)<<10); | ||
508 | state->reg = range_to_mtrr(state->reg, hole_basek, | ||
509 | hole_sizek, MTRR_TYPE_UNCACHABLE); | ||
510 | } | ||
511 | |||
512 | return second_sizek; | ||
513 | } | ||
514 | |||
515 | static void __init | ||
516 | set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, | ||
517 | unsigned long size_pfn) | ||
518 | { | ||
519 | unsigned long basek, sizek; | ||
520 | unsigned long second_sizek = 0; | ||
521 | |||
522 | if (state->reg >= num_var_ranges) | ||
523 | return; | ||
524 | |||
525 | basek = base_pfn << (PAGE_SHIFT - 10); | ||
526 | sizek = size_pfn << (PAGE_SHIFT - 10); | ||
527 | |||
528 | /* See if I can merge with the last range */ | ||
529 | if ((basek <= 1024) || | ||
530 | (state->range_startk + state->range_sizek == basek)) { | ||
531 | unsigned long endk = basek + sizek; | ||
532 | state->range_sizek = endk - state->range_startk; | ||
533 | return; | ||
534 | } | ||
535 | /* Write the range mtrrs */ | ||
536 | if (state->range_sizek != 0) | ||
537 | second_sizek = range_to_mtrr_with_hole(state, basek, sizek); | ||
538 | |||
539 | /* Allocate an msr */ | ||
540 | state->range_startk = basek + second_sizek; | ||
541 | state->range_sizek = sizek - second_sizek; | ||
542 | } | ||
543 | |||
544 | /* minimum size of mtrr block that can take a hole */ | ||
545 | static u64 mtrr_chunk_size __initdata = (256ULL<<20); | ||
546 | |||
547 | static int __init parse_mtrr_chunk_size_opt(char *p) | ||
548 | { | ||
549 | if (!p) | ||
550 | return -EINVAL; | ||
551 | mtrr_chunk_size = memparse(p, &p); | ||
552 | return 0; | ||
553 | } | ||
554 | early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); | ||
555 | |||
556 | /* granularity of an mtrr block */ | ||
557 | static u64 mtrr_gran_size __initdata; | ||
558 | |||
559 | static int __init parse_mtrr_gran_size_opt(char *p) | ||
560 | { | ||
561 | if (!p) | ||
562 | return -EINVAL; | ||
563 | mtrr_gran_size = memparse(p, &p); | ||
564 | return 0; | ||
565 | } | ||
566 | early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); | ||
567 | |||
568 | static int nr_mtrr_spare_reg __initdata = | ||
569 | CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; | ||
570 | |||
571 | static int __init parse_mtrr_spare_reg(char *arg) | ||
572 | { | ||
573 | if (arg) | ||
574 | nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); | ||
575 | return 0; | ||
576 | } | ||
577 | |||
578 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | ||
579 | |||
580 | static int __init | ||
581 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | ||
582 | u64 chunk_size, u64 gran_size) | ||
583 | { | ||
584 | struct var_mtrr_state var_state; | ||
585 | int i; | ||
586 | int num_reg; | ||
587 | |||
588 | var_state.range_startk = 0; | ||
589 | var_state.range_sizek = 0; | ||
590 | var_state.reg = 0; | ||
591 | var_state.chunk_sizek = chunk_size >> 10; | ||
592 | var_state.gran_sizek = gran_size >> 10; | ||
593 | |||
594 | memset(range_state, 0, sizeof(range_state)); | ||
595 | |||
596 | /* Write the range etc */ | ||
597 | for (i = 0; i < nr_range; i++) | ||
598 | set_var_mtrr_range(&var_state, range[i].start, | ||
599 | range[i].end - range[i].start + 1); | ||
600 | |||
601 | /* Write the last range */ | ||
602 | if (var_state.range_sizek != 0) | ||
603 | range_to_mtrr_with_hole(&var_state, 0, 0); | ||
604 | |||
605 | num_reg = var_state.reg; | ||
606 | /* Clear out the extra MTRR's */ | ||
607 | while (var_state.reg < num_var_ranges) { | ||
608 | save_var_mtrr(var_state.reg, 0, 0, 0); | ||
609 | var_state.reg++; | ||
610 | } | ||
611 | |||
612 | return num_reg; | ||
613 | } | ||
614 | |||
615 | struct mtrr_cleanup_result { | ||
616 | unsigned long gran_sizek; | ||
617 | unsigned long chunk_sizek; | ||
618 | unsigned long lose_cover_sizek; | ||
619 | unsigned int num_reg; | ||
620 | int bad; | ||
621 | }; | ||
622 | |||
623 | /* | ||
624 | * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G | ||
625 | * chunk size: gran_size, ..., 2G | ||
626 | * so we need (1+16)*8 | ||
627 | */ | ||
628 | #define NUM_RESULT 136 | ||
629 | #define PSHIFT (PAGE_SHIFT - 10) | ||
630 | |||
631 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | ||
632 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | ||
633 | |||
634 | static void __init print_out_mtrr_range_state(void) | ||
635 | { | ||
636 | int i; | ||
637 | char start_factor = 'K', size_factor = 'K'; | ||
638 | unsigned long start_base, size_base; | ||
639 | mtrr_type type; | ||
640 | |||
641 | for (i = 0; i < num_var_ranges; i++) { | ||
642 | |||
643 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | ||
644 | if (!size_base) | ||
645 | continue; | ||
646 | |||
647 | size_base = to_size_factor(size_base, &size_factor), | ||
648 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | ||
649 | start_base = to_size_factor(start_base, &start_factor), | ||
650 | type = range_state[i].type; | ||
651 | |||
652 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | ||
653 | i, start_base, start_factor, | ||
654 | size_base, size_factor, | ||
655 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
656 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
657 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
658 | ); | ||
659 | } | ||
660 | } | ||
661 | |||
662 | static int __init mtrr_need_cleanup(void) | ||
663 | { | ||
664 | int i; | ||
665 | mtrr_type type; | ||
666 | unsigned long size; | ||
667 | /* extra one for all 0 */ | ||
668 | int num[MTRR_NUM_TYPES + 1]; | ||
669 | |||
670 | /* check entries number */ | ||
671 | memset(num, 0, sizeof(num)); | ||
672 | for (i = 0; i < num_var_ranges; i++) { | ||
673 | type = range_state[i].type; | ||
674 | size = range_state[i].size_pfn; | ||
675 | if (type >= MTRR_NUM_TYPES) | ||
676 | continue; | ||
677 | if (!size) | ||
678 | type = MTRR_NUM_TYPES; | ||
679 | if (type == MTRR_TYPE_WRPROT) | ||
680 | type = MTRR_TYPE_UNCACHABLE; | ||
681 | num[type]++; | ||
682 | } | ||
683 | |||
684 | /* check if we got UC entries */ | ||
685 | if (!num[MTRR_TYPE_UNCACHABLE]) | ||
686 | return 0; | ||
687 | |||
688 | /* check if we only had WB and UC */ | ||
689 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | ||
690 | num_var_ranges - num[MTRR_NUM_TYPES]) | ||
691 | return 0; | ||
692 | |||
693 | return 1; | ||
694 | } | ||
695 | |||
696 | static unsigned long __initdata range_sums; | ||
697 | static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | ||
698 | unsigned long extra_remove_base, | ||
699 | unsigned long extra_remove_size, | ||
700 | int i) | ||
701 | { | ||
702 | int num_reg; | ||
703 | static struct res_range range_new[RANGE_NUM]; | ||
704 | static int nr_range_new; | ||
705 | unsigned long range_sums_new; | ||
706 | |||
707 | /* convert ranges to var ranges state */ | ||
708 | num_reg = x86_setup_var_mtrrs(range, nr_range, | ||
709 | chunk_size, gran_size); | ||
710 | |||
711 | /* we got new setting in range_state, check it */ | ||
712 | memset(range_new, 0, sizeof(range_new)); | ||
713 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
714 | extra_remove_base, extra_remove_size); | ||
715 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
716 | |||
717 | result[i].chunk_sizek = chunk_size >> 10; | ||
718 | result[i].gran_sizek = gran_size >> 10; | ||
719 | result[i].num_reg = num_reg; | ||
720 | if (range_sums < range_sums_new) { | ||
721 | result[i].lose_cover_sizek = | ||
722 | (range_sums_new - range_sums) << PSHIFT; | ||
723 | result[i].bad = 1; | ||
724 | } else | ||
725 | result[i].lose_cover_sizek = | ||
726 | (range_sums - range_sums_new) << PSHIFT; | ||
727 | |||
728 | /* double check it */ | ||
729 | if (!result[i].bad && !result[i].lose_cover_sizek) { | ||
730 | if (nr_range_new != nr_range || | ||
731 | memcmp(range, range_new, sizeof(range))) | ||
732 | result[i].bad = 1; | ||
733 | } | ||
734 | |||
735 | if (!result[i].bad && (range_sums - range_sums_new < | ||
736 | min_loss_pfn[num_reg])) { | ||
737 | min_loss_pfn[num_reg] = | ||
738 | range_sums - range_sums_new; | ||
739 | } | ||
740 | } | ||
741 | |||
742 | static void __init mtrr_print_out_one_result(int i) | ||
743 | { | ||
744 | char gran_factor, chunk_factor, lose_factor; | ||
745 | unsigned long gran_base, chunk_base, lose_base; | ||
746 | |||
747 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
748 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
749 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
750 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
751 | result[i].bad ? "*BAD*" : " ", | ||
752 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
753 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
754 | result[i].num_reg, result[i].bad ? "-" : "", | ||
755 | lose_base, lose_factor); | ||
756 | } | ||
757 | |||
758 | static int __init mtrr_search_optimal_index(void) | ||
759 | { | ||
760 | int i; | ||
761 | int num_reg_good; | ||
762 | int index_good; | ||
763 | |||
764 | if (nr_mtrr_spare_reg >= num_var_ranges) | ||
765 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
766 | num_reg_good = -1; | ||
767 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
768 | if (!min_loss_pfn[i]) | ||
769 | num_reg_good = i; | ||
770 | } | ||
771 | |||
772 | index_good = -1; | ||
773 | if (num_reg_good != -1) { | ||
774 | for (i = 0; i < NUM_RESULT; i++) { | ||
775 | if (!result[i].bad && | ||
776 | result[i].num_reg == num_reg_good && | ||
777 | !result[i].lose_cover_sizek) { | ||
778 | index_good = i; | ||
779 | break; | ||
780 | } | ||
781 | } | ||
782 | } | ||
783 | |||
784 | return index_good; | ||
785 | } | ||
786 | |||
787 | |||
788 | int __init mtrr_cleanup(unsigned address_bits) | ||
789 | { | ||
790 | unsigned long extra_remove_base, extra_remove_size; | ||
791 | unsigned long base, size, def, dummy; | ||
792 | mtrr_type type; | ||
793 | u64 chunk_size, gran_size; | ||
794 | int index_good; | ||
795 | int i; | ||
796 | |||
797 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | ||
798 | return 0; | ||
799 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
800 | def &= 0xff; | ||
801 | if (def != MTRR_TYPE_UNCACHABLE) | ||
802 | return 0; | ||
803 | |||
804 | /* get it and store it aside */ | ||
805 | memset(range_state, 0, sizeof(range_state)); | ||
806 | for (i = 0; i < num_var_ranges; i++) { | ||
807 | mtrr_if->get(i, &base, &size, &type); | ||
808 | range_state[i].base_pfn = base; | ||
809 | range_state[i].size_pfn = size; | ||
810 | range_state[i].type = type; | ||
811 | } | ||
812 | |||
813 | /* check if we need to handle it and can handle it */ | ||
814 | if (!mtrr_need_cleanup()) | ||
815 | return 0; | ||
816 | |||
817 | /* print original var MTRRs at first, for debugging: */ | ||
818 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
819 | print_out_mtrr_range_state(); | ||
820 | |||
821 | memset(range, 0, sizeof(range)); | ||
822 | extra_remove_size = 0; | ||
823 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | ||
824 | if (mtrr_tom2) | ||
825 | extra_remove_size = | ||
826 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | ||
827 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | ||
828 | extra_remove_size); | ||
829 | /* | ||
830 | * [0, 1M) should always be covered by var mtrr with WB | ||
831 | * and fixed mtrrs should take effect before var mtrr for it | ||
832 | */ | ||
833 | nr_range = add_range_with_merge(range, nr_range, 0, | ||
834 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | ||
835 | /* sort the ranges */ | ||
836 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
837 | |||
838 | range_sums = sum_ranges(range, nr_range); | ||
839 | printk(KERN_INFO "total RAM coverred: %ldM\n", | ||
840 | range_sums >> (20 - PAGE_SHIFT)); | ||
841 | |||
842 | if (mtrr_chunk_size && mtrr_gran_size) { | ||
843 | i = 0; | ||
844 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, | ||
845 | extra_remove_base, extra_remove_size, i); | ||
846 | |||
847 | mtrr_print_out_one_result(i); | ||
848 | |||
849 | if (!result[i].bad) { | ||
850 | set_var_mtrr_all(address_bits); | ||
851 | printk(KERN_DEBUG "New variable MTRRs\n"); | ||
852 | print_out_mtrr_range_state(); | ||
853 | return 1; | ||
854 | } | ||
855 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | ||
856 | "will find optimal one\n"); | ||
857 | } | ||
858 | |||
859 | i = 0; | ||
860 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | ||
861 | memset(result, 0, sizeof(result)); | ||
862 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { | ||
863 | |||
864 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | ||
865 | chunk_size <<= 1) { | ||
866 | |||
867 | if (i >= NUM_RESULT) | ||
868 | continue; | ||
869 | |||
870 | mtrr_calc_range_state(chunk_size, gran_size, | ||
871 | extra_remove_base, extra_remove_size, i); | ||
872 | if (debug_print) { | ||
873 | mtrr_print_out_one_result(i); | ||
874 | printk(KERN_INFO "\n"); | ||
875 | } | ||
876 | |||
877 | i++; | ||
878 | } | ||
879 | } | ||
880 | |||
881 | /* try to find the optimal index */ | ||
882 | index_good = mtrr_search_optimal_index(); | ||
883 | |||
884 | if (index_good != -1) { | ||
885 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | ||
886 | i = index_good; | ||
887 | mtrr_print_out_one_result(i); | ||
888 | |||
889 | /* convert ranges to var ranges state */ | ||
890 | chunk_size = result[i].chunk_sizek; | ||
891 | chunk_size <<= 10; | ||
892 | gran_size = result[i].gran_sizek; | ||
893 | gran_size <<= 10; | ||
894 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | ||
895 | set_var_mtrr_all(address_bits); | ||
896 | printk(KERN_DEBUG "New variable MTRRs\n"); | ||
897 | print_out_mtrr_range_state(); | ||
898 | return 1; | ||
899 | } else { | ||
900 | /* print out all */ | ||
901 | for (i = 0; i < NUM_RESULT; i++) | ||
902 | mtrr_print_out_one_result(i); | ||
903 | } | ||
904 | |||
905 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); | ||
906 | printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); | ||
907 | |||
908 | return 0; | ||
909 | } | ||
910 | #else | ||
911 | int __init mtrr_cleanup(unsigned address_bits) | ||
912 | { | ||
913 | return 0; | ||
914 | } | ||
915 | #endif | ||
916 | |||
917 | static int disable_mtrr_trim; | ||
918 | |||
919 | static int __init disable_mtrr_trim_setup(char *str) | ||
920 | { | ||
921 | disable_mtrr_trim = 1; | ||
922 | return 0; | ||
923 | } | ||
924 | early_param("disable_mtrr_trim", disable_mtrr_trim_setup); | ||
925 | |||
926 | /* | ||
927 | * Newer AMD K8s and later CPUs have a special magic MSR way to force WB | ||
928 | * for memory >4GB. Check for that here. | ||
929 | * Note this won't check whether the MTRRs below 4GB (where the magic bit | ||
930 | * doesn't apply) are wrong, but so far we don't know of any such case in the wild. | ||
931 | */ | ||
932 | #define Tom2Enabled (1U << 21) | ||
933 | #define Tom2ForceMemTypeWB (1U << 22) | ||
934 | |||
935 | int __init amd_special_default_mtrr(void) | ||
936 | { | ||
937 | u32 l, h; | ||
938 | |||
939 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
940 | return 0; | ||
941 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) | ||
942 | return 0; | ||
943 | /* In case some hypervisor doesn't pass SYSCFG through */ | ||
944 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) | ||
945 | return 0; | ||
946 | /* | ||
947 | * Memory between 4GB and top of mem is forced WB by this magic bit. | ||
948 | * Reserved before K8RevF, but should be zero there. | ||
949 | */ | ||
950 | if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == | ||
951 | (Tom2Enabled | Tom2ForceMemTypeWB)) | ||
952 | return 1; | ||
953 | return 0; | ||
954 | } | ||
955 | |||
956 | static u64 __init real_trim_memory(unsigned long start_pfn, | ||
957 | unsigned long limit_pfn) | ||
958 | { | ||
959 | u64 trim_start, trim_size; | ||
960 | trim_start = start_pfn; | ||
961 | trim_start <<= PAGE_SHIFT; | ||
962 | trim_size = limit_pfn; | ||
963 | trim_size <<= PAGE_SHIFT; | ||
964 | trim_size -= trim_start; | ||
965 | |||
966 | return e820_update_range(trim_start, trim_size, E820_RAM, | ||
967 | E820_RESERVED); | ||
968 | } | ||
969 | /** | ||
970 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | ||
971 | * @end_pfn: ending page frame number | ||
972 | * | ||
973 | * Some buggy BIOSes don't set up the MTRRs properly for systems with certain | ||
974 | * memory configurations. This routine checks that the highest MTRR matches | ||
975 | * the end of memory, to make sure the MTRRs having a write back type cover | ||
976 | * all of the memory the kernel is intending to use. If not, it'll trim any | ||
977 | * memory off the end by adjusting end_pfn, removing it from the kernel's | ||
978 | * allocation pools, warning the user with an obnoxious message. | ||
979 | */ | ||
980 | int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | ||
981 | { | ||
982 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | ||
983 | mtrr_type type; | ||
984 | u64 total_trim_size; | ||
985 | |||
986 | /* extra one for all 0 */ | ||
987 | int num[MTRR_NUM_TYPES + 1]; | ||
988 | /* | ||
989 | * Make sure we only trim uncachable memory on machines that | ||
990 | * support the Intel MTRR architecture: | ||
991 | */ | ||
992 | if (!is_cpu(INTEL) || disable_mtrr_trim) | ||
993 | return 0; | ||
994 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
995 | def &= 0xff; | ||
996 | if (def != MTRR_TYPE_UNCACHABLE) | ||
997 | return 0; | ||
998 | |||
999 | /* get it and store it aside */ | ||
1000 | memset(range_state, 0, sizeof(range_state)); | ||
1001 | for (i = 0; i < num_var_ranges; i++) { | ||
1002 | mtrr_if->get(i, &base, &size, &type); | ||
1003 | range_state[i].base_pfn = base; | ||
1004 | range_state[i].size_pfn = size; | ||
1005 | range_state[i].type = type; | ||
1006 | } | ||
1007 | |||
1008 | /* Find highest cached pfn */ | ||
1009 | for (i = 0; i < num_var_ranges; i++) { | ||
1010 | type = range_state[i].type; | ||
1011 | if (type != MTRR_TYPE_WRBACK) | ||
1012 | continue; | ||
1013 | base = range_state[i].base_pfn; | ||
1014 | size = range_state[i].size_pfn; | ||
1015 | if (highest_pfn < base + size) | ||
1016 | highest_pfn = base + size; | ||
1017 | } | ||
1018 | |||
1019 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | ||
1020 | if (!highest_pfn) { | ||
1021 | printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); | ||
1022 | return 0; | ||
1023 | } | ||
1024 | |||
1025 | /* check entries number */ | ||
1026 | memset(num, 0, sizeof(num)); | ||
1027 | for (i = 0; i < num_var_ranges; i++) { | ||
1028 | type = range_state[i].type; | ||
1029 | if (type >= MTRR_NUM_TYPES) | ||
1030 | continue; | ||
1031 | size = range_state[i].size_pfn; | ||
1032 | if (!size) | ||
1033 | type = MTRR_NUM_TYPES; | ||
1034 | num[type]++; | ||
1035 | } | ||
1036 | |||
1037 | /* no entry for WB? */ | ||
1038 | if (!num[MTRR_TYPE_WRBACK]) | ||
1039 | return 0; | ||
1040 | |||
1041 | /* check if we only had WB and UC */ | ||
1042 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | ||
1043 | num_var_ranges - num[MTRR_NUM_TYPES]) | ||
1044 | return 0; | ||
1045 | |||
1046 | memset(range, 0, sizeof(range)); | ||
1047 | nr_range = 0; | ||
1048 | if (mtrr_tom2) { | ||
1049 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | ||
1050 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | ||
1051 | if (highest_pfn < range[nr_range].end + 1) | ||
1052 | highest_pfn = range[nr_range].end + 1; | ||
1053 | nr_range++; | ||
1054 | } | ||
1055 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | ||
1056 | |||
1057 | total_trim_size = 0; | ||
1058 | /* check the head */ | ||
1059 | if (range[0].start) | ||
1060 | total_trim_size += real_trim_memory(0, range[0].start); | ||
1061 | /* check the holes */ | ||
1062 | for (i = 0; i < nr_range - 1; i++) { | ||
1063 | if (range[i].end + 1 < range[i+1].start) | ||
1064 | total_trim_size += real_trim_memory(range[i].end + 1, | ||
1065 | range[i+1].start); | ||
1066 | } | ||
1067 | /* check the top */ | ||
1068 | i = nr_range - 1; | ||
1069 | if (range[i].end + 1 < end_pfn) | ||
1070 | total_trim_size += real_trim_memory(range[i].end + 1, | ||
1071 | end_pfn); | ||
1072 | |||
1073 | if (total_trim_size) { | ||
1074 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" | ||
1075 | " all of memory, losing %lluMB of RAM.\n", | ||
1076 | total_trim_size >> 20); | ||
1077 | |||
1078 | if (!changed_by_mtrr_cleanup) | ||
1079 | WARN_ON(1); | ||
1080 | |||
1081 | printk(KERN_INFO "update e820 for mtrr\n"); | ||
1082 | update_e820(); | ||
1083 | |||
1084 | return 1; | ||
1085 | } | ||
1086 | |||
1087 | return 0; | ||
1088 | } | ||
1089 | |||
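For readers skimming the patch, here is a minimal userspace sketch (not part of the commit) of the alignment loop in range_to_mtrr() above: it splits a [basek, basek + sizek) region, given in KiB, into the largest power-of-two chunks allowed by the start alignment and by the remaining size, which is how each chunk maps to one variable MTRR. The input values and the fls_ul() helper are hypothetical stand-ins added for illustration only.

```c
/*
 * Illustrative sketch of the splitting loop in range_to_mtrr().
 * Userspace demo, not kernel code; inputs are made up.
 */
#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Simple stand-in for the kernel's fls() on unsigned long. */
static int fls_ul(unsigned long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long range_startk = 1024;	/* hypothetical: 1 MiB */
	unsigned long range_sizek = 1536;	/* hypothetical: 1.5 MiB */
	int reg = 0;

	while (range_sizek) {
		unsigned long max_align, align, sizek;

		/* Largest power of two the start alignment permits... */
		max_align = range_startk ? ffs(range_startk) - 1 : 32;
		/* ...capped by the largest power of two that still fits. */
		align = fls_ul(range_sizek) - 1;
		if (align > max_align)
			align = max_align;

		sizek = 1UL << align;
		printf("MTRR %d: base %luK, size %luK\n", reg++,
		       range_startk, sizek);

		range_startk += sizek;
		range_sizek -= sizek;
	}
	return 0;
}
```

With range_startk = 1024 and range_sizek = 1536 this prints two entries, a 1 MiB chunk at 1 MiB and a 512 KiB chunk at 2 MiB, mirroring how the kernel loop would consume two variable MTRRs for that region.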