aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
author: Jesse Barnes <jesse.barnes@intel.com> 2008-01-30 07:33:18 -0500
committer: Ingo Molnar <mingo@elte.hu> 2008-01-30 07:33:18 -0500
commit: 99fc8d424bc5d803fe92cad56c068fe64e73747a (patch)
tree: 983f615ed69b98c614f38b7240c343c9d7f9418d /arch/x86/kernel
parent: 03252919b79891063cf99145612360efbdf9500b (diff)
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly set up WB MTRRs to cover all available RAM, meaning the last few megs (or even gigs) of memory will be marked uncached. Since Linux tends to allocate from high memory addresses first, this causes the machine to be unusably slow as soon as the kernel starts really using memory (i.e. right around init time).

This patch works around the problem by scanning the MTRRs at boot and figuring out whether the current end_pfn value (set up by early e820 code) goes beyond the highest WB MTRR range, and if so, trimming it to match. A fairly obnoxious KERN_WARNING is printed too, letting the user know that not all of their memory is available due to a likely BIOS bug.

Something similar could be done on i386 if needed, but the boot ordering would be slightly different, since the MTRR code on i386 depends on the boot_cpu_data structure being set up.

This patch fixes a bug in the last patch that caused the code to run on non-Intel machines (AMD machines apparently don't need it and it's untested on other non-Intel machines, so best keep it off).

Further enhancements and fixes from:
  Yinghai Lu <Yinghai.Lu@Sun.COM>
  Andi Kleen <ak@suse.de>

Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/bugs_64.c 1
-rw-r--r-- arch/x86/kernel/cpu/mtrr/generic.c 8
-rw-r--r-- arch/x86/kernel/cpu/mtrr/if.c 8
-rw-r--r-- arch/x86/kernel/cpu/mtrr/main.c 140
-rw-r--r-- arch/x86/kernel/cpu/mtrr/mtrr.h 3
-rw-r--r-- arch/x86/kernel/setup_64.c 7
6 files changed, 129 insertions, 38 deletions
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c
index 9a189cef6404..8f520f93ffd4 100644
--- a/arch/x86/kernel/bugs_64.c
+++ b/arch/x86/kernel/bugs_64.c
@@ -13,7 +13,6 @@
13void __init check_bugs(void) 13void __init check_bugs(void)
14{ 14{
15 identify_cpu(&boot_cpu_data); 15 identify_cpu(&boot_cpu_data);
16 mtrr_bp_init();
17#if !defined(CONFIG_SMP) 16#if !defined(CONFIG_SMP)
18 printk("CPU: "); 17 printk("CPU: ");
19 print_cpu_info(&boot_cpu_data); 18 print_cpu_info(&boot_cpu_data);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 55d31ff118fb..103d61a59b19 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,7 +14,7 @@
14#include "mtrr.h" 14#include "mtrr.h"
15 15
16struct mtrr_state { 16struct mtrr_state {
17 struct mtrr_var_range *var_ranges; 17 struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
18 mtrr_type fixed_ranges[NUM_FIXED_RANGES]; 18 mtrr_type fixed_ranges[NUM_FIXED_RANGES];
19 unsigned char enabled; 19 unsigned char enabled;
20 unsigned char have_fixed; 20 unsigned char have_fixed;
@@ -86,12 +86,6 @@ void __init get_mtrr_state(void)
86 struct mtrr_var_range *vrs; 86 struct mtrr_var_range *vrs;
87 unsigned lo, dummy; 87 unsigned lo, dummy;
88 88
89 if (!mtrr_state.var_ranges) {
90 mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
91 GFP_KERNEL);
92 if (!mtrr_state.var_ranges)
93 return;
94 }
95 vrs = mtrr_state.var_ranges; 89 vrs = mtrr_state.var_ranges;
96 90
97 rdmsr(MTRRcap_MSR, lo, dummy); 91 rdmsr(MTRRcap_MSR, lo, dummy);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 14535686c099..91e150acb46c 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -11,10 +11,6 @@
11#include <asm/mtrr.h> 11#include <asm/mtrr.h>
12#include "mtrr.h" 12#include "mtrr.h"
13 13
14/* RED-PEN: this is accessed without any locking */
15extern unsigned int *usage_table;
16
17
18#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) 14#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
19 15
20static const char *const mtrr_strings[MTRR_NUM_TYPES] = 16static const char *const mtrr_strings[MTRR_NUM_TYPES] =
@@ -397,7 +393,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
397 for (i = 0; i < max; i++) { 393 for (i = 0; i < max; i++) {
398 mtrr_if->get(i, &base, &size, &type); 394 mtrr_if->get(i, &base, &size, &type);
399 if (size == 0) 395 if (size == 0)
400 usage_table[i] = 0; 396 mtrr_usage_table[i] = 0;
401 else { 397 else {
402 if (size < (0x100000 >> PAGE_SHIFT)) { 398 if (size < (0x100000 >> PAGE_SHIFT)) {
403 /* less than 1MB */ 399 /* less than 1MB */
@@ -411,7 +407,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
411 len += seq_printf(seq, 407 len += seq_printf(seq,
412 "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", 408 "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
413 i, base, base >> (20 - PAGE_SHIFT), size, factor, 409 i, base, base >> (20 - PAGE_SHIFT), size, factor,
414 mtrr_attrib_to_str(type), usage_table[i]); 410 mtrr_attrib_to_str(type), mtrr_usage_table[i]);
415 } 411 }
416 } 412 }
417 return 0; 413 return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 60af5ed2b5c0..ccd36ed2187b 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -38,8 +38,8 @@
38#include <linux/cpu.h> 38#include <linux/cpu.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40 40
41#include <asm/e820.h>
41#include <asm/mtrr.h> 42#include <asm/mtrr.h>
42
43#include <asm/uaccess.h> 43#include <asm/uaccess.h>
44#include <asm/processor.h> 44#include <asm/processor.h>
45#include <asm/msr.h> 45#include <asm/msr.h>
@@ -47,7 +47,7 @@
47 47
48u32 num_var_ranges = 0; 48u32 num_var_ranges = 0;
49 49
50unsigned int *usage_table; 50unsigned int mtrr_usage_table[MAX_VAR_RANGES];
51static DEFINE_MUTEX(mtrr_mutex); 51static DEFINE_MUTEX(mtrr_mutex);
52 52
53u64 size_or_mask, size_and_mask; 53u64 size_or_mask, size_and_mask;
@@ -121,13 +121,8 @@ static void __init init_table(void)
121 int i, max; 121 int i, max;
122 122
123 max = num_var_ranges; 123 max = num_var_ranges;
124 if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
125 == NULL) {
126 printk(KERN_ERR "mtrr: could not allocate\n");
127 return;
128 }
129 for (i = 0; i < max; i++) 124 for (i = 0; i < max; i++)
130 usage_table[i] = 1; 125 mtrr_usage_table[i] = 1;
131} 126}
132 127
133struct set_mtrr_data { 128struct set_mtrr_data {
@@ -383,7 +378,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
383 goto out; 378 goto out;
384 } 379 }
385 if (increment) 380 if (increment)
386 ++usage_table[i]; 381 ++mtrr_usage_table[i];
387 error = i; 382 error = i;
388 goto out; 383 goto out;
389 } 384 }
@@ -391,15 +386,15 @@ int mtrr_add_page(unsigned long base, unsigned long size,
391 i = mtrr_if->get_free_region(base, size, replace); 386 i = mtrr_if->get_free_region(base, size, replace);
392 if (i >= 0) { 387 if (i >= 0) {
393 set_mtrr(i, base, size, type); 388 set_mtrr(i, base, size, type);
394 if (likely(replace < 0)) 389 if (likely(replace < 0)) {
395 usage_table[i] = 1; 390 mtrr_usage_table[i] = 1;
396 else { 391 } else {
397 usage_table[i] = usage_table[replace]; 392 mtrr_usage_table[i] = mtrr_usage_table[replace];
398 if (increment) 393 if (increment)
399 usage_table[i]++; 394 mtrr_usage_table[i]++;
400 if (unlikely(replace != i)) { 395 if (unlikely(replace != i)) {
401 set_mtrr(replace, 0, 0, 0); 396 set_mtrr(replace, 0, 0, 0);
402 usage_table[replace] = 0; 397 mtrr_usage_table[replace] = 0;
403 } 398 }
404 } 399 }
405 } else 400 } else
@@ -529,11 +524,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
529 printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); 524 printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
530 goto out; 525 goto out;
531 } 526 }
532 if (usage_table[reg] < 1) { 527 if (mtrr_usage_table[reg] < 1) {
533 printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); 528 printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
534 goto out; 529 goto out;
535 } 530 }
536 if (--usage_table[reg] < 1) 531 if (--mtrr_usage_table[reg] < 1)
537 set_mtrr(reg, 0, 0, 0); 532 set_mtrr(reg, 0, 0, 0);
538 error = reg; 533 error = reg;
539 out: 534 out:
@@ -593,16 +588,11 @@ struct mtrr_value {
593 unsigned long lsize; 588 unsigned long lsize;
594}; 589};
595 590
596static struct mtrr_value * mtrr_state; 591static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
597 592
598static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 593static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
599{ 594{
600 int i; 595 int i;
601 int size = num_var_ranges * sizeof(struct mtrr_value);
602
603 mtrr_state = kzalloc(size,GFP_ATOMIC);
604 if (!mtrr_state)
605 return -ENOMEM;
606 596
607 for (i = 0; i < num_var_ranges; i++) { 597 for (i = 0; i < num_var_ranges; i++) {
608 mtrr_if->get(i, 598 mtrr_if->get(i,
@@ -624,7 +614,6 @@ static int mtrr_restore(struct sys_device * sysdev)
624 mtrr_state[i].lsize, 614 mtrr_state[i].lsize,
625 mtrr_state[i].ltype); 615 mtrr_state[i].ltype);
626 } 616 }
627 kfree(mtrr_state);
628 return 0; 617 return 0;
629} 618}
630 619
@@ -635,6 +624,109 @@ static struct sysdev_driver mtrr_sysdev_driver = {
635 .resume = mtrr_restore, 624 .resume = mtrr_restore,
636}; 625};
637 626
627#ifdef CONFIG_X86_64
628static int disable_mtrr_trim;
629
630static int __init disable_mtrr_trim_setup(char *str)
631{
632 disable_mtrr_trim = 1;
633 return 0;
634}
635early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
636
637/*
638 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
639 * for memory >4GB. Check for that here.
640 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
641 * apply to are wrong, but so far we don't know of any such case in the wild.
642 */
643#define Tom2Enabled (1U << 21)
644#define Tom2ForceMemTypeWB (1U << 22)
645
646static __init int amd_special_default_mtrr(unsigned long end_pfn)
647{
648 u32 l, h;
649
650 /* Doesn't apply to memory < 4GB */
651 if (end_pfn <= (0xffffffff >> PAGE_SHIFT))
652 return 0;
653 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
654 return 0;
655 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
656 return 0;
657 /* In case some hypervisor doesn't pass SYSCFG through */
658 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
659 return 0;
660 /*
661 * Memory between 4GB and top of mem is forced WB by this magic bit.
662 * Reserved before K8RevF, but should be zero there.
663 */
664 if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
665 (Tom2Enabled | Tom2ForceMemTypeWB))
666 return 1;
667 return 0;
668}
669
670/**
671 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
672 *
673 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
674 * memory configurations. This routine checks that the highest MTRR matches
675 * the end of memory, to make sure the MTRRs having a write back type cover
676 * all of the memory the kernel is intending to use. If not, it'll trim any
677 * memory off the end by adjusting end_pfn, removing it from the kernel's
678 * allocation pools, warning the user with an obnoxious message.
679 */
680int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
681{
682 unsigned long i, base, size, highest_addr = 0, def, dummy;
683 mtrr_type type;
684 u64 trim_start, trim_size;
685
686 /*
687 * Make sure we only trim uncachable memory on machines that
688 * support the Intel MTRR architecture:
689 */
690 rdmsr(MTRRdefType_MSR, def, dummy);
691 def &= 0xff;
692 if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE)
693 return 0;
694
695 /* Find highest cached pfn */
696 for (i = 0; i < num_var_ranges; i++) {
697 mtrr_if->get(i, &base, &size, &type);
698 if (type != MTRR_TYPE_WRBACK)
699 continue;
700 base <<= PAGE_SHIFT;
701 size <<= PAGE_SHIFT;
702 if (highest_addr < base + size)
703 highest_addr = base + size;
704 }
705
706 if (amd_special_default_mtrr(end_pfn))
707 return 0;
708
709 if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
710 printk(KERN_WARNING "***************\n");
711 printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
712 printk(KERN_WARNING "**** MTRRs don't cover all of "
713 "memory, trimmed %ld pages\n", end_pfn -
714 (highest_addr >> PAGE_SHIFT));
715 printk(KERN_WARNING "***************\n");
716
717 printk(KERN_INFO "update e820 for mtrr\n");
718 trim_start = highest_addr;
719 trim_size = end_pfn;
720 trim_size <<= PAGE_SHIFT;
721 trim_size -= trim_start;
722 add_memory_region(trim_start, trim_size, E820_RESERVED);
723 update_e820();
724 return 1;
725 }
726
727 return 0;
728}
729#endif
638 730
639/** 731/**
640 * mtrr_bp_init - initialize mtrrs on the boot CPU 732 * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 54347e9a95c0..fb74a2c20814 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -12,6 +12,7 @@
12#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) 12#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
13 13
14#define NUM_FIXED_RANGES 88 14#define NUM_FIXED_RANGES 88
15#define MAX_VAR_RANGES 256
15#define MTRRfix64K_00000_MSR 0x250 16#define MTRRfix64K_00000_MSR 0x250
16#define MTRRfix16K_80000_MSR 0x258 17#define MTRRfix16K_80000_MSR 0x258
17#define MTRRfix16K_A0000_MSR 0x259 18#define MTRRfix16K_A0000_MSR 0x259
@@ -32,6 +33,8 @@
32 an 8 bit field: */ 33 an 8 bit field: */
33typedef u8 mtrr_type; 34typedef u8 mtrr_type;
34 35
36extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
37
35struct mtrr_ops { 38struct mtrr_ops {
36 u32 vendor; 39 u32 vendor;
37 u32 use_intel_if; 40 u32 use_intel_if;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6cbd15625dce..12948316e6a6 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -310,6 +310,13 @@ void __init setup_arch(char **cmdline_p)
310 * we are rounding upwards: 310 * we are rounding upwards:
311 */ 311 */
312 end_pfn = e820_end_of_ram(); 312 end_pfn = e820_end_of_ram();
313 /* update e820 for memory not covered by WB MTRRs */
314 mtrr_bp_init();
315 if (mtrr_trim_uncached_memory(end_pfn)) {
316 e820_register_active_regions(0, 0, -1UL);
317 end_pfn = e820_end_of_ram();
318 }
319
313 num_physpages = end_pfn; 320 num_physpages = end_pfn;
314 321
315 check_efer(); 322 check_efer();