#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>

#include "cpu.h"

#ifdef CONFIG_X86_OOSTORE

/*
 * Round x down to a power of two.
 *
 * Returns the largest power of two that is <= x, or 0 when x is 0.
 *
 * The search runs downwards from the top bit: an upward doubling loop
 * would wrap its u32 counter to 0 for x >= 0x80000000 and never
 * terminate.
 */
static u32 __cpuinit power2(u32 x)
{
	u32 s = 0x80000000U;

	while (s > x)
		s >>= 1;

	return s;
}

/*
 * Set up an actual MCR
 *
 * reg:  index of the MCR to program (added to MSR_IDT_MCR0)
 * base: start address of the region; the low 12 bits are dropped
 * size: length of the region, which must be a power of two
 * key:  attribute bits OR'ed into the low 12 bits of the mask word
 *
 * The same values are also handed to the mtrr driver.
 */
static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
{
	u32 lo, hi;

	hi = base & ~0xFFF;
	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
	lo &= ~0xFFF;		/* Remove the ctrl value bits */
	lo |= key;		/* Attribute we wish to set */
	wrmsr(reg+MSR_IDT_MCR0, lo, hi);
	mtrr_centaur_report_mcr(reg, lo, hi);	/* Tell the mtrr driver */
}

/*
 * Figure what we can cover with MCR's
 *
 * Shortcut: We know you can't put 4Gig of RAM on a winchip
 *
 * Walks the e820 map and returns the highest end address of any
 * non-reserved entry that starts below 4GiB, clipped to the lowest
 * reserved entry that starts at or above 1MiB.
 */
static u32 __cpuinit ramtop(void)
{
	u32 clip = 0xFFFFFFFFUL;
	u32 top = 0;
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long start, end;

		if (e820.map[i].addr > 0xFFFFFFFFUL)
			continue;
		/*
		 * Don't MCR over reserved space. Ignore the ISA hole
		 * we frob around that catastrophe already
		 */
		if (e820.map[i].type == E820_RESERVED) {
			if (e820.map[i].addr >= 0x100000UL &&
			    e820.map[i].addr < clip)
				clip = e820.map[i].addr;
			continue;
		}
		start = e820.map[i].addr;
		end = e820.map[i].addr + e820.map[i].size;
		/* Skip empty entries and entries whose end wrapped around */
		if (start >= end)
			continue;
		if (end > top)
			top = end;
	}
	/*
	 * Everything below 'top' should be RAM except for the ISA hole.
	 * Because of the limited MCR's we want to map NV/ACPI into our
	 * MCR range for gunk in RAM
	 *
	 * Clip might cause us to MCR insufficient RAM but that is an
	 * acceptable failure mode and should only bite obscure boxes with
	 * a VESA hole at 15Mb
	 *
	 * The second case Clip sometimes kicks in is when the EBDA is marked
	 * as reserved. Again we fail safe with reasonable results
	 */
	if (top > clip)
		top = clip;

	return top;
}

/*
 * Compute a set of MCR's to give maximum coverage
 *
 * nr:  maximum number of MCRs to program
 * key: attribute bits passed through to centaur_mcr_insert()
 *
 * Starting from the largest power of two that fits in RAM, each
 * iteration installs whichever candidate region is biggest: the next
 * block upwards from 'top', the next block downwards from 'base', or
 * one of the two fixed regions below the ISA hole (512KiB at 0, then
 * 128KiB at 512KiB).
 *
 * Returns the number of MCRs that were programmed.
 */
static int __cpuinit centaur_mcr_compute(int nr, int key)
{
	u32 mem = ramtop();
	u32 root = power2(mem);
	u32 base = root;
	u32 top = root;
	u32 floor = 0;
	int ct = 0;

	while (ct < nr) {
		u32 fspace = 0;
		u32 high;
		u32 low;

		/*
		 * Find the largest block we will fill going upwards
		 */
		high = power2(mem-top);

		/*
		 * Find the largest block we will fill going downwards
		 */
		low = base/2;

		/*
		 * Don't fill below 1Mb going downwards as there
		 * is an ISA hole in the way.
		 */
		if (base <= 1024*1024)
			low = 0;

		/*
		 * See how much space we could cover by filling below
		 * the ISA hole
		 */

		if (floor == 0)
			fspace = 512*1024;
		else if (floor == 512*1024)
			fspace = 128*1024;

		/* And forget ROM space */

		/*
		 * Now install the largest coverage we get
		 */
		if (fspace > high && fspace > low) {
			centaur_mcr_insert(ct, floor, fspace, key);
			floor += fspace;
		} else if (high > low) {
			centaur_mcr_insert(ct, top, high, key);
			top += high;
		} else if (low > 0) {
			base -= low;
			centaur_mcr_insert(ct, base, low, key);
		} else
			break;
		ct++;
	}
	/*
	 * We loaded ct values. We now need to set the mask. The caller
	 * must do this bit.
	 */
	return ct;
}

/*
 * Program the MCRs for the original C6 and zero the ones left over.
 */
static void __cpuinit centaur_create_optimal_mcr(void)
{
	int used;
	int i;

	/*
	 * Allocate up to 6 mcrs to mark as much of ram as possible
	 * as write combining and weak write ordered.
	 *
	 * To experiment with: Linux never uses stack operations for
	 * mmio spaces so we could globally enable stack operation wc
	 *
	 * Load the registers with type 31 - full write combining, all
	 * writes weakly ordered.
	 */
	used = centaur_mcr_compute(6, 31);

	/*
	 * Wipe unused MCRs
	 */
	for (i = used; i < 8; i++)
		wrmsr(MSR_IDT_MCR0+i, 0, 0);
}

/*
 * Program the MCRs for the Winchip 2/3, flag the ones in use in
 * MSR_IDT_MCR_CTRL and zero the ones left over.
 */
static void __cpuinit winchip2_create_optimal_mcr(void)
{
	u32 lo, hi;
	int used;
	int i;

	/*
	 * Allocate up to 6 mcrs to mark as much of ram as possible
	 * as write combining, weak store ordered.
	 *
	 * Load the registers with type 25
	 *	8	-	weak write ordering
	 *	16	-	weak read ordering
	 *	1	-	write combining
	 */
	used = centaur_mcr_compute(6, 25);

	/*
	 * Mark the registers we are using.
	 */
	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	for (i = 0; i < used; i++)
		lo |= 1<<(9+i);
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);

	/*
	 * Wipe unused MCRs
	 */

	for (i = used; i < 8; i++)
		wrmsr(MSR_IDT_MCR0+i, 0, 0);
}

/*
 * Handle the MCR key on the Winchip 2.
 *
 * Copies the 3-bit key found in bits 19-17 of MSR_IDT_MCR_CTRL into
 * bits 8-6 of the same register.
 */
static void __cpuinit winchip2_unprotect_mcr(void)
{
	u32 lo, hi;
	u32 key;

	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	lo &= ~0x1C0;	/* blank bits 8-6 */
	key = (lo>>17) & 7;
	lo |= key<<6;	/* replace with unlock key */
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}

/*
 * Undo winchip2_unprotect_mcr() by clearing bits 8-6 of
 * MSR_IDT_MCR_CTRL again.
 */
static void __cpuinit winchip2_protect_mcr(void)
{
	u32 lo, hi;

	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	lo &= ~0x1C0;	/* blank bits 8-6 */
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}

/*
 * Full out-of-order store setup shared by the Winchip 2 and Winchip 3:
 * unlock the MCRs, program them, switch the features on and lock the
 * MCRs again.
 */
static void __cpuinit winchip2_enable_oostore(void)
{
	u32 lo, hi;

	winchip2_unprotect_mcr();
	winchip2_create_optimal_mcr();
	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	/*
	 * Enable:
	 *	write combining on non-stack, non-string
	 *	write combining on string, all types
	 *	weak write ordering
	 */
	lo |= 31;
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
	winchip2_protect_mcr();
}
#endif /* CONFIG_X86_OOSTORE */

#define ACE_PRESENT	(1 << 6)
#define ACE_ENABLED	(1 << 7)
#define ACE_FCR		(1 << 28)	/* MSR_VIA_FCR */

#define RNG_PRESENT	(1 << 2)
#define RNG_ENABLED	(1 << 3)
#define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */

/*
 * Family 6 (C3 and later) setup: switch on the ACE and RNG units when
 * they are present but disabled, record the Centaur extended feature
 * flags and apply the model specific feature fixups.
 */
static void __cpuinit init_c3(struct cpuinfo_x86 *c)
{
	u32  lo, hi;

	/* Test for Centaur Extended Feature Flags presence */
	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
		u32 tmp = cpuid_edx(0xC0000001);

		/* enable ACE unit, if present and disabled */
		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
			rdmsr(MSR_VIA_FCR, lo, hi);
			lo |= ACE_FCR;		/* enable ACE unit */
			wrmsr(MSR_VIA_FCR, lo, hi);
			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
		}

		/* enable RNG unit, if present and disabled */
		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
			rdmsr(MSR_VIA_RNG, lo, hi);
			lo |= RNG_ENABLE;	/* enable RNG unit */
			wrmsr(MSR_VIA_RNG, lo, hi);
			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
		}

		/*
		 * store Centaur Extended Feature Flags as
		 * word 5 of the CPU capability bit array
		 *
		 * The flags are read again instead of reusing tmp,
		 * presumably so that they reflect the units enabled above.
		 */
		c->x86_capability[5] = cpuid_edx(0xC0000001);
	}
#ifdef CONFIG_X86_32
	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
	if (c->x86_model >= 6 && c->x86_model <= 9) {
		rdmsr(MSR_VIA_FCR, lo, hi);
		lo |= (1<<1 | 1<<7);
		wrmsr(MSR_VIA_FCR, lo, hi);
		set_cpu_cap(c, X86_FEATURE_CX8);
	}

	/* Before Nehemiah, the C3's had 3dNOW! */
	if (c->x86_model >= 6 && c->x86_model < 9)
		set_cpu_cap(c, X86_FEATURE_3DNOW);
#endif
	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
		c->x86_cache_alignment = c->x86_clflush_size * 2;
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
	}

	display_cacheinfo(c);
}

/*
 * Bit masks applied to the low word of MSR_IDT_FCR1 by init_centaur().
 * DTLOCK and EDCTLB deliberately share bit 8.
 */
enum {
		ECX8		= 1<<1,
		EIERRINT	= 1<<2,
		DPM		= 1<<3,
		DMCE		= 1<<4,
		DSTPCLK		= 1<<5,
		ELINEAR		= 1<<6,
		DSMC		= 1<<7,
		DTLOCK		= 1<<8,
		EDCTLB		= 1<<8,
		EMMX		= 1<<9,
		DPDC		= 1<<11,
		EBRPRED		= 1<<12,
		DIC		= 1<<13,
		DDC		= 1<<14,
		DNA		= 1<<15,
		ERETSTK		= 1<<16,
		E2MMX		= 1<<19,
		EAMD3D		= 1<<20,
};

/*
 * Early setup: only sets the capability bits that depend on the family
 * and model numbers.
 */
static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
	switch (c->x86) {
#ifdef CONFIG_X86_32
	case 5:
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		break;
#endif
	case 6:
		if (c->x86_model >= 0xf)
			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		break;
	}
#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
}

/*
 * Main setup. Family 5 (WinChip, 32-bit kernels only) gets its FCR
 * bits, optional out-of-order store configuration, capability flags,
 * cache size and model name; family 6 is handed to init_c3().
 */
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
	char *name;
	u32  fcr_set = 0;
	u32  fcr_clr = 0;
	u32  lo, hi, newlo;
	u32  aa, bb, cc, dd;

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);
#endif
	early_init_centaur(c);
	switch (c->x86) {
#ifdef CONFIG_X86_32
	case 5:
		switch (c->x86_model) {
		case 4:
			name = "C6";
			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
			fcr_clr = DPDC;
			printk(KERN_NOTICE "Disabling bugged TSC.\n");
			clear_cpu_cap(c, X86_FEATURE_TSC);
#ifdef CONFIG_X86_OOSTORE
			centaur_create_optimal_mcr();
			/*
			 * Enable:
			 *	write combining on non-stack, non-string
			 *	write combining on string, all types
			 *	weak write ordering
			 *
			 * The C6 original lacks weak read order
			 *
			 * Note 0x120 is write only on Winchip 1
			 */
			wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
#endif
			break;
		case 8:
			switch (c->x86_mask) {
			default:
				name = "2";
				break;
			case 7 ... 9:
				name = "2A";
				break;
			case 10 ... 15:
				name = "2B";
				break;
			}
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
			winchip2_enable_oostore();
#endif
			break;
		case 9:
			name = "3";
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
			winchip2_enable_oostore();
#endif
			break;
		default:
			name = "??";
			break;
		}

		/* Only write the FCR back when a bit actually changes */
		rdmsr(MSR_IDT_FCR1, lo, hi);
		newlo = (lo|fcr_set) & (~fcr_clr);

		if (newlo != lo) {
			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
				lo, newlo);
			wrmsr(MSR_IDT_FCR1, newlo, hi);
		} else {
			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
		}
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		/* Report CX8 */
		set_cpu_cap(c, X86_FEATURE_CX8);
		/* Set 3DNow! on Winchip 2 and above. */
		if (c->x86_model >= 8)
			set_cpu_cap(c, X86_FEATURE_3DNOW);
		/* See if we can find out some more. */
		if (cpuid_eax(0x80000000) >= 0x80000005) {
			/* Yes, we can. */
			cpuid(0x80000005, &aa, &bb, &cc, &dd);
			/* Add L1 data and code cache sizes. */
			c->x86_cache_size = (cc>>24)+(dd>>24);
		}
		sprintf(c->x86_model_id, "WinChip %s", name);
		break;
#endif
	case 6:
		init_c3(c);
		break;
	}
#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
}

/*
 * Correct the reported cache size on 32-bit kernels for the family 6
 * models handled below. Every other combination gets 'size' back
 * unchanged.
 */
static unsigned int __cpuinit
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
#ifdef CONFIG_X86_32
	/* VIA C3 CPUs (670-68F) need further shifting. */
	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
		size >>= 8;

	/*
	 * There's also an erratum in Nehemiah stepping 1, which
	 * returns '65KB' instead of '64KB'
	 *  - Note, it seems this may only be in engineering samples.
	 */
	if ((c->x86 == 6) && (c->x86_model == 9) &&
				(c->x86_mask == 1) && (size == 65))
		size -= 1;
#endif
	return size;
}

/* Vendor hooks registered for CPUs that identify as "CentaurHauls" */
static const struct cpu_dev __cpuinitconst centaur_cpu_dev = {
	.c_vendor	= "Centaur",
	.c_ident	= { "CentaurHauls" },
	.c_early_init	= early_init_centaur,
	.c_init		= init_centaur,
	.c_size_cache	= centaur_size_cache,
	.c_x86_vendor	= X86_VENDOR_CENTAUR,
};

cpu_dev_register(centaur_cpu_dev);