author    Ingo Molnar <mingo@elte.hu>  2008-07-25 05:37:07 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-07-25 05:37:07 -0400
commit    0e2f65ee30eee2db054f7fd73f462c5da33ec963 (patch)
tree      26c61eb7745da0c0d9135e9d12088f570cb8530d /arch/x86/kernel/cpu
parent    da7878d75b8520c9ae00d27dfbbce546a7bfdfbb (diff)
parent    fb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 (diff)

Merge branch 'linus' into x86/pebs

Conflicts:
        arch/x86/Kconfig.cpu
        arch/x86/kernel/cpu/intel.c
        arch/x86/kernel/setup_64.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu')
 arch/x86/kernel/cpu/Makefile                     |    4
 arch/x86/kernel/cpu/addon_cpuid_features.c       |   17
 arch/x86/kernel/cpu/amd.c                        |   44
 arch/x86/kernel/cpu/amd_64.c                     |  224
 arch/x86/kernel/cpu/bugs.c                       |   50
 arch/x86/kernel/cpu/bugs_64.c                    |   33
 arch/x86/kernel/cpu/centaur_64.c                 |   35
 arch/x86/kernel/cpu/common.c                     |    2
 arch/x86/kernel/cpu/common_64.c                  |  670
 arch/x86/kernel/cpu/cpu.h                        |    5
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c       |   16
 arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c    |   44
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c        |    6
 arch/x86/kernel/cpu/cpufreq/powernow-k7.h        |    1
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c        |   23
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c |  157
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c      |    7
 arch/x86/kernel/cpu/intel.c                      |   14
 arch/x86/kernel/cpu/intel_64.c                   |   95
 arch/x86/kernel/cpu/intel_cacheinfo.c            |   13
 arch/x86/kernel/cpu/mcheck/k7.c                  |   36
 arch/x86/kernel/cpu/mcheck/mce_64.c              |   54
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c          |    4
 arch/x86/kernel/cpu/mcheck/non-fatal.c           |    2
 arch/x86/kernel/cpu/mcheck/p4.c                  |   92
 arch/x86/kernel/cpu/mcheck/therm_throt.c         |    1
 arch/x86/kernel/cpu/mtrr/generic.c               |   38
 arch/x86/kernel/cpu/mtrr/main.c                  |  905
 arch/x86/kernel/cpu/mtrr/mtrr.h                  |    3
 arch/x86/kernel/cpu/perfctr-watchdog.c           |  214
 arch/x86/kernel/cpu/proc.c                       |    2
31 files changed, 2403 insertions, 408 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0c6f8190887..ee76eaad3001 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -6,11 +6,15 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o
 obj-y += proc.o feature_names.o
 
 obj-$(CONFIG_X86_32) += common.o bugs.o
+obj-$(CONFIG_X86_64) += common_64.o bugs_64.o
 obj-$(CONFIG_X86_32) += amd.o
+obj-$(CONFIG_X86_64) += amd_64.o
 obj-$(CONFIG_X86_32) += cyrix.o
 obj-$(CONFIG_X86_32) += centaur.o
+obj-$(CONFIG_X86_64) += centaur_64.o
 obj-$(CONFIG_X86_32) += transmeta.o
 obj-$(CONFIG_X86_32) += intel.o
+obj-$(CONFIG_X86_64) += intel_64.o
 obj-$(CONFIG_X86_32) += umc.o
 
 obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c2e1ce33c7cb..84a8220a6072 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -1,9 +1,7 @@
-
 /*
  * Routines to indentify additional cpu features that are scattered in
  * cpuid space.
  */
-
 #include <linux/cpu.h>
 
 #include <asm/pat.h>
@@ -53,19 +51,20 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_PAT
 void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
 {
+        if (!cpu_has_pat)
+                pat_disable("PAT not supported by CPU.");
+
         switch (c->x86_vendor) {
-        case X86_VENDOR_AMD:
-                if (c->x86 >= 0xf && c->x86 <= 0x11)
-                        return;
-                break;
         case X86_VENDOR_INTEL:
                 if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
                         return;
                 break;
+        case X86_VENDOR_AMD:
+        case X86_VENDOR_CENTAUR:
+        case X86_VENDOR_TRANSMETA:
+                return;
         }
 
-        pat_disable(cpu_has_pat ?
-                "PAT disabled. Not yet verified on this CPU type." :
-                "PAT not supported by CPU.");
+        pat_disable("PAT disabled. Not yet verified on this CPU type.");
 }
 #endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 245866828294..cae9cabc3031 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,45 +24,6 @@
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
-#ifdef CONFIG_X86_LOCAL_APIC
-#define ENABLE_C1E_MASK 0x18000000
-#define CPUID_PROCESSOR_SIGNATURE 1
-#define CPUID_XFAM 0x0ff00000
-#define CPUID_XFAM_K8 0x00000000
-#define CPUID_XFAM_10H 0x00100000
-#define CPUID_XFAM_11H 0x00200000
-#define CPUID_XMOD 0x000f0000
-#define CPUID_XMOD_REV_F 0x00040000
-
-/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
-static __cpuinit int amd_apic_timer_broken(void)
-{
-        u32 lo, hi;
-        u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-        switch (eax & CPUID_XFAM) {
-        case CPUID_XFAM_K8:
-                if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
-                        break;
-        case CPUID_XFAM_10H:
-        case CPUID_XFAM_11H:
-                rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
-                if (lo & ENABLE_C1E_MASK) {
-                        if (smp_processor_id() != boot_cpu_physical_apicid)
-                                printk(KERN_INFO "AMD C1E detected late. "
-                                       " Force timer broadcast.\n");
-                        return 1;
-                }
-                break;
-        default:
-                /* err on the side of caution */
-                return 1;
-        }
-        return 0;
-}
-#endif
-
-int force_mwait __cpuinitdata;
-
 static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
         if (cpuid_eax(0x80000000) >= 0x80000007) {
@@ -297,11 +258,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
                 num_cache_leaves = 3;
         }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-        if (amd_apic_timer_broken())
-                local_apic_timer_disabled = 1;
-#endif
-
         /* K6s reports MCEs but don't actually have all the MSRs */
         if (c->x86 < 6)
                 clear_cpu_cap(c, X86_FEATURE_MCE);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
new file mode 100644
index 000000000000..d1692b2a41ff
--- /dev/null
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -0,0 +1,224 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3
4#include <asm/numa_64.h>
5#include <asm/mmconfig.h>
6#include <asm/cacheflush.h>
7
8#include <mach_apic.h>
9
10#include "cpu.h"
11
12int force_mwait __cpuinitdata;
13
14#ifdef CONFIG_NUMA
15static int __cpuinit nearby_node(int apicid)
16{
17 int i, node;
18
19 for (i = apicid - 1; i >= 0; i--) {
20 node = apicid_to_node[i];
21 if (node != NUMA_NO_NODE && node_online(node))
22 return node;
23 }
24 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
25 node = apicid_to_node[i];
26 if (node != NUMA_NO_NODE && node_online(node))
27 return node;
28 }
29 return first_node(node_online_map); /* Shouldn't happen */
30}
31#endif
32
33/*
34 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
35 * Assumes number of cores is a power of two.
36 */
37static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
38{
39#ifdef CONFIG_SMP
40 unsigned bits;
41#ifdef CONFIG_NUMA
42 int cpu = smp_processor_id();
43 int node = 0;
44 unsigned apicid = hard_smp_processor_id();
45#endif
46 bits = c->x86_coreid_bits;
47
48 /* Low order bits define the core id (index of core in socket) */
49 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
50 /* Convert the initial APIC ID into the socket ID */
51 c->phys_proc_id = c->initial_apicid >> bits;
52
53#ifdef CONFIG_NUMA
54 node = c->phys_proc_id;
55 if (apicid_to_node[apicid] != NUMA_NO_NODE)
56 node = apicid_to_node[apicid];
57 if (!node_online(node)) {
58 /* Two possibilities here:
59 - The CPU is missing memory and no node was created.
60 In that case try picking one from a nearby CPU
61 - The APIC IDs differ from the HyperTransport node IDs
62 which the K8 northbridge parsing fills in.
63 Assume they are all increased by a constant offset,
64 but in the same order as the HT nodeids.
65 If that doesn't result in a usable node fall back to the
66 path for the previous case. */
67
68 int ht_nodeid = c->initial_apicid;
69
70 if (ht_nodeid >= 0 &&
71 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
72 node = apicid_to_node[ht_nodeid];
73 /* Pick a nearby node */
74 if (!node_online(node))
75 node = nearby_node(apicid);
76 }
77 numa_set_node(cpu, node);
78
79 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
80#endif
81#endif
82}
83
84static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
85{
86#ifdef CONFIG_SMP
87 unsigned bits, ecx;
88
89 /* Multi core CPU? */
90 if (c->extended_cpuid_level < 0x80000008)
91 return;
92
93 ecx = cpuid_ecx(0x80000008);
94
95 c->x86_max_cores = (ecx & 0xff) + 1;
96
97 /* CPU telling us the core id bits shift? */
98 bits = (ecx >> 12) & 0xF;
99
100 /* Otherwise recompute */
101 if (bits == 0) {
102 while ((1 << bits) < c->x86_max_cores)
103 bits++;
104 }
105
106 c->x86_coreid_bits = bits;
107
108#endif
109}
110
111static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
112{
113 early_init_amd_mc(c);
114
115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
116 if (c->x86_power & (1<<8))
117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
118
119 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
120}
121
122static void __cpuinit init_amd(struct cpuinfo_x86 *c)
123{
124 unsigned level;
125
126#ifdef CONFIG_SMP
127 unsigned long value;
128
129 /*
130 * Disable TLB flush filter by setting HWCR.FFDIS on K8
131 * bit 6 of msr C001_0015
132 *
133 * Errata 63 for SH-B3 steppings
134 * Errata 122 for all steppings (F+ have it disabled by default)
135 */
136 if (c->x86 == 0xf) {
137 rdmsrl(MSR_K8_HWCR, value);
138 value |= 1 << 6;
139 wrmsrl(MSR_K8_HWCR, value);
140 }
141#endif
142
143 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
144 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
145 clear_cpu_cap(c, 0*32+31);
146
147 /* On C+ stepping K8 rep microcode works well for copy/memset */
148 if (c->x86 == 0xf) {
149 level = cpuid_eax(1);
150 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
151 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
152 }
153 if (c->x86 == 0x10 || c->x86 == 0x11)
154 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
155
156 /* Enable workaround for FXSAVE leak */
157 if (c->x86 >= 6)
158 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
159
160 level = get_model_name(c);
161 if (!level) {
162 switch (c->x86) {
163 case 0xf:
164 /* Should distinguish Models here, but this is only
165 a fallback anyways. */
166 strcpy(c->x86_model_id, "Hammer");
167 break;
168 }
169 }
170 display_cacheinfo(c);
171
172 /* Multi core CPU? */
173 if (c->extended_cpuid_level >= 0x80000008)
174 amd_detect_cmp(c);
175
176 if (c->extended_cpuid_level >= 0x80000006 &&
177 (cpuid_edx(0x80000006) & 0xf000))
178 num_cache_leaves = 4;
179 else
180 num_cache_leaves = 3;
181
182 if (c->x86 >= 0xf && c->x86 <= 0x11)
183 set_cpu_cap(c, X86_FEATURE_K8);
184
185 /* MFENCE stops RDTSC speculation */
186 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
187
188 if (c->x86 == 0x10) {
189 /* do this for boot cpu */
190 if (c == &boot_cpu_data)
191 check_enable_amd_mmconf_dmi();
192
193 fam10h_check_enable_mmcfg();
194 }
195
196 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
197 unsigned long long tseg;
198
199 /*
200 * Split up direct mapping around the TSEG SMM area.
201 * Don't do it for gbpages because there seems very little
202 * benefit in doing so.
203 */
204 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
205 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
206 if ((tseg>>PMD_SHIFT) <
207 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
208 ((tseg>>PMD_SHIFT) <
209 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
210 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
211 set_memory_4k((unsigned long)__va(tseg), 1);
212 }
213 }
214}
215
216static struct cpu_dev amd_cpu_dev __cpuinitdata = {
217 .c_vendor = "AMD",
218 .c_ident = { "AuthenticAMD" },
219 .c_early_init = early_init_amd,
220 .c_init = init_amd,
221};
222
223cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
224
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 170d2f5523b2..c9b58a806e85 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -59,8 +59,12 @@ static void __init check_fpu(void)
                 return;
         }
 
-/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
-        /* Test for the divl bug.. */
+        /*
+         * trap_init() enabled FXSR and company _before_ testing for FP
+         * problems here.
+         *
+         * Test for the divl bug..
+         */
         __asm__("fninit\n\t"
                 "fldl %1\n\t"
                 "fdivl %2\n\t"
@@ -108,10 +112,15 @@ static void __init check_popad(void)
108 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " 112 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
109 : "=&a" (res) 113 : "=&a" (res)
110 : "d" (inp) 114 : "d" (inp)
111 : "ecx", "edi" ); 115 : "ecx", "edi");
112 /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ 116 /*
113 if (res != 12345678) printk( "Buggy.\n" ); 117 * If this fails, it means that any user program may lock the
114 else printk( "OK.\n" ); 118 * CPU hard. Too bad.
119 */
120 if (res != 12345678)
121 printk("Buggy.\n");
122 else
123 printk("OK.\n");
115#endif 124#endif
116} 125}
117 126
@@ -122,13 +131,7 @@ static void __init check_popad(void)
  *	(for due to lack of "invlpg" and working WP on a i386)
  * - In order to run on anything without a TSC, we need to be
  *	compiled for a i486.
- * - In order to support the local APIC on a buggy Pentium machine,
- *	we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
- *	which happens implicitly if compiled for a Pentium or lower
- *	(unless an advanced selection of CPU features is used) as an
- *	otherwise config implies a properly working local APIC without
- *	the need to do extra reads from the APIC.
-*/
+ */
 
 static void __init check_config(void)
 {
@@ -137,25 +140,11 @@ static void __init check_config(void)
          * i486+ only features! (WP works in supervisor mode and the
          * new "invlpg" and "bswap" instructions)
          */
-#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
+#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
+        defined(CONFIG_X86_BSWAP)
         if (boot_cpu_data.x86 == 3)
                 panic("Kernel requires i486+ for 'invlpg' and other features");
 #endif
-
-/*
- * If we were told we had a good local APIC, check for buggy Pentia,
- * i.e. all B steppings and the C2 stepping of P54C when using their
- * integrated APIC (see 11AP erratum in "Pentium Processor
- * Specification Update").
- */
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
-        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
-            && cpu_has_apic
-            && boot_cpu_data.x86 == 5
-            && boot_cpu_data.x86_model == 2
-            && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
-                panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
-#endif
 }
 
 
@@ -170,6 +159,7 @@ void __init check_bugs(void)
         check_fpu();
         check_hlt();
         check_popad();
-        init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+        init_utsname()->machine[1] =
+                '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
         alternative_instructions();
 }
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
new file mode 100644
index 000000000000..9a3ed0649d4e
--- /dev/null
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -0,0 +1,33 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 * Copyright (C) 2000 SuSE
4 */
5
6#include <linux/kernel.h>
7#include <linux/init.h>
8#include <asm/alternative.h>
9#include <asm/bugs.h>
10#include <asm/processor.h>
11#include <asm/mtrr.h>
12#include <asm/cacheflush.h>
13
14void __init check_bugs(void)
15{
16 identify_boot_cpu();
17#if !defined(CONFIG_SMP)
18 printk("CPU: ");
19 print_cpu_info(&boot_cpu_data);
20#endif
21 alternative_instructions();
22
23 /*
24 * Make sure the first 2MB area is not mapped by huge pages
25 * There are typically fixed size MTRRs in there and overlapping
26 * MTRRs into large pages causes slow downs.
27 *
28 * Right now we don't do that with gbpages because there seems
29 * very little benefit for that case.
30 */
31 if (!direct_gbpages)
32 set_memory_4k((unsigned long)__va(0), 1);
33}
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
new file mode 100644
index 000000000000..1d181c40e2e1
--- /dev/null
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -0,0 +1,35 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3
4#include <asm/cpufeature.h>
5#include <asm/processor.h>
6
7#include "cpu.h"
8
9static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
10{
11 if (c->x86 == 0x6 && c->x86_model >= 0xf)
12 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
13
14 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
15}
16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{
19 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
20 c->x86_cache_alignment = c->x86_clflush_size * 2;
21 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
22 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
23 }
24 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
25}
26
27static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
28 .c_vendor = "Centaur",
29 .c_ident = { "CentaurHauls" },
30 .c_early_init = early_init_centaur,
31 .c_init = init_centaur,
32};
33
34cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);
35
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d0463a946247..80ab20d4fa39 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -427,7 +427,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
         int i;
 
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
new file mode 100644
index 000000000000..dd6e3f15017e
--- /dev/null
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -0,0 +1,670 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
10#include <linux/delay.h>
11#include <linux/smp.h>
12#include <linux/percpu.h>
13#include <asm/i387.h>
14#include <asm/msr.h>
15#include <asm/io.h>
16#include <asm/linkage.h>
17#include <asm/mmu_context.h>
18#include <asm/mtrr.h>
19#include <asm/mce.h>
20#include <asm/pat.h>
21#include <asm/numa.h>
22#ifdef CONFIG_X86_LOCAL_APIC
23#include <asm/mpspec.h>
24#include <asm/apic.h>
25#include <mach_apic.h>
26#endif
27#include <asm/pda.h>
28#include <asm/pgtable.h>
29#include <asm/processor.h>
30#include <asm/desc.h>
31#include <asm/atomic.h>
32#include <asm/proto.h>
33#include <asm/sections.h>
34#include <asm/setup.h>
35#include <asm/genapic.h>
36
37#include "cpu.h"
38
39/* We need valid kernel segments for data and code in long mode too
40 * IRET will check the segment types kkeil 2000/10/28
41 * Also sysret mandates a special GDT layout
42 */
43/* The TLS descriptors are currently at a different place compared to i386.
44 Hopefully nobody expects them at a fixed place (Wine?) */
45DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
46 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
47 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
48 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
49 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
50 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
51 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
52} };
53EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
54
55__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
56
57/* Current gdt points %fs at the "master" per-cpu area: after this,
58 * it's on the real one. */
59void switch_to_new_gdt(void)
60{
61 struct desc_ptr gdt_descr;
62
63 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
64 gdt_descr.size = GDT_SIZE - 1;
65 load_gdt(&gdt_descr);
66}
67
68struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
69
70static void __cpuinit default_init(struct cpuinfo_x86 *c)
71{
72 display_cacheinfo(c);
73}
74
75static struct cpu_dev __cpuinitdata default_cpu = {
76 .c_init = default_init,
77 .c_vendor = "Unknown",
78};
79static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
80
81int __cpuinit get_model_name(struct cpuinfo_x86 *c)
82{
83 unsigned int *v;
84
85 if (c->extended_cpuid_level < 0x80000004)
86 return 0;
87
88 v = (unsigned int *) c->x86_model_id;
89 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
90 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
91 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
92 c->x86_model_id[48] = 0;
93 return 1;
94}
95
96
97void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
98{
99 unsigned int n, dummy, ebx, ecx, edx;
100
101 n = c->extended_cpuid_level;
102
103 if (n >= 0x80000005) {
104 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
105 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
106 "D cache %dK (%d bytes/line)\n",
107 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
108 c->x86_cache_size = (ecx>>24) + (edx>>24);
109 /* On K8 L1 TLB is inclusive, so don't count it */
110 c->x86_tlbsize = 0;
111 }
112
113 if (n >= 0x80000006) {
114 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
115 ecx = cpuid_ecx(0x80000006);
116 c->x86_cache_size = ecx >> 16;
117 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
118
119 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
120 c->x86_cache_size, ecx & 0xFF);
121 }
122}
123
124void __cpuinit detect_ht(struct cpuinfo_x86 *c)
125{
126#ifdef CONFIG_SMP
127 u32 eax, ebx, ecx, edx;
128 int index_msb, core_bits;
129
130 cpuid(1, &eax, &ebx, &ecx, &edx);
131
132
133 if (!cpu_has(c, X86_FEATURE_HT))
134 return;
135 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
136 goto out;
137
138 smp_num_siblings = (ebx & 0xff0000) >> 16;
139
140 if (smp_num_siblings == 1) {
141 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
142 } else if (smp_num_siblings > 1) {
143
144 if (smp_num_siblings > NR_CPUS) {
145 printk(KERN_WARNING "CPU: Unsupported number of "
146 "siblings %d", smp_num_siblings);
147 smp_num_siblings = 1;
148 return;
149 }
150
151 index_msb = get_count_order(smp_num_siblings);
152 c->phys_proc_id = phys_pkg_id(index_msb);
153
154 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
155
156 index_msb = get_count_order(smp_num_siblings);
157
158 core_bits = get_count_order(c->x86_max_cores);
159
160 c->cpu_core_id = phys_pkg_id(index_msb) &
161 ((1 << core_bits) - 1);
162 }
163out:
164 if ((c->x86_max_cores * smp_num_siblings) > 1) {
165 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
166 c->phys_proc_id);
167 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
168 c->cpu_core_id);
169 }
170
171#endif
172}
173
174static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
175{
176 char *v = c->x86_vendor_id;
177 int i;
178 static int printed;
179
180 for (i = 0; i < X86_VENDOR_NUM; i++) {
181 if (cpu_devs[i]) {
182 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
183 (cpu_devs[i]->c_ident[1] &&
184 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
185 c->x86_vendor = i;
186 this_cpu = cpu_devs[i];
187 return;
188 }
189 }
190 }
191 if (!printed) {
192 printed++;
193 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
194 printk(KERN_ERR "CPU: Your system may be unstable.\n");
195 }
196 c->x86_vendor = X86_VENDOR_UNKNOWN;
197}
198
199static void __init early_cpu_support_print(void)
200{
201 int i,j;
202 struct cpu_dev *cpu_devx;
203
204 printk("KERNEL supported cpus:\n");
205 for (i = 0; i < X86_VENDOR_NUM; i++) {
206 cpu_devx = cpu_devs[i];
207 if (!cpu_devx)
208 continue;
209 for (j = 0; j < 2; j++) {
210 if (!cpu_devx->c_ident[j])
211 continue;
212 printk(" %s %s\n", cpu_devx->c_vendor,
213 cpu_devx->c_ident[j]);
214 }
215 }
216}
217
218static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
219
220void __init early_cpu_init(void)
221{
222 struct cpu_vendor_dev *cvdev;
223
224 for (cvdev = __x86cpuvendor_start ;
225 cvdev < __x86cpuvendor_end ;
226 cvdev++)
227 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
228 early_cpu_support_print();
229 early_identify_cpu(&boot_cpu_data);
230}
231
232/* Do some early cpuid on the boot CPU to get some parameter that are
233 needed before check_bugs. Everything advanced is in identify_cpu
234 below. */
235static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
236{
237 u32 tfms, xlvl;
238
239 c->loops_per_jiffy = loops_per_jiffy;
240 c->x86_cache_size = -1;
241 c->x86_vendor = X86_VENDOR_UNKNOWN;
242 c->x86_model = c->x86_mask = 0; /* So far unknown... */
243 c->x86_vendor_id[0] = '\0'; /* Unset */
244 c->x86_model_id[0] = '\0'; /* Unset */
245 c->x86_clflush_size = 64;
246 c->x86_cache_alignment = c->x86_clflush_size;
247 c->x86_max_cores = 1;
248 c->x86_coreid_bits = 0;
249 c->extended_cpuid_level = 0;
250 memset(&c->x86_capability, 0, sizeof c->x86_capability);
251
252 /* Get vendor name */
253 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
254 (unsigned int *)&c->x86_vendor_id[0],
255 (unsigned int *)&c->x86_vendor_id[8],
256 (unsigned int *)&c->x86_vendor_id[4]);
257
258 get_cpu_vendor(c);
259
260 /* Initialize the standard set of capabilities */
261 /* Note that the vendor-specific code below might override */
262
263 /* Intel-defined flags: level 0x00000001 */
264 if (c->cpuid_level >= 0x00000001) {
265 __u32 misc;
266 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
267 &c->x86_capability[0]);
268 c->x86 = (tfms >> 8) & 0xf;
269 c->x86_model = (tfms >> 4) & 0xf;
270 c->x86_mask = tfms & 0xf;
271 if (c->x86 == 0xf)
272 c->x86 += (tfms >> 20) & 0xff;
273 if (c->x86 >= 0x6)
274 c->x86_model += ((tfms >> 16) & 0xF) << 4;
275 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
276 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
277 } else {
278 /* Have CPUID level 0 only - unheard of */
279 c->x86 = 4;
280 }
281
282 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
283#ifdef CONFIG_SMP
284 c->phys_proc_id = c->initial_apicid;
285#endif
286 /* AMD-defined flags: level 0x80000001 */
287 xlvl = cpuid_eax(0x80000000);
288 c->extended_cpuid_level = xlvl;
289 if ((xlvl & 0xffff0000) == 0x80000000) {
290 if (xlvl >= 0x80000001) {
291 c->x86_capability[1] = cpuid_edx(0x80000001);
292 c->x86_capability[6] = cpuid_ecx(0x80000001);
293 }
294 if (xlvl >= 0x80000004)
295 get_model_name(c); /* Default name */
296 }
297
298 /* Transmeta-defined flags: level 0x80860001 */
299 xlvl = cpuid_eax(0x80860000);
300 if ((xlvl & 0xffff0000) == 0x80860000) {
301 /* Don't set x86_cpuid_level here for now to not confuse. */
302 if (xlvl >= 0x80860001)
303 c->x86_capability[2] = cpuid_edx(0x80860001);
304 }
305
306 if (c->extended_cpuid_level >= 0x80000007)
307 c->x86_power = cpuid_edx(0x80000007);
308
309 if (c->extended_cpuid_level >= 0x80000008) {
310 u32 eax = cpuid_eax(0x80000008);
311
312 c->x86_virt_bits = (eax >> 8) & 0xff;
313 c->x86_phys_bits = eax & 0xff;
314 }
315
316 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
317 cpu_devs[c->x86_vendor]->c_early_init)
318 cpu_devs[c->x86_vendor]->c_early_init(c);
319
320 validate_pat_support(c);
321}
322
323/*
324 * This does the hard work of actually picking apart the CPU stuff...
325 */
326static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
327{
328 int i;
329
330 early_identify_cpu(c);
331
332 init_scattered_cpuid_features(c);
333
334 c->apicid = phys_pkg_id(0);
335
336 /*
337 * Vendor-specific initialization. In this section we
338 * canonicalize the feature flags, meaning if there are
339 * features a certain CPU supports which CPUID doesn't
340 * tell us, CPUID claiming incorrect flags, or other bugs,
341 * we handle them here.
342 *
343 * At the end of this section, c->x86_capability better
344 * indicate the features this CPU genuinely supports!
345 */
346 if (this_cpu->c_init)
347 this_cpu->c_init(c);
348
349 detect_ht(c);
350
351 /*
352 * On SMP, boot_cpu_data holds the common feature set between
353 * all CPUs; so make sure that we indicate which features are
354 * common between the CPUs. The first time this routine gets
355 * executed, c == &boot_cpu_data.
356 */
357 if (c != &boot_cpu_data) {
358 /* AND the already accumulated flags with these */
359 for (i = 0; i < NCAPINTS; i++)
360 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
361 }
362
363 /* Clear all flags overriden by options */
364 for (i = 0; i < NCAPINTS; i++)
365 c->x86_capability[i] &= ~cleared_cpu_caps[i];
366
367#ifdef CONFIG_X86_MCE
368 mcheck_init(c);
369#endif
370 select_idle_routine(c);
371
372#ifdef CONFIG_NUMA
373 numa_add_cpu(smp_processor_id());
374#endif
375
376}
377
378void __cpuinit identify_boot_cpu(void)
379{
380 identify_cpu(&boot_cpu_data);
381}
382
383void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
384{
385 BUG_ON(c == &boot_cpu_data);
386 identify_cpu(c);
387 mtrr_ap_init();
388}
389
390static __init int setup_noclflush(char *arg)
391{
392 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
393 return 1;
394}
395__setup("noclflush", setup_noclflush);
396
397void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
398{
399 if (c->x86_model_id[0])
400 printk(KERN_CONT "%s", c->x86_model_id);
401
402 if (c->x86_mask || c->cpuid_level >= 0)
403 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
404 else
405 printk(KERN_CONT "\n");
406}
407
408static __init int setup_disablecpuid(char *arg)
409{
410 int bit;
411 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
412 setup_clear_cpu_cap(bit);
413 else
414 return 0;
415 return 1;
416}
417__setup("clearcpuid=", setup_disablecpuid);
418
419cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
420
421struct x8664_pda **_cpu_pda __read_mostly;
422EXPORT_SYMBOL(_cpu_pda);
423
424struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
425
426char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
427
428unsigned long __supported_pte_mask __read_mostly = ~0UL;
429EXPORT_SYMBOL_GPL(__supported_pte_mask);
430
431static int do_not_nx __cpuinitdata;
432
433/* noexec=on|off
434Control non executable mappings for 64bit processes.
435
436on Enable(default)
437off Disable
438*/
439static int __init nonx_setup(char *str)
440{
441 if (!str)
442 return -EINVAL;
443 if (!strncmp(str, "on", 2)) {
444 __supported_pte_mask |= _PAGE_NX;
445 do_not_nx = 0;
446 } else if (!strncmp(str, "off", 3)) {
447 do_not_nx = 1;
448 __supported_pte_mask &= ~_PAGE_NX;
449 }
450 return 0;
451}
452early_param("noexec", nonx_setup);
453
454int force_personality32;
455
456/* noexec32=on|off
457Control non executable heap for 32bit processes.
458To control the stack too use noexec=off
459
460on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
461off PROT_READ implies PROT_EXEC
462*/
463static int __init nonx32_setup(char *str)
464{
465 if (!strcmp(str, "on"))
466 force_personality32 &= ~READ_IMPLIES_EXEC;
467 else if (!strcmp(str, "off"))
468 force_personality32 |= READ_IMPLIES_EXEC;
469 return 1;
470}
471__setup("noexec32=", nonx32_setup);
472
473void pda_init(int cpu)
474{
475 struct x8664_pda *pda = cpu_pda(cpu);
476
477 /* Setup up data that may be needed in __get_free_pages early */
478 loadsegment(fs, 0);
479 loadsegment(gs, 0);
480 /* Memory clobbers used to order PDA accessed */
481 mb();
482 wrmsrl(MSR_GS_BASE, pda);
483 mb();
484
485 pda->cpunumber = cpu;
486 pda->irqcount = -1;
487 pda->kernelstack = (unsigned long)stack_thread_info() -
488 PDA_STACKOFFSET + THREAD_SIZE;
489 pda->active_mm = &init_mm;
490 pda->mmu_state = 0;
491
492 if (cpu == 0) {
493 /* others are initialized in smpboot.c */
494 pda->pcurrent = &init_task;
495 pda->irqstackptr = boot_cpu_stack;
496 } else {
497 pda->irqstackptr = (char *)
498 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
499 if (!pda->irqstackptr)
500 panic("cannot allocate irqstack for cpu %d", cpu);
501
502 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
503 pda->nodenumber = cpu_to_node(cpu);
504 }
505
506 pda->irqstackptr += IRQSTACKSIZE-64;
507}
508
509char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
510 DEBUG_STKSZ] __page_aligned_bss;
511
512extern asmlinkage void ignore_sysret(void);
513
514/* May not be marked __init: used by software suspend */
515void syscall_init(void)
516{
517 /*
518 * LSTAR and STAR live in a bit strange symbiosis.
519 * They both write to the same internal register. STAR allows to
520 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
521 */
522 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
523 wrmsrl(MSR_LSTAR, system_call);
524 wrmsrl(MSR_CSTAR, ignore_sysret);
525
526#ifdef CONFIG_IA32_EMULATION
527 syscall32_cpu_init();
528#endif
529
530 /* Flags to clear on syscall */
531 wrmsrl(MSR_SYSCALL_MASK,
532 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
533}
534
535void __cpuinit check_efer(void)
536{
537 unsigned long efer;
538
539 rdmsrl(MSR_EFER, efer);
540 if (!(efer & EFER_NX) || do_not_nx)
541 __supported_pte_mask &= ~_PAGE_NX;
542}
543
544unsigned long kernel_eflags;
545
546/*
547 * Copies of the original ist values from the tss are only accessed during
548 * debugging, no special alignment required.
549 */
550DEFINE_PER_CPU(struct orig_ist, orig_ist);
551
552/*
553 * cpu_init() initializes state that is per-CPU. Some data is already
554 * initialized (naturally) in the bootstrap process, such as the GDT
555 * and IDT. We reload them nevertheless, this function acts as a
556 * 'CPU state barrier', nothing should get across.
557 * A lot of state is already set up in PDA init.
558 */
559void __cpuinit cpu_init(void)
560{
561 int cpu = stack_smp_processor_id();
562 struct tss_struct *t = &per_cpu(init_tss, cpu);
563 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
564 unsigned long v;
565 char *estacks = NULL;
566 struct task_struct *me;
567 int i;
568
569 /* CPU 0 is initialised in head64.c */
570 if (cpu != 0)
571 pda_init(cpu);
572 else
573 estacks = boot_exception_stacks;
574
575 me = current;
576
577 if (cpu_test_and_set(cpu, cpu_initialized))
578 panic("CPU#%d already initialized!\n", cpu);
579
580 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
581
582 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
583
584 /*
585 * Initialize the per-CPU GDT with the boot GDT,
586 * and set up the GDT descriptor:
587 */
588
589 switch_to_new_gdt();
590 load_idt((const struct desc_ptr *)&idt_descr);
591
592 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
593 syscall_init();
594
595 wrmsrl(MSR_FS_BASE, 0);
596 wrmsrl(MSR_KERNEL_GS_BASE, 0);
597 barrier();
598
599 check_efer();
600
601 /*
602 * set up and load the per-CPU TSS
603 */
604 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
605 static const unsigned int order[N_EXCEPTION_STACKS] = {
606 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
607 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
608 };
609 if (cpu) {
610 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
611 if (!estacks)
612 panic("Cannot allocate exception stack %ld %d\n",
613 v, cpu);
614 }
615 estacks += PAGE_SIZE << order[v];
616 orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
617 }
618
619 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
620 /*
621 * <= is required because the CPU will access up to
622 * 8 bits beyond the end of the IO permission bitmap.
623 */
624 for (i = 0; i <= IO_BITMAP_LONGS; i++)
625 t->io_bitmap[i] = ~0UL;
626
627 atomic_inc(&init_mm.mm_count);
628 me->active_mm = &init_mm;
629 if (me->mm)
630 BUG();
631 enter_lazy_tlb(&init_mm, me);
632
633 load_sp0(t, &current->thread);
634 set_tss_desc(cpu, t);
635 load_TR_desc();
636 load_LDT(&init_mm.context);
637
638#ifdef CONFIG_KGDB
639 /*
640 * If the kgdb is connected no debug regs should be altered. This
641 * is only applicable when KGDB and a KGDB I/O module are built
642 * into the kernel and you are using early debugging with
643 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
644 */
645 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
646 arch_kgdb_ops.correct_hw_break();
647 else {
648#endif
649 /*
650 * Clear all 6 debug registers:
651 */
652
653 set_debugreg(0UL, 0);
654 set_debugreg(0UL, 1);
655 set_debugreg(0UL, 2);
656 set_debugreg(0UL, 3);
657 set_debugreg(0UL, 6);
658 set_debugreg(0UL, 7);
659#ifdef CONFIG_KGDB
660 /* If the kgdb is connected no debug regs should be altered. */
661 }
662#endif
663
664 fpu_init();
665
666 raw_local_save_flags(kernel_eflags);
667
668 if (is_uv_system())
669 uv_cpu_init();
670}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 783691b2a738..4d894e8565fe 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,3 +1,6 @@
+#ifndef ARCH_X86_CPU_H
+
+#define ARCH_X86_CPU_H
 
 struct cpu_model_info {
         int vendor;
@@ -36,3 +39,5 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
 
 extern int get_model_name(struct cpuinfo_x86 *c);
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
+
+#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index b0c8208df9fa..ff2fff56f0a8 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -200,10 +200,12 @@ static void drv_read(struct drv_cmd *cmd)
 static void drv_write(struct drv_cmd *cmd)
 {
         cpumask_t saved_mask = current->cpus_allowed;
+        cpumask_of_cpu_ptr_declare(cpu_mask);
         unsigned int i;
 
-        for_each_cpu_mask(i, cmd->mask) {
-                set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
+        for_each_cpu_mask_nr(i, cmd->mask) {
+                cpumask_of_cpu_ptr_next(cpu_mask, i);
+                set_cpus_allowed_ptr(current, cpu_mask);
                 do_drv_write(cmd);
         }
 
@@ -267,11 +269,12 @@ static unsigned int get_measured_perf(unsigned int cpu)
         } aperf_cur, mperf_cur;
 
         cpumask_t saved_mask;
+        cpumask_of_cpu_ptr(cpu_mask, cpu);
         unsigned int perf_percent;
         unsigned int retval;
 
         saved_mask = current->cpus_allowed;
-        set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+        set_cpus_allowed_ptr(current, cpu_mask);
         if (get_cpu() != cpu) {
                 /* We were not able to run on requested processor */
                 put_cpu();
@@ -337,6 +340,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
+        cpumask_of_cpu_ptr(cpu_mask, cpu);
         struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
         unsigned int freq;
         unsigned int cached_freq;
@@ -349,7 +353,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
         }
 
         cached_freq = data->freq_table[data->acpi_data->state].frequency;
-        freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
+        freq = extract_freq(get_cur_val(cpu_mask), data);
         if (freq != cached_freq) {
                 /*
                  * The dreaded BIOS frequency change behind our back.
@@ -451,7 +455,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
         freqs.old = perf->states[perf->state].core_frequency * 1000;
         freqs.new = data->freq_table[next_state].frequency;
-        for_each_cpu_mask(i, cmd.mask) {
+        for_each_cpu_mask_nr(i, cmd.mask) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
         }
@@ -466,7 +470,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                 }
         }
 
-        for_each_cpu_mask(i, cmd.mask) {
+        for_each_cpu_mask_nr(i, cmd.mask) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
         }
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index f03e9153618e..965ea52767ac 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -26,9 +26,10 @@
 #define NFORCE2_SAFE_DISTANCE 50
 
 /* Delay in ms between FSB changes */
-//#define NFORCE2_DELAY 10
+/* #define NFORCE2_DELAY 10 */
 
-/* nforce2_chipset:
+/*
+ * nforce2_chipset:
  * FSB is changed using the chipset
  */
 static struct pci_dev *nforce2_chipset_dev;
@@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev;
 /* fid:
  * multiplier * 10
  */
-static int fid = 0;
+static int fid;
 
 /* min_fsb, max_fsb:
  * minimum and maximum FSB (= FSB at boot time)
  */
-static int min_fsb = 0;
-static int max_fsb = 0;
+static int min_fsb;
+static int max_fsb;
 
 MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
 MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
@@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444);
 
 MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
 MODULE_PARM_DESC(min_fsb,
-        "Minimum FSB to use, if not defined: current FSB - 50");
+                 "Minimum FSB to use, if not defined: current FSB - 50");
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
 
@@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb)
 
         /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
         nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
-                        0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL);
+                        0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL);
         if (!nforce2_sub5)
                 return 0;
 
@@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
         fsb /= 1000000;
 
         /* Check if PLL register is already set */
-        pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
+        pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
 
-        if(bootfsb || !temp)
+        if (bootfsb || !temp)
                 return fsb;
 
         /* Use PLL register FSB value */
-        pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp);
+        pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp);
         fsb = nforce2_calc_fsb(temp);
 
         return fsb;
@@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb)
         }
 
         /* First write? Then set actual value */
-        pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
+        pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
         if (!temp) {
                 pll = nforce2_calc_pll(tfsb);
 
@@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb)
                 tfsb--;
 
         /* Calculate the PLL reg. value */
-        if ((pll = nforce2_calc_pll(tfsb)) == -1)
+        pll = nforce2_calc_pll(tfsb);
+        if (pll == -1)
                 return -EINVAL;
 
         nforce2_write_pll(pll);
@@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu)
 static int nforce2_target(struct cpufreq_policy *policy,
                           unsigned int target_freq, unsigned int relation)
 {
-//      unsigned long flags;
+/*      unsigned long flags; */
         struct cpufreq_freqs freqs;
         unsigned int target_fsb;
 
@@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy,
         cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
         /* Disable IRQs */
-        //local_irq_save(flags);
+        /* local_irq_save(flags); */
 
         if (nforce2_set_fsb(target_fsb) < 0)
                 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
                         target_fsb);
         else
                 dprintk("Changed FSB successfully to %d\n",
                         target_fsb);
 
         /* Enable IRQs */
-        //local_irq_restore(flags);
+        /* local_irq_restore(flags); */
 
         cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
@@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy)
         policy->max = (fsb_pol_max + 1) * fid * 100;
 
         cpufreq_verify_within_limits(policy,
                                      policy->cpuinfo.min_freq,
                                      policy->cpuinfo.max_freq);
         return 0;
 }
 
@@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
         /* Set maximum FSB to FSB at boot time */
         max_fsb = nforce2_fsb_read(1);
 
-        if(!max_fsb)
+        if (!max_fsb)
                 return -EIO;
 
         if (!min_fsb)
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 199e4e05e5dc..f1685fb91fbd 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
                 return 0;
 
         /* notifiers */
-        for_each_cpu_mask(i, policy->cpus) {
+        for_each_cpu_mask_nr(i, policy->cpus) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
         }
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
         /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
          * Developer's Manual, Volume 3
          */
-        for_each_cpu_mask(i, policy->cpus)
+        for_each_cpu_mask_nr(i, policy->cpus)
                 cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
 
         /* notifiers */
-        for_each_cpu_mask(i, policy->cpus) {
+        for_each_cpu_mask_nr(i, policy->cpus) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
         }
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
index f8a63b3664e3..35fb4eaf6e1c 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
@@ -1,5 +1,4 @@
 /*
- * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
  * (C) 2003 Dave Jones.
  *
  * Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 206791eb46e3..53c7b6936973 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -479,11 +479,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
 static int check_supported_cpu(unsigned int cpu)
 {
         cpumask_t oldmask;
+        cpumask_of_cpu_ptr(cpu_mask, cpu);
         u32 eax, ebx, ecx, edx;
         unsigned int rc = 0;
 
         oldmask = current->cpus_allowed;
-        set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+        set_cpus_allowed_ptr(current, cpu_mask);
 
         if (smp_processor_id() != cpu) {
                 printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -966,7 +967,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
         freqs.old = find_khz_freq_from_fid(data->currfid);
         freqs.new = find_khz_freq_from_fid(fid);
 
-        for_each_cpu_mask(i, *(data->available_cores)) {
+        for_each_cpu_mask_nr(i, *(data->available_cores)) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
         }
@@ -974,7 +975,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
         res = transition_fid_vid(data, fid, vid);
         freqs.new = find_khz_freq_from_fid(data->currfid);
 
-        for_each_cpu_mask(i, *(data->available_cores)) {
+        for_each_cpu_mask_nr(i, *(data->available_cores)) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
         }
@@ -997,7 +998,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
         freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
         freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-        for_each_cpu_mask(i, *(data->available_cores)) {
+        for_each_cpu_mask_nr(i, *(data->available_cores)) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
         }
@@ -1005,7 +1006,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
         res = transition_pstate(data, pstate);
         freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-        for_each_cpu_mask(i, *(data->available_cores)) {
+        for_each_cpu_mask_nr(i, *(data->available_cores)) {
                 freqs.cpu = i;
                 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
         }
@@ -1016,6 +1017,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
 {
         cpumask_t oldmask;
+        cpumask_of_cpu_ptr(cpu_mask, pol->cpu);
         struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
         u32 checkfid;
         u32 checkvid;
@@ -1030,7 +1032,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 
         /* only run on specific CPU from here on */
         oldmask = current->cpus_allowed;
-        set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
+        set_cpus_allowed_ptr(current, cpu_mask);
 
         if (smp_processor_id() != pol->cpu) {
                 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1105,6 +1107,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
         struct powernow_k8_data *data;
         cpumask_t oldmask;
+        cpumask_of_cpu_ptr_declare(newmask);
         int rc;
 
         if (!cpu_online(pol->cpu))
@@ -1156,7 +1159,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
         /* only run on specific CPU from here on */
         oldmask = current->cpus_allowed;
-        set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
+        cpumask_of_cpu_ptr_next(newmask, pol->cpu);
+        set_cpus_allowed_ptr(current, newmask);
 
         if (smp_processor_id() != pol->cpu) {
                 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1178,7 +1182,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1178 set_cpus_allowed_ptr(current, &oldmask); 1182 set_cpus_allowed_ptr(current, &oldmask);
1179 1183
1180 if (cpu_family == CPU_HW_PSTATE) 1184 if (cpu_family == CPU_HW_PSTATE)
1181 pol->cpus = cpumask_of_cpu(pol->cpu); 1185 pol->cpus = *newmask;
1182 else 1186 else
1183 pol->cpus = per_cpu(cpu_core_map, pol->cpu); 1187 pol->cpus = per_cpu(cpu_core_map, pol->cpu);
1184 data->available_cores = &(pol->cpus); 1188 data->available_cores = &(pol->cpus);
@@ -1244,6 +1248,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
1244{ 1248{
1245 struct powernow_k8_data *data; 1249 struct powernow_k8_data *data;
1246 cpumask_t oldmask = current->cpus_allowed; 1250 cpumask_t oldmask = current->cpus_allowed;
1251 cpumask_of_cpu_ptr(newmask, cpu);
1247 unsigned int khz = 0; 1252 unsigned int khz = 0;
1248 unsigned int first; 1253 unsigned int first;
1249 1254
@@ -1253,7 +1258,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
1253 if (!data) 1258 if (!data)
1254 return -EINVAL; 1259 return -EINVAL;
1255 1260
1256 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 1261 set_cpus_allowed_ptr(current, newmask);
1257 if (smp_processor_id() != cpu) { 1262 if (smp_processor_id() != cpu) {
1258 printk(KERN_ERR PFX 1263 printk(KERN_ERR PFX
1259 "limiting to CPU %d failed in powernowk8_get\n", cpu); 1264 "limiting to CPU %d failed in powernowk8_get\n", cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 908dd347c67e..ca2ac13b7af2 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -28,7 +28,8 @@
28#define PFX "speedstep-centrino: " 28#define PFX "speedstep-centrino: "
29#define MAINTAINER "cpufreq@lists.linux.org.uk" 29#define MAINTAINER "cpufreq@lists.linux.org.uk"
30 30
31#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) 31#define dprintk(msg...) \
32 cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
32 33
33#define INTEL_MSR_RANGE (0xffff) 34#define INTEL_MSR_RANGE (0xffff)
34 35
@@ -66,11 +67,12 @@ struct cpu_model
66 67
67 struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ 68 struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
68}; 69};
69static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); 70static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
71 const struct cpu_id *x);
70 72
71/* Operating points for current CPU */ 73/* Operating points for current CPU */
72static struct cpu_model *centrino_model[NR_CPUS]; 74static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
73static const struct cpu_id *centrino_cpu[NR_CPUS]; 75static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
74 76
75static struct cpufreq_driver centrino_driver; 77static struct cpufreq_driver centrino_driver;
76 78
@@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
255 return -ENOENT; 257 return -ENOENT;
256 } 258 }
257 259
258 centrino_model[policy->cpu] = model; 260 per_cpu(centrino_model, policy->cpu) = model;
259 261
260 dprintk("found \"%s\": max frequency: %dkHz\n", 262 dprintk("found \"%s\": max frequency: %dkHz\n",
261 model->model_name, model->max_freq); 263 model->model_name, model->max_freq);
@@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
264} 266}
265 267
266#else 268#else
267static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } 269static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
270{
271 return -ENODEV;
272}
268#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ 273#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
269 274
270static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) 275static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
276 const struct cpu_id *x)
271{ 277{
272 if ((c->x86 == x->x86) && 278 if ((c->x86 == x->x86) &&
273 (c->x86_model == x->x86_model) && 279 (c->x86_model == x->x86_model) &&
@@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
286 * for centrino, as some DSDTs are buggy. 292 * for centrino, as some DSDTs are buggy.
287 * Ideally, this can be done using the acpi_data structure. 293 * Ideally, this can be done using the acpi_data structure.
288 */ 294 */
289 if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || 295 if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
290 (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || 296 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
291 (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { 297 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
292 msr = (msr >> 8) & 0xff; 298 msr = (msr >> 8) & 0xff;
293 return msr * 100000; 299 return msr * 100000;
294 } 300 }
295 301
296 if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) 302 if ((!per_cpu(centrino_model, cpu)) ||
303 (!per_cpu(centrino_model, cpu)->op_points))
297 return 0; 304 return 0;
298 305
299 msr &= 0xffff; 306 msr &= 0xffff;
300 for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { 307 for (i = 0;
301 if (msr == centrino_model[cpu]->op_points[i].index) 308 per_cpu(centrino_model, cpu)->op_points[i].frequency
302 return centrino_model[cpu]->op_points[i].frequency; 309 != CPUFREQ_TABLE_END;
310 i++) {
311 if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
312 return per_cpu(centrino_model, cpu)->
313 op_points[i].frequency;
303 } 314 }
304 if (failsafe) 315 if (failsafe)
305 return centrino_model[cpu]->op_points[i-1].frequency; 316 return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
306 else 317 else
307 return 0; 318 return 0;
308} 319}
@@ -313,9 +324,10 @@ static unsigned int get_cur_freq(unsigned int cpu)
313 unsigned l, h; 324 unsigned l, h;
314 unsigned clock_freq; 325 unsigned clock_freq;
315 cpumask_t saved_mask; 326 cpumask_t saved_mask;
327 cpumask_of_cpu_ptr(new_mask, cpu);
316 328
317 saved_mask = current->cpus_allowed; 329 saved_mask = current->cpus_allowed;
318 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 330 set_cpus_allowed_ptr(current, new_mask);
319 if (smp_processor_id() != cpu) 331 if (smp_processor_id() != cpu)
320 return 0; 332 return 0;
321 333
@@ -347,7 +359,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
347 int i; 359 int i;
348 360
349 /* Only Intel makes Enhanced Speedstep-capable CPUs */ 361 /* Only Intel makes Enhanced Speedstep-capable CPUs */
350 if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) 362 if (cpu->x86_vendor != X86_VENDOR_INTEL ||
363 !cpu_has(cpu, X86_FEATURE_EST))
351 return -ENODEV; 364 return -ENODEV;
352 365
353 if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) 366 if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
@@ -361,9 +374,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
361 break; 374 break;
362 375
363 if (i != N_IDS) 376 if (i != N_IDS)
364 centrino_cpu[policy->cpu] = &cpu_ids[i]; 377 per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
365 378
366 if (!centrino_cpu[policy->cpu]) { 379 if (!per_cpu(centrino_cpu, policy->cpu)) {
367 dprintk("found unsupported CPU with " 380 dprintk("found unsupported CPU with "
368 "Enhanced SpeedStep: send /proc/cpuinfo to " 381 "Enhanced SpeedStep: send /proc/cpuinfo to "
369 MAINTAINER "\n"); 382 MAINTAINER "\n");
@@ -386,23 +399,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
386 /* check to see if it stuck */ 399 /* check to see if it stuck */
387 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 400 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
388 if (!(l & (1<<16))) { 401 if (!(l & (1<<16))) {
389 printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); 402 printk(KERN_INFO PFX
403 "couldn't enable Enhanced SpeedStep\n");
390 return -ENODEV; 404 return -ENODEV;
391 } 405 }
392 } 406 }
393 407
394 freq = get_cur_freq(policy->cpu); 408 freq = get_cur_freq(policy->cpu);
395 409 policy->cpuinfo.transition_latency = 10000;
396 policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ 410 /* 10uS transition latency */
397 policy->cur = freq; 411 policy->cur = freq;
398 412
399 dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); 413 dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
400 414
401 ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); 415 ret = cpufreq_frequency_table_cpuinfo(policy,
416 per_cpu(centrino_model, policy->cpu)->op_points);
402 if (ret) 417 if (ret)
403 return (ret); 418 return (ret);
404 419
405 cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); 420 cpufreq_frequency_table_get_attr(
421 per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
406 422
407 return 0; 423 return 0;
408} 424}
@@ -411,12 +427,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
411{ 427{
412 unsigned int cpu = policy->cpu; 428 unsigned int cpu = policy->cpu;
413 429
414 if (!centrino_model[cpu]) 430 if (!per_cpu(centrino_model, cpu))
415 return -ENODEV; 431 return -ENODEV;
416 432
417 cpufreq_frequency_table_put_attr(cpu); 433 cpufreq_frequency_table_put_attr(cpu);
418 434
419 centrino_model[cpu] = NULL; 435 per_cpu(centrino_model, cpu) = NULL;
420 436
421 return 0; 437 return 0;
422} 438}
@@ -430,17 +446,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
430 */ 446 */
431static int centrino_verify (struct cpufreq_policy *policy) 447static int centrino_verify (struct cpufreq_policy *policy)
432{ 448{
433 return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); 449 return cpufreq_frequency_table_verify(policy,
450 per_cpu(centrino_model, policy->cpu)->op_points);
434} 451}
435 452
436/** 453/**
437 * centrino_setpolicy - set a new CPUFreq policy 454 * centrino_setpolicy - set a new CPUFreq policy
438 * @policy: new policy 455 * @policy: new policy
439 * @target_freq: the target frequency 456 * @target_freq: the target frequency
440 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 457 * @relation: how that frequency relates to achieved frequency
458 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
441 * 459 *
442 * Sets a new CPUFreq policy. 460 * Sets a new CPUFreq policy.
443 */ 461 */
462struct allmasks {
463 cpumask_t online_policy_cpus;
464 cpumask_t saved_mask;
465 cpumask_t set_mask;
466 cpumask_t covered_cpus;
467};
468
444static int centrino_target (struct cpufreq_policy *policy, 469static int centrino_target (struct cpufreq_policy *policy,
445 unsigned int target_freq, 470 unsigned int target_freq,
446 unsigned int relation) 471 unsigned int relation)
@@ -448,48 +473,55 @@ static int centrino_target (struct cpufreq_policy *policy,
448 unsigned int newstate = 0; 473 unsigned int newstate = 0;
449 unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; 474 unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
450 struct cpufreq_freqs freqs; 475 struct cpufreq_freqs freqs;
451 cpumask_t online_policy_cpus;
452 cpumask_t saved_mask;
453 cpumask_t set_mask;
454 cpumask_t covered_cpus;
455 int retval = 0; 476 int retval = 0;
456 unsigned int j, k, first_cpu, tmp; 477 unsigned int j, k, first_cpu, tmp;
457 478 CPUMASK_ALLOC(allmasks);
458 if (unlikely(centrino_model[cpu] == NULL)) 479 CPUMASK_PTR(online_policy_cpus, allmasks);
459 return -ENODEV; 480 CPUMASK_PTR(saved_mask, allmasks);
481 CPUMASK_PTR(set_mask, allmasks);
482 CPUMASK_PTR(covered_cpus, allmasks);
483
484 if (unlikely(allmasks == NULL))
485 return -ENOMEM;
486
487 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
488 retval = -ENODEV;
489 goto out;
490 }
460 491
461 if (unlikely(cpufreq_frequency_table_target(policy, 492 if (unlikely(cpufreq_frequency_table_target(policy,
462 centrino_model[cpu]->op_points, 493 per_cpu(centrino_model, cpu)->op_points,
463 target_freq, 494 target_freq,
464 relation, 495 relation,
465 &newstate))) { 496 &newstate))) {
466 return -EINVAL; 497 retval = -EINVAL;
498 goto out;
467 } 499 }
468 500
469#ifdef CONFIG_HOTPLUG_CPU 501#ifdef CONFIG_HOTPLUG_CPU
470 /* cpufreq holds the hotplug lock, so we are safe from here on */ 502 /* cpufreq holds the hotplug lock, so we are safe from here on */
471 cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); 503 cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
472#else 504#else
473 online_policy_cpus = policy->cpus; 505 *online_policy_cpus = policy->cpus;
474#endif 506#endif
475 507
476 saved_mask = current->cpus_allowed; 508 *saved_mask = current->cpus_allowed;
477 first_cpu = 1; 509 first_cpu = 1;
478 cpus_clear(covered_cpus); 510 cpus_clear(*covered_cpus);
479 for_each_cpu_mask(j, online_policy_cpus) { 511 for_each_cpu_mask_nr(j, *online_policy_cpus) {
480 /* 512 /*
481 * Support for SMP systems. 513 * Support for SMP systems.
482 * Make sure we are running on CPU that wants to change freq 514 * Make sure we are running on CPU that wants to change freq
483 */ 515 */
484 cpus_clear(set_mask); 516 cpus_clear(*set_mask);
485 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) 517 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
486 cpus_or(set_mask, set_mask, online_policy_cpus); 518 cpus_or(*set_mask, *set_mask, *online_policy_cpus);
487 else 519 else
488 cpu_set(j, set_mask); 520 cpu_set(j, *set_mask);
489 521
490 set_cpus_allowed_ptr(current, &set_mask); 522 set_cpus_allowed_ptr(current, set_mask);
491 preempt_disable(); 523 preempt_disable();
492 if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { 524 if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) {
493 dprintk("couldn't limit to CPUs in this domain\n"); 525 dprintk("couldn't limit to CPUs in this domain\n");
494 retval = -EAGAIN; 526 retval = -EAGAIN;
495 if (first_cpu) { 527 if (first_cpu) {
@@ -500,7 +532,7 @@ static int centrino_target (struct cpufreq_policy *policy,
500 break; 532 break;
501 } 533 }
502 534
503 msr = centrino_model[cpu]->op_points[newstate].index; 535 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
504 536
505 if (first_cpu) { 537 if (first_cpu) {
506 rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); 538 rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
@@ -517,7 +549,7 @@ static int centrino_target (struct cpufreq_policy *policy,
517 dprintk("target=%dkHz old=%d new=%d msr=%04x\n", 549 dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
518 target_freq, freqs.old, freqs.new, msr); 550 target_freq, freqs.old, freqs.new, msr);
519 551
520 for_each_cpu_mask(k, online_policy_cpus) { 552 for_each_cpu_mask_nr(k, *online_policy_cpus) {
521 freqs.cpu = k; 553 freqs.cpu = k;
522 cpufreq_notify_transition(&freqs, 554 cpufreq_notify_transition(&freqs,
523 CPUFREQ_PRECHANGE); 555 CPUFREQ_PRECHANGE);
@@ -536,11 +568,11 @@ static int centrino_target (struct cpufreq_policy *policy,
536 break; 568 break;
537 } 569 }
538 570
539 cpu_set(j, covered_cpus); 571 cpu_set(j, *covered_cpus);
540 preempt_enable(); 572 preempt_enable();
541 } 573 }
542 574
543 for_each_cpu_mask(k, online_policy_cpus) { 575 for_each_cpu_mask_nr(k, *online_policy_cpus) {
544 freqs.cpu = k; 576 freqs.cpu = k;
545 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 577 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
546 } 578 }
@@ -553,10 +585,12 @@ static int centrino_target (struct cpufreq_policy *policy,
553 * Best effort undo.. 585 * Best effort undo..
554 */ 586 */
555 587
556 if (!cpus_empty(covered_cpus)) { 588 if (!cpus_empty(*covered_cpus)) {
557 for_each_cpu_mask(j, covered_cpus) { 589 cpumask_of_cpu_ptr_declare(new_mask);
558 set_cpus_allowed_ptr(current, 590
559 &cpumask_of_cpu(j)); 591 for_each_cpu_mask_nr(j, *covered_cpus) {
592 cpumask_of_cpu_ptr_next(new_mask, j);
593 set_cpus_allowed_ptr(current, new_mask);
560 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); 594 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
561 } 595 }
562 } 596 }
@@ -564,19 +598,22 @@ static int centrino_target (struct cpufreq_policy *policy,
564 tmp = freqs.new; 598 tmp = freqs.new;
565 freqs.new = freqs.old; 599 freqs.new = freqs.old;
566 freqs.old = tmp; 600 freqs.old = tmp;
567 for_each_cpu_mask(j, online_policy_cpus) { 601 for_each_cpu_mask_nr(j, *online_policy_cpus) {
568 freqs.cpu = j; 602 freqs.cpu = j;
569 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 603 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
570 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 604 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
571 } 605 }
572 } 606 }
573 set_cpus_allowed_ptr(current, &saved_mask); 607 set_cpus_allowed_ptr(current, saved_mask);
574 return 0; 608 retval = 0;
609 goto out;
575 610
576migrate_end: 611migrate_end:
577 preempt_enable(); 612 preempt_enable();
578 set_cpus_allowed_ptr(current, &saved_mask); 613 set_cpus_allowed_ptr(current, saved_mask);
579 return 0; 614out:
615 CPUMASK_FREE(allmasks);
616 return retval;
580} 617}
581 618
582static struct freq_attr* centrino_attr[] = { 619static struct freq_attr* centrino_attr[] = {
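The per-CPU conversion above changes where the Centrino driver keeps its model pointer, not how the operating-point table is searched: extract_clock() still walks op_points[] until the table-end sentinel and compares the masked MSR value against each entry's index. A self-contained sketch of that lookup follows; the table contents and PERF_CTL encodings are made up for illustration.

/* Minimal model of the op_points walk in extract_clock(). */
#include <stdio.h>

#define TABLE_END (~0u)		/* stands in for CPUFREQ_TABLE_END */

struct op_point { unsigned int frequency; unsigned int index; };

static unsigned int lookup_freq(const struct op_point *op, unsigned int msr)
{
	int i;

	msr &= 0xffff;			/* INTEL_MSR_RANGE */
	for (i = 0; op[i].frequency != TABLE_END; i++)
		if (msr == op[i].index)
			return op[i].frequency;
	return 0;			/* not found */
}

int main(void)
{
	static const struct op_point table[] = {
		{  600000, 0x0613 },	/* hypothetical encodings */
		{  800000, 0x0813 },
		{ 1000000, 0x0A13 },
		{ TABLE_END, 0 },
	};

	printf("%u kHz\n", lookup_freq(table, 0x0813));
	return 0;
}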
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 1b50244b1fdf..2f3728dc24f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -244,7 +244,8 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
244 244
245static unsigned int speedstep_get(unsigned int cpu) 245static unsigned int speedstep_get(unsigned int cpu)
246{ 246{
247 return _speedstep_get(&cpumask_of_cpu(cpu)); 247 cpumask_of_cpu_ptr(newmask, cpu);
248 return _speedstep_get(newmask);
248} 249}
249 250
250/** 251/**
@@ -279,7 +280,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
279 280
280 cpus_allowed = current->cpus_allowed; 281 cpus_allowed = current->cpus_allowed;
281 282
282 for_each_cpu_mask(i, policy->cpus) { 283 for_each_cpu_mask_nr(i, policy->cpus) {
283 freqs.cpu = i; 284 freqs.cpu = i;
284 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 285 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
285 } 286 }
@@ -292,7 +293,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
292 /* allow to be run on all CPUs */ 293 /* allow to be run on all CPUs */
293 set_cpus_allowed_ptr(current, &cpus_allowed); 294 set_cpus_allowed_ptr(current, &cpus_allowed);
294 295
295 for_each_cpu_mask(i, policy->cpus) { 296 for_each_cpu_mask_nr(i, policy->cpus) {
296 freqs.cpu = i; 297 freqs.cpu = i;
297 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 298 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
298 } 299 }
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cbffa2a25a13..f113ef4595f6 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -227,6 +227,20 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
227 227
228 if (cpu_has_bts) 228 if (cpu_has_bts)
229 ptrace_bts_init_intel(c); 229 ptrace_bts_init_intel(c);
230
231 /*
232 * See if we have a good local APIC by checking for buggy Pentia,
233 * i.e. all B steppings and the C2 stepping of P54C when using their
234 * integrated APIC (see 11AP erratum in "Pentium Processor
235 * Specification Update").
236 */
237 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
238 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
239 set_cpu_cap(c, X86_FEATURE_11AP);
240
241#ifdef CONFIG_X86_NUMAQ
242 numaq_tsc_disable();
243#endif
230} 244}
231 245
232static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 246static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
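The 11AP check added to init_intel() packs family and model into a single value: family 5, model 2 (P54C) gives (5<<8 | 2<<4) = 0x520, and the stepping test then matches all B steppings (mask < 0x6) plus the C2 stepping (0xb). A quick standalone check of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int family = 5, model = 2, stepping;

	/* prints 0x520, the value the erratum test compares against */
	printf("packed = 0x%x\n", family << 8 | model << 4);

	for (stepping = 0; stepping <= 0xb; stepping++)
		if (stepping < 0x6 || stepping == 0xb)
			printf("stepping 0x%x would set X86_FEATURE_11AP\n",
			       stepping);
	return 0;
}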
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
new file mode 100644
index 000000000000..1019c58d39f0
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -0,0 +1,95 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3#include <asm/processor.h>
4#include <asm/ptrace.h>
5#include <asm/topology.h>
6#include <asm/numa_64.h>
7
8#include "cpu.h"
9
10static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
11{
12 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
13 (c->x86 == 0x6 && c->x86_model >= 0x0e))
14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15
16 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
17}
18
19/*
20 * find out the number of processor cores on the die
21 */
22static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
23{
24 unsigned int eax, t;
25
26 if (c->cpuid_level < 4)
27 return 1;
28
29 cpuid_count(4, 0, &eax, &t, &t, &t);
30
31 if (eax & 0x1f)
32 return ((eax >> 26) + 1);
33 else
34 return 1;
35}
36
37static void __cpuinit srat_detect_node(void)
38{
39#ifdef CONFIG_NUMA
40 unsigned node;
41 int cpu = smp_processor_id();
42 int apicid = hard_smp_processor_id();
43
44 /* Don't do the funky fallback heuristics the AMD version employs
45 for now. */
46 node = apicid_to_node[apicid];
47 if (node == NUMA_NO_NODE || !node_online(node))
48 node = first_node(node_online_map);
49 numa_set_node(cpu, node);
50
51 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
52#endif
53}
54
55static void __cpuinit init_intel(struct cpuinfo_x86 *c)
56{
57 init_intel_cacheinfo(c);
58 if (c->cpuid_level > 9) {
59 unsigned eax = cpuid_eax(10);
60 /* Check for version and the number of counters */
61 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
62 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
63 }
64
65 if (cpu_has_ds) {
66 unsigned int l1, l2;
67 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
68 if (!(l1 & (1<<11)))
69 set_cpu_cap(c, X86_FEATURE_BTS);
70 if (!(l1 & (1<<12)))
71 set_cpu_cap(c, X86_FEATURE_PEBS);
72 }
73
74
75 if (cpu_has_bts)
76 ds_init_intel(c);
77
78 if (c->x86 == 15)
79 c->x86_cache_alignment = c->x86_clflush_size * 2;
80 if (c->x86 == 6)
81 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
82 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
83 c->x86_max_cores = intel_num_cpu_cores(c);
84
85 srat_detect_node();
86}
87
88static struct cpu_dev intel_cpu_dev __cpuinitdata = {
89 .c_vendor = "Intel",
90 .c_ident = { "GenuineIntel" },
91 .c_early_init = early_init_intel,
92 .c_init = init_intel,
93};
94cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
95
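intel_num_cpu_cores() in the new intel_64.c reads CPUID leaf 4, subleaf 0, and derives the core count as EAX[31:26] + 1 when the cache-type field (EAX[4:0]) is non-zero. Below is a userspace sketch of the same calculation using GCC's <cpuid.h> helpers; it assumes a GCC-compatible toolchain and an Intel CPU that implements leaf 4.

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) == 0 || eax < 4) {
		puts("CPUID leaf 4 not available, assuming 1 core");
		return 0;
	}

	__cpuid_count(4, 0, eax, ebx, ecx, edx);	/* leaf 4, subleaf 0 */
	if (eax & 0x1f)					/* cache type != null */
		printf("cores per package: %u\n", (eax >> 26) + 1);
	else
		puts("no cache info, assuming 1 core");
	return 0;
}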
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 26d615dcb149..650d40f7912b 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata =
62 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 62 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
63 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ 63 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
64 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ 64 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
65 { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */
65 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 66 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
66 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 67 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
67 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 68 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -488,7 +489,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
488 int sibling; 489 int sibling;
489 490
490 this_leaf = CPUID4_INFO_IDX(cpu, index); 491 this_leaf = CPUID4_INFO_IDX(cpu, index);
491 for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { 492 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
492 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 493 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
493 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 494 cpu_clear(cpu, sibling_leaf->shared_cpu_map);
494 } 495 }
@@ -515,6 +516,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
515 unsigned long j; 516 unsigned long j;
516 int retval; 517 int retval;
517 cpumask_t oldmask; 518 cpumask_t oldmask;
519 cpumask_of_cpu_ptr(newmask, cpu);
518 520
519 if (num_cache_leaves == 0) 521 if (num_cache_leaves == 0)
520 return -ENOENT; 522 return -ENOENT;
@@ -525,7 +527,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
525 return -ENOMEM; 527 return -ENOMEM;
526 528
527 oldmask = current->cpus_allowed; 529 oldmask = current->cpus_allowed;
528 retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 530 retval = set_cpus_allowed_ptr(current, newmask);
529 if (retval) 531 if (retval)
530 goto out; 532 goto out;
531 533
@@ -779,15 +781,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
779 } 781 }
780 kobject_put(per_cpu(cache_kobject, cpu)); 782 kobject_put(per_cpu(cache_kobject, cpu));
781 cpuid4_cache_sysfs_exit(cpu); 783 cpuid4_cache_sysfs_exit(cpu);
782 break; 784 return retval;
783 } 785 }
784 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 786 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
785 } 787 }
786 if (!retval) 788 cpu_set(cpu, cache_dev_map);
787 cpu_set(cpu, cache_dev_map);
788 789
789 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 790 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
790 return retval; 791 return 0;
791} 792}
792 793
793static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) 794static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
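The cache_add_dev() change above tightens the error path: a failed kobject registration now unwinds what was already created and returns the error, instead of breaking out and falling through to the success path. A compressed model of that shape follows; the object count and the failing index are invented.

#include <stdio.h>

#define NOBJ 4

static int create(int i)   { return i == 2 ? -1 : 0; }	/* pretend #2 fails */
static void destroy(int i) { printf("destroy %d\n", i); }

static int add_all(void)
{
	int i, err;

	for (i = 0; i < NOBJ; i++) {
		err = create(i);
		if (err) {
			while (--i >= 0)	/* tear down earlier objects */
				destroy(i);
			return err;		/* do not report success */
		}
	}
	return 0;
}

int main(void)
{
	printf("add_all() = %d\n", add_all());
	return 0;
}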
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index e633c9c2b764..f390c9f66351 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -9,23 +9,23 @@
9#include <linux/interrupt.h> 9#include <linux/interrupt.h>
10#include <linux/smp.h> 10#include <linux/smp.h>
11 11
12#include <asm/processor.h> 12#include <asm/processor.h>
13#include <asm/system.h> 13#include <asm/system.h>
14#include <asm/msr.h> 14#include <asm/msr.h>
15 15
16#include "mce.h" 16#include "mce.h"
17 17
18/* Machine Check Handler For AMD Athlon/Duron */ 18/* Machine Check Handler For AMD Athlon/Duron */
19static void k7_machine_check(struct pt_regs * regs, long error_code) 19static void k7_machine_check(struct pt_regs *regs, long error_code)
20{ 20{
21 int recover=1; 21 int recover = 1;
22 u32 alow, ahigh, high, low; 22 u32 alow, ahigh, high, low;
23 u32 mcgstl, mcgsth; 23 u32 mcgstl, mcgsth;
24 int i; 24 int i;
25 25
26 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 26 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
27 if (mcgstl & (1<<0)) /* Recoverable ? */ 27 if (mcgstl & (1<<0)) /* Recoverable ? */
28 recover=0; 28 recover = 0;
29 29
30 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 30 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
31 smp_processor_id(), mcgsth, mcgstl); 31 smp_processor_id(), mcgsth, mcgstl);
@@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
60 } 60 }
61 61
62 if (recover&2) 62 if (recover&2)
63 panic ("CPU context corrupt"); 63 panic("CPU context corrupt");
64 if (recover&1) 64 if (recover&1)
65 panic ("Unable to continue"); 65 panic("Unable to continue");
66 printk (KERN_EMERG "Attempting to continue.\n"); 66 printk(KERN_EMERG "Attempting to continue.\n");
67 mcgstl &= ~(1<<2); 67 mcgstl &= ~(1<<2);
68 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 68 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
69} 69}
70 70
71 71
@@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
81 machine_check_vector = k7_machine_check; 81 machine_check_vector = k7_machine_check;
82 wmb(); 82 wmb();
83 83
84 printk (KERN_INFO "Intel machine check architecture supported.\n"); 84 printk(KERN_INFO "Intel machine check architecture supported.\n");
85 rdmsr (MSR_IA32_MCG_CAP, l, h); 85 rdmsr(MSR_IA32_MCG_CAP, l, h);
86 if (l & (1<<8)) /* Control register present ? */ 86 if (l & (1<<8)) /* Control register present ? */
87 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 87 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
88 nr_mce_banks = l & 0xff; 88 nr_mce_banks = l & 0xff;
89 89
90 /* Clear status for MC index 0 separately, we don't touch CTL, 90 /* Clear status for MC index 0 separately, we don't touch CTL,
91 * as some K7 Athlons cause spurious MCEs when its enabled. */ 91 * as some K7 Athlons cause spurious MCEs when its enabled. */
92 if (boot_cpu_data.x86 == 6) { 92 if (boot_cpu_data.x86 == 6) {
93 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); 93 wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
94 i = 1; 94 i = 1;
95 } else 95 } else
96 i = 0; 96 i = 0;
97 for (; i<nr_mce_banks; i++) { 97 for (; i < nr_mce_banks; i++) {
98 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 98 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
99 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 99 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
100 } 100 }
101 101
102 set_in_cr4 (X86_CR4_MCE); 102 set_in_cr4(X86_CR4_MCE);
103 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 103 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
104 smp_processor_id()); 104 smp_processor_id());
105} 105}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index e07e8c068ae0..65a339678ece 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -9,6 +9,7 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/smp_lock.h>
12#include <linux/string.h> 13#include <linux/string.h>
13#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
14#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
@@ -31,7 +32,7 @@
31#include <asm/idle.h> 32#include <asm/idle.h>
32 33
33#define MISC_MCELOG_MINOR 227 34#define MISC_MCELOG_MINOR 227
34#define NR_BANKS 6 35#define NR_SYSFS_BANKS 6
35 36
36atomic_t mce_entry; 37atomic_t mce_entry;
37 38
@@ -46,7 +47,7 @@ static int mce_dont_init;
46 */ 47 */
47static int tolerant = 1; 48static int tolerant = 1;
48static int banks; 49static int banks;
49static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; 50static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
50static unsigned long notify_user; 51static unsigned long notify_user;
51static int rip_msr; 52static int rip_msr;
52static int mce_bootlog = -1; 53static int mce_bootlog = -1;
@@ -209,7 +210,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
209 barrier(); 210 barrier();
210 211
211 for (i = 0; i < banks; i++) { 212 for (i = 0; i < banks; i++) {
212 if (!bank[i]) 213 if (i < NR_SYSFS_BANKS && !bank[i])
213 continue; 214 continue;
214 215
215 m.misc = 0; 216 m.misc = 0;
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info)
363 364
364static void mcheck_timer(struct work_struct *work) 365static void mcheck_timer(struct work_struct *work)
365{ 366{
366 on_each_cpu(mcheck_check_cpu, NULL, 1, 1); 367 on_each_cpu(mcheck_check_cpu, NULL, 1);
367 368
368 /* 369 /*
369 * Alert userspace if needed. If we logged an MCE, reduce the 370 * Alert userspace if needed. If we logged an MCE, reduce the
@@ -444,9 +445,10 @@ static void mce_init(void *dummy)
444 445
445 rdmsrl(MSR_IA32_MCG_CAP, cap); 446 rdmsrl(MSR_IA32_MCG_CAP, cap);
446 banks = cap & 0xff; 447 banks = cap & 0xff;
447 if (banks > NR_BANKS) { 448 if (banks > MCE_EXTENDED_BANK) {
448 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); 449 banks = MCE_EXTENDED_BANK;
449 banks = NR_BANKS; 450 printk(KERN_INFO "MCE: warning: using only %d banks\n",
451 MCE_EXTENDED_BANK);
450 } 452 }
451 /* Use accurate RIP reporting if available. */ 453 /* Use accurate RIP reporting if available. */
452 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 454 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -462,7 +464,11 @@ static void mce_init(void *dummy)
462 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 464 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
463 465
464 for (i = 0; i < banks; i++) { 466 for (i = 0; i < banks; i++) {
465 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 467 if (i < NR_SYSFS_BANKS)
468 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
469 else
470 wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
471
466 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 472 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
467 } 473 }
468} 474}
@@ -527,10 +533,12 @@ static int open_exclu; /* already open exclusive? */
527 533
528static int mce_open(struct inode *inode, struct file *file) 534static int mce_open(struct inode *inode, struct file *file)
529{ 535{
536 lock_kernel();
530 spin_lock(&mce_state_lock); 537 spin_lock(&mce_state_lock);
531 538
532 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { 539 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
533 spin_unlock(&mce_state_lock); 540 spin_unlock(&mce_state_lock);
541 unlock_kernel();
534 return -EBUSY; 542 return -EBUSY;
535 } 543 }
536 544
@@ -539,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file)
539 open_count++; 547 open_count++;
540 548
541 spin_unlock(&mce_state_lock); 549 spin_unlock(&mce_state_lock);
550 unlock_kernel();
542 551
543 return nonseekable_open(inode, file); 552 return nonseekable_open(inode, file);
544} 553}
@@ -571,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
571 char __user *buf = ubuf; 580 char __user *buf = ubuf;
572 int i, err; 581 int i, err;
573 582
574 cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); 583 cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
575 if (!cpu_tsc) 584 if (!cpu_tsc)
576 return -ENOMEM; 585 return -ENOMEM;
577 586
@@ -612,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
612 * Collect entries that were still getting written before the 621 * Collect entries that were still getting written before the
613 * synchronize. 622 * synchronize.
614 */ 623 */
615 on_each_cpu(collect_tscs, cpu_tsc, 1, 1); 624 on_each_cpu(collect_tscs, cpu_tsc, 1);
616 for (i = next; i < MCE_LOG_LEN; i++) { 625 for (i = next; i < MCE_LOG_LEN; i++) {
617 if (mcelog.entry[i].finished && 626 if (mcelog.entry[i].finished &&
618 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { 627 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -737,7 +746,7 @@ static void mce_restart(void)
737 if (next_interval) 746 if (next_interval)
738 cancel_delayed_work(&mcheck_work); 747 cancel_delayed_work(&mcheck_work);
739 /* Timer race is harmless here */ 748 /* Timer race is harmless here */
740 on_each_cpu(mce_init, NULL, 1, 1); 749 on_each_cpu(mce_init, NULL, 1);
741 next_interval = check_interval * HZ; 750 next_interval = check_interval * HZ;
742 if (next_interval) 751 if (next_interval)
743 schedule_delayed_work(&mcheck_work, 752 schedule_delayed_work(&mcheck_work,
@@ -753,10 +762,14 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
753 762
754/* Why are there no generic functions for this? */ 763/* Why are there no generic functions for this? */
755#define ACCESSOR(name, var, start) \ 764#define ACCESSOR(name, var, start) \
756 static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ 765 static ssize_t show_ ## name(struct sys_device *s, \
766 struct sysdev_attribute *attr, \
767 char *buf) { \
757 return sprintf(buf, "%lx\n", (unsigned long)var); \ 768 return sprintf(buf, "%lx\n", (unsigned long)var); \
758 } \ 769 } \
759 static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ 770 static ssize_t set_ ## name(struct sys_device *s, \
771 struct sysdev_attribute *attr, \
772 const char *buf, size_t siz) { \
760 char *end; \ 773 char *end; \
761 unsigned long new = simple_strtoul(buf, &end, 0); \ 774 unsigned long new = simple_strtoul(buf, &end, 0); \
762 if (end == buf) return -EINVAL; \ 775 if (end == buf) return -EINVAL; \
@@ -766,7 +779,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
766 } \ 779 } \
767 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 780 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
768 781
769/* TBD should generate these dynamically based on number of available banks */ 782/*
783 * TBD should generate these dynamically based on number of available banks.
784 * Have only 6 control banks in /sysfs until then.
785 */
770ACCESSOR(bank0ctl,bank[0],mce_restart()) 786ACCESSOR(bank0ctl,bank[0],mce_restart())
771ACCESSOR(bank1ctl,bank[1],mce_restart()) 787ACCESSOR(bank1ctl,bank[1],mce_restart())
772ACCESSOR(bank2ctl,bank[2],mce_restart()) 788ACCESSOR(bank2ctl,bank[2],mce_restart())
@@ -774,14 +790,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart())
774ACCESSOR(bank4ctl,bank[4],mce_restart()) 790ACCESSOR(bank4ctl,bank[4],mce_restart())
775ACCESSOR(bank5ctl,bank[5],mce_restart()) 791ACCESSOR(bank5ctl,bank[5],mce_restart())
776 792
777static ssize_t show_trigger(struct sys_device *s, char *buf) 793static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
794 char *buf)
778{ 795{
779 strcpy(buf, trigger); 796 strcpy(buf, trigger);
780 strcat(buf, "\n"); 797 strcat(buf, "\n");
781 return strlen(trigger) + 1; 798 return strlen(trigger) + 1;
782} 799}
783 800
784static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) 801static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
802 const char *buf,size_t siz)
785{ 803{
786 char *p; 804 char *p;
787 int len; 805 int len;
@@ -794,12 +812,12 @@ static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
794} 812}
795 813
796static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 814static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
797ACCESSOR(tolerant,tolerant,) 815static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
798ACCESSOR(check_interval,check_interval,mce_restart()) 816ACCESSOR(check_interval,check_interval,mce_restart())
799static struct sysdev_attribute *mce_attributes[] = { 817static struct sysdev_attribute *mce_attributes[] = {
800 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, 818 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
801 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, 819 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
802 &attr_tolerant, &attr_check_interval, &attr_trigger, 820 &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
803 NULL 821 NULL
804}; 822};
805 823
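With NR_BANKS renamed to NR_SYSFS_BANKS, mce_init() above applies the sysfs-tunable masks only to the first six banks and writes an all-ones control value to any further banks reported by MCG_CAP. A small model of that selection follows; the bank count of 9 is invented, the 0x400 base and 4*i spacing mirror the MC0_CTL addressing in the real code, and the range initialiser is the same GCC idiom the driver itself uses.

#include <stdio.h>

#define MSR_IA32_MC0_CTL 0x400
#define NR_SYSFS_BANKS   6

static unsigned long bank[NR_SYSFS_BANKS] = {
	[0 ... NR_SYSFS_BANKS - 1] = ~0UL
};

int main(void)
{
	int i, banks = 9;	/* pretend MCG_CAP reported 9 banks */

	for (i = 0; i < banks; i++) {
		/* sysfs-controlled banks use the stored mask, the rest
		 * are fully enabled */
		unsigned long ctl = (i < NR_SYSFS_BANKS) ? bank[i] : ~0UL;

		printf("wrmsrl(0x%x, 0x%lx)\n", MSR_IA32_MC0_CTL + 4 * i, ctl);
	}
	return 0;
}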
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 7c9a813e1193..88736cadbaa6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
527 if (err) 527 if (err)
528 goto out_free; 528 goto out_free;
529 529
530 for_each_cpu_mask(i, b->cpus) { 530 for_each_cpu_mask_nr(i, b->cpus) {
531 if (i == cpu) 531 if (i == cpu)
532 continue; 532 continue;
533 533
@@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
617#endif 617#endif
618 618
619 /* remove all sibling symlinks before unregistering */ 619 /* remove all sibling symlinks before unregistering */
620 for_each_cpu_mask(i, b->cpus) { 620 for_each_cpu_mask_nr(i, b->cpus) {
621 if (i == cpu) 621 if (i == cpu)
622 continue; 622 continue;
623 623
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c14ec2..cc1fccdd31e0 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
59 59
60static void mce_work_fn(struct work_struct *work) 60static void mce_work_fn(struct work_struct *work)
61{ 61{
62 on_each_cpu(mce_checkregs, NULL, 1, 1); 62 on_each_cpu(mce_checkregs, NULL, 1);
63 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); 63 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
64} 64}
65 65
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb03345554a5..9b60fce09f75 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -8,7 +8,7 @@
8#include <linux/interrupt.h> 8#include <linux/interrupt.h>
9#include <linux/smp.h> 9#include <linux/smp.h>
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
@@ -32,12 +32,12 @@ struct intel_mce_extended_msrs {
32 /* u32 *reserved[]; */ 32 /* u32 *reserved[]; */
33}; 33};
34 34
35static int mce_num_extended_msrs = 0; 35static int mce_num_extended_msrs;
36 36
37 37
38#ifdef CONFIG_X86_MCE_P4THERMAL 38#ifdef CONFIG_X86_MCE_P4THERMAL
39static void unexpected_thermal_interrupt(struct pt_regs *regs) 39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{ 40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id()); 42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK); 43 add_taint(TAINT_MACHINE_CHECK);
@@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
83 * be some SMM goo which handles it, so we can't even put a handler 83 * be some SMM goo which handles it, so we can't even put a handler
84 * since it might be delivered via SMI already -zwanem. 84 * since it might be delivered via SMI already -zwanem.
85 */ 85 */
86 rdmsr (MSR_IA32_MISC_ENABLE, l, h); 86 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
87 h = apic_read(APIC_LVTTHMR); 87 h = apic_read(APIC_LVTTHMR);
88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", 89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
@@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
91 return; /* -EBUSY */ 91 return; /* -EBUSY */
92 } 92 }
93 93
94 /* check whether a vector already exists, temporarily masked? */ 94 /* check whether a vector already exists, temporarily masked? */
95 if (h & APIC_VECTOR_MASK) { 95 if (h & APIC_VECTOR_MASK) {
96 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " 96 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
97 "installed\n", 97 "installed\n",
@@ -102,20 +102,20 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
102 /* The temperature transition interrupt handler setup */ 102 /* The temperature transition interrupt handler setup */
103 h = THERMAL_APIC_VECTOR; /* our delivery vector */ 103 h = THERMAL_APIC_VECTOR; /* our delivery vector */
104 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ 104 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
105 apic_write_around(APIC_LVTTHMR, h); 105 apic_write(APIC_LVTTHMR, h);
106 106
107 rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); 107 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
108 wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); 108 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
109 109
110 /* ok we're good to go... */ 110 /* ok we're good to go... */
111 vendor_thermal_interrupt = intel_thermal_interrupt; 111 vendor_thermal_interrupt = intel_thermal_interrupt;
112
113 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
114 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
115 112
116 l = apic_read (APIC_LVTTHMR); 113 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
117 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 114 wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
118 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); 115
116 l = apic_read(APIC_LVTTHMR);
117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
118 printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
119 119
120 /* enable thermal throttle processing */ 120 /* enable thermal throttle processing */
121 atomic_set(&therm_throt_en, 1); 121 atomic_set(&therm_throt_en, 1);
@@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
129{ 129{
130 u32 h; 130 u32 h;
131 131
132 rdmsr (MSR_IA32_MCG_EAX, r->eax, h); 132 rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
133 rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); 133 rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
134 rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); 134 rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
135 rdmsr (MSR_IA32_MCG_EDX, r->edx, h); 135 rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
136 rdmsr (MSR_IA32_MCG_ESI, r->esi, h); 136 rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
137 rdmsr (MSR_IA32_MCG_EDI, r->edi, h); 137 rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
138 rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); 138 rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
139 rdmsr (MSR_IA32_MCG_ESP, r->esp, h); 139 rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
140 rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); 140 rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
141 rdmsr (MSR_IA32_MCG_EIP, r->eip, h); 141 rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
142} 142}
143 143
144static void intel_machine_check(struct pt_regs * regs, long error_code) 144static void intel_machine_check(struct pt_regs *regs, long error_code)
145{ 145{
146 int recover=1; 146 int recover = 1;
147 u32 alow, ahigh, high, low; 147 u32 alow, ahigh, high, low;
148 u32 mcgstl, mcgsth; 148 u32 mcgstl, mcgsth;
149 int i; 149 int i;
150 150
151 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 151 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
152 if (mcgstl & (1<<0)) /* Recoverable ? */ 152 if (mcgstl & (1<<0)) /* Recoverable ? */
153 recover=0; 153 recover = 0;
154 154
155 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 155 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
156 smp_processor_id(), mcgsth, mcgstl); 156 smp_processor_id(), mcgsth, mcgstl);
@@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
191 } 191 }
192 192
193 if (recover & 2) 193 if (recover & 2)
194 panic ("CPU context corrupt"); 194 panic("CPU context corrupt");
195 if (recover & 1) 195 if (recover & 1)
196 panic ("Unable to continue"); 196 panic("Unable to continue");
197 197
198 printk(KERN_EMERG "Attempting to continue.\n"); 198 printk(KERN_EMERG "Attempting to continue.\n");
199 /* 199 /*
200 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 200 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
201 * recoverable/continuable.This will allow BIOS to look at the MSRs 201 * recoverable/continuable.This will allow BIOS to look at the MSRs
202 * for errors if the OS could not log the error. 202 * for errors if the OS could not log the error.
203 */ 203 */
204 for (i=0; i<nr_mce_banks; i++) { 204 for (i = 0; i < nr_mce_banks; i++) {
205 u32 msr; 205 u32 msr;
206 msr = MSR_IA32_MC0_STATUS+i*4; 206 msr = MSR_IA32_MC0_STATUS+i*4;
207 rdmsr (msr, low, high); 207 rdmsr(msr, low, high);
208 if (high&(1<<31)) { 208 if (high&(1<<31)) {
209 /* Clear it */ 209 /* Clear it */
210 wrmsr(msr, 0UL, 0UL); 210 wrmsr(msr, 0UL, 0UL);
@@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
214 } 214 }
215 } 215 }
216 mcgstl &= ~(1<<2); 216 mcgstl &= ~(1<<2);
217 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 217 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
218} 218}
219 219
220 220
@@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
222{ 222{
223 u32 l, h; 223 u32 l, h;
224 int i; 224 int i;
225 225
226 machine_check_vector = intel_machine_check; 226 machine_check_vector = intel_machine_check;
227 wmb(); 227 wmb();
228 228
229 printk (KERN_INFO "Intel machine check architecture supported.\n"); 229 printk(KERN_INFO "Intel machine check architecture supported.\n");
230 rdmsr (MSR_IA32_MCG_CAP, l, h); 230 rdmsr(MSR_IA32_MCG_CAP, l, h);
231 if (l & (1<<8)) /* Control register present ? */ 231 if (l & (1<<8)) /* Control register present ? */
232 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 232 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
233 nr_mce_banks = l & 0xff; 233 nr_mce_banks = l & 0xff;
234 234
235 for (i=0; i<nr_mce_banks; i++) { 235 for (i = 0; i < nr_mce_banks; i++) {
236 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 236 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
237 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 237 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
238 } 238 }
239 239
240 set_in_cr4 (X86_CR4_MCE); 240 set_in_cr4(X86_CR4_MCE);
241 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 241 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
242 smp_processor_id()); 242 smp_processor_id());
243 243
244 /* Check for P4/Xeon extended MCE MSRs */ 244 /* Check for P4/Xeon extended MCE MSRs */
245 rdmsr (MSR_IA32_MCG_CAP, l, h); 245 rdmsr(MSR_IA32_MCG_CAP, l, h);
246 if (l & (1<<9)) {/* MCG_EXT_P */ 246 if (l & (1<<9)) {/* MCG_EXT_P */
247 mce_num_extended_msrs = (l >> 16) & 0xff; 247 mce_num_extended_msrs = (l >> 16) & 0xff;
248 printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" 248 printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
249 " available\n", 249 " available\n",
250 smp_processor_id(), mce_num_extended_msrs); 250 smp_processor_id(), mce_num_extended_msrs);
251 251
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1f4cc48c14c6..d5ae2243f0b9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0);
35 35
36#define define_therm_throt_sysdev_show_func(name) \ 36#define define_therm_throt_sysdev_show_func(name) \
37static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ 37static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
38 struct sysdev_attribute *attr, \
38 char *buf) \ 39 char *buf) \
39{ \ 40{ \
40 unsigned int cpu = dev->id; \ 41 unsigned int cpu = dev->id; \
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 5d241ce94a44..509bd3d9eacd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
37static unsigned long smp_changes_mask; 37static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {}; 38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 39static int mtrr_state_set;
40static u64 tom2; 40u64 mtrr_tom2;
41 41
42#undef MODULE_PARAM_PREFIX 42#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 43#define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
139 } 139 }
140 } 140 }
141 141
142 if (tom2) { 142 if (mtrr_tom2) {
143 if (start >= (1ULL<<32) && (end < tom2)) 143 if (start >= (1ULL<<32) && (end < mtrr_tom2))
144 return MTRR_TYPE_WRBACK; 144 return MTRR_TYPE_WRBACK;
145 } 145 }
146 146
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
158 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); 158 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
159} 159}
160 160
161/* fill the MSR pair relating to a var range */
162void fill_mtrr_var_range(unsigned int index,
163 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
164{
165 struct mtrr_var_range *vr;
166
167 vr = mtrr_state.var_ranges;
168
169 vr[index].base_lo = base_lo;
170 vr[index].base_hi = base_hi;
171 vr[index].mask_lo = mask_lo;
172 vr[index].mask_hi = mask_hi;
173}
174
161static void 175static void
162get_fixed_ranges(mtrr_type * frs) 176get_fixed_ranges(mtrr_type * frs)
163{ 177{
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
213 mtrr_state.enabled = (lo & 0xc00) >> 10; 227 mtrr_state.enabled = (lo & 0xc00) >> 10;
214 228
215 if (amd_special_default_mtrr()) { 229 if (amd_special_default_mtrr()) {
216 unsigned lo, hi; 230 unsigned low, high;
217 /* TOP_MEM2 */ 231 /* TOP_MEM2 */
218 rdmsr(MSR_K8_TOP_MEM2, lo, hi); 232 rdmsr(MSR_K8_TOP_MEM2, low, high);
219 tom2 = hi; 233 mtrr_tom2 = high;
220 tom2 <<= 32; 234 mtrr_tom2 <<= 32;
221 tom2 |= lo; 235 mtrr_tom2 |= low;
222 tom2 &= 0xffffff8000000ULL; 236 mtrr_tom2 &= 0xffffff800000ULL;
223 } 237 }
224 if (mtrr_show) { 238 if (mtrr_show) {
225 int high_width; 239 int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
251 else 265 else
252 printk(KERN_INFO "MTRR %u disabled\n", i); 266 printk(KERN_INFO "MTRR %u disabled\n", i);
253 } 267 }
254 if (tom2) { 268 if (mtrr_tom2) {
255 printk(KERN_INFO "TOM2: %016llx aka %lldM\n", 269 printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
256 tom2, tom2>>20); 270 mtrr_tom2, mtrr_tom2>>20);
257 } 271 }
258 } 272 }
259 mtrr_state_set = 1; 273 mtrr_state_set = 1;
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
328 342
329 if (lo != msrwords[0] || hi != msrwords[1]) { 343 if (lo != msrwords[0] || hi != msrwords[1]) {
330 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && 344 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
331 boot_cpu_data.x86 == 15 && 345 (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
332 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) 346 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
333 k8_enable_fixed_iorrs(); 347 k8_enable_fixed_iorrs();
334 mtrr_wrmsr(msr, msrwords[0], msrwords[1]); 348 mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
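The generic.c hunk renames tom2 to the exported mtrr_tom2 and trims the address mask to 0xffffff800000ULL (bits 47:23; the previous constant was the same pattern shifted one nibble higher). A worked example of the hi/lo assembly and masking; the MSR halves below are invented and correspond to a 5 GB top-of-memory.

#include <stdio.h>

int main(void)
{
	unsigned int lo = 0x40000000, hi = 0x1;	/* hypothetical rdmsr result */
	unsigned long long tom2;

	tom2 = hi;
	tom2 <<= 32;
	tom2 |= lo;
	tom2 &= 0xffffff800000ULL;	/* keep only the address bits */

	/* prints: TOM2: 0000000140000000 aka 5120M */
	printf("TOM2: %016llx aka %lluM\n", tom2, tom2 >> 20);
	return 0;
}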
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6a1e278d9323..6f23969c8faf 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
37#include <linux/smp.h> 37#include <linux/smp.h>
38#include <linux/cpu.h> 38#include <linux/cpu.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/sort.h>
40 41
41#include <asm/e820.h> 42#include <asm/e820.h>
42#include <asm/mtrr.h> 43#include <asm/mtrr.h>
@@ -222,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
222 atomic_set(&data.gate,0); 223 atomic_set(&data.gate,0);
223 224
224 /* Start the ball rolling on other CPUs */ 225 /* Start the ball rolling on other CPUs */
225 if (smp_call_function(ipi_handler, &data, 1, 0) != 0) 226 if (smp_call_function(ipi_handler, &data, 0) != 0)
226 panic("mtrr: timed out waiting for other CPUs\n"); 227 panic("mtrr: timed out waiting for other CPUs\n");
227 228
228 local_irq_save(flags); 229 local_irq_save(flags);
@@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = {
609 .resume = mtrr_restore, 610 .resume = mtrr_restore,
610}; 611};
611 612
613/* should be related to the number of variable MTRRs (MTRR_VAR_RANGES) */
614#define RANGE_NUM 256
615
616struct res_range {
617 unsigned long start;
618 unsigned long end;
619};
620
621static int __init
622add_range(struct res_range *range, int nr_range, unsigned long start,
623 unsigned long end)
624{
625 /* out of slots */
626 if (nr_range >= RANGE_NUM)
627 return nr_range;
628
629 range[nr_range].start = start;
630 range[nr_range].end = end;
631
632 nr_range++;
633
634 return nr_range;
635}
636
637static int __init
638add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
639 unsigned long end)
640{
641 int i;
642
643 /* try to merge it with old one */
644 for (i = 0; i < nr_range; i++) {
645 unsigned long final_start, final_end;
646 unsigned long common_start, common_end;
647
648 if (!range[i].end)
649 continue;
650
651 common_start = max(range[i].start, start);
652 common_end = min(range[i].end, end);
653 if (common_start > common_end + 1)
654 continue;
655
656 final_start = min(range[i].start, start);
657 final_end = max(range[i].end, end);
658
659 range[i].start = final_start;
660 range[i].end = final_end;
661 return nr_range;
662 }
663
664 /* need to add that */
665 return add_range(range, nr_range, start, end);
666}
667
668static void __init
669subtract_range(struct res_range *range, unsigned long start, unsigned long end)
670{
671 int i, j;
672
673 for (j = 0; j < RANGE_NUM; j++) {
674 if (!range[j].end)
675 continue;
676
677 if (start <= range[j].start && end >= range[j].end) {
678 range[j].start = 0;
679 range[j].end = 0;
680 continue;
681 }
682
683 if (start <= range[j].start && end < range[j].end &&
684 range[j].start < end + 1) {
685 range[j].start = end + 1;
686 continue;
687 }
688
689
690 if (start > range[j].start && end >= range[j].end &&
691 range[j].end > start - 1) {
692 range[j].end = start - 1;
693 continue;
694 }
695
696 if (start > range[j].start && end < range[j].end) {
697 /* find the new spare */
698 for (i = 0; i < RANGE_NUM; i++) {
699 if (range[i].end == 0)
700 break;
701 }
702 if (i < RANGE_NUM) {
703 range[i].end = range[j].end;
704 range[i].start = end + 1;
705 } else {
706 printk(KERN_ERR "run out of slots in ranges\n");
707 }
708 range[j].end = start - 1;
709 continue;
710 }
711 }
712}
713
714static int __init cmp_range(const void *x1, const void *x2)
715{
716 const struct res_range *r1 = x1;
717 const struct res_range *r2 = x2;
718 long start1, start2;
719
720 start1 = r1->start;
721 start2 = r2->start;
722
723 return start1 - start2;
724}
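
cmp_range() orders the table by start pfn for the sort() call further down. A standalone equivalent using userspace qsort() is sketched below; it compares instead of subtracting, which sidesteps any narrowing of the long difference into the int return value (struct and function names here are local to the sketch):

#include <stdio.h>
#include <stdlib.h>

struct pfn_range { unsigned long start, end; };

/* Same ordering rule as cmp_range(): ascending by start pfn. */
static int cmp_start(const void *a, const void *b)
{
	const struct pfn_range *r1 = a, *r2 = b;

	return (r1->start > r2->start) - (r1->start < r2->start);
}

int main(void)
{
	struct pfn_range r[] = { { 0x100000, 0x3fffff }, { 0, 0x9ffff } };
	int i;

	qsort(r, 2, sizeof(r[0]), cmp_start);
	for (i = 0; i < 2; i++)
		printf("[%#lx, %#lx]\n", r[i].start, r[i].end);
	return 0;
}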
725
726struct var_mtrr_range_state {
727 unsigned long base_pfn;
728 unsigned long size_pfn;
729 mtrr_type type;
730};
731
732struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
733static int __initdata debug_print;
734
735static int __init
736x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
737 unsigned long extra_remove_base,
738 unsigned long extra_remove_size)
739{
740 unsigned long i, base, size;
741 mtrr_type type;
742
743 for (i = 0; i < num_var_ranges; i++) {
744 type = range_state[i].type;
745 if (type != MTRR_TYPE_WRBACK)
746 continue;
747 base = range_state[i].base_pfn;
748 size = range_state[i].size_pfn;
749 nr_range = add_range_with_merge(range, nr_range, base,
750 base + size - 1);
751 }
752 if (debug_print) {
753 printk(KERN_DEBUG "After WB checking\n");
754 for (i = 0; i < nr_range; i++)
755 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
756 range[i].start, range[i].end + 1);
757 }
758
759 /* take out UC ranges */
760 for (i = 0; i < num_var_ranges; i++) {
761 type = range_state[i].type;
762 if (type != MTRR_TYPE_UNCACHABLE)
763 continue;
764 size = range_state[i].size_pfn;
765 if (!size)
766 continue;
767 base = range_state[i].base_pfn;
768 subtract_range(range, base, base + size - 1);
769 }
770 if (extra_remove_size)
771 subtract_range(range, extra_remove_base,
772 extra_remove_base + extra_remove_size - 1);
773
774 /* get new range num */
775 nr_range = 0;
776 for (i = 0; i < RANGE_NUM; i++) {
777 if (!range[i].end)
778 continue;
779 nr_range++;
780 }
781 if (debug_print) {
782 printk(KERN_DEBUG "After UC checking\n");
783 for (i = 0; i < nr_range; i++)
784 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
785 range[i].start, range[i].end + 1);
786 }
787
788 /* sort the ranges */
789 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
790 if (debug_print) {
791 printk(KERN_DEBUG "After sorting\n");
792 for (i = 0; i < nr_range; i++)
793 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
794 range[i].start, range[i].end + 1);
795 }
796
797 /* clear the slots that are no longer used */
798 for (i = nr_range; i < RANGE_NUM; i++)
799 memset(&range[i], 0, sizeof(range[i]));
800
801 return nr_range;
802}
803
804static struct res_range __initdata range[RANGE_NUM];
805
806#ifdef CONFIG_MTRR_SANITIZER
807
808static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
809{
810 unsigned long sum;
811 int i;
812
813 sum = 0;
814 for (i = 0; i < nr_range; i++)
815 sum += range[i].end + 1 - range[i].start;
816
817 return sum;
818}
819
820static int enable_mtrr_cleanup __initdata =
821 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
822
823static int __init disable_mtrr_cleanup_setup(char *str)
824{
825 if (enable_mtrr_cleanup != -1)
826 enable_mtrr_cleanup = 0;
827 return 0;
828}
829early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
830
831static int __init enable_mtrr_cleanup_setup(char *str)
832{
833 if (enable_mtrr_cleanup != -1)
834 enable_mtrr_cleanup = 1;
835 return 0;
836}
837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
838
839struct var_mtrr_state {
840 unsigned long range_startk;
841 unsigned long range_sizek;
842 unsigned long chunk_sizek;
843 unsigned long gran_sizek;
844 unsigned int reg;
845};
846
847static void __init
848set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
849 unsigned char type, unsigned int address_bits)
850{
851 u32 base_lo, base_hi, mask_lo, mask_hi;
852 u64 base, mask;
853
854 if (!sizek) {
855 fill_mtrr_var_range(reg, 0, 0, 0, 0);
856 return;
857 }
858
859 mask = (1ULL << address_bits) - 1;
860 mask &= ~((((u64)sizek) << 10) - 1);
861
862 base = ((u64)basek) << 10;
863
864 base |= type;
865 mask |= 0x800;
866
867 base_lo = base & ((1ULL<<32) - 1);
868 base_hi = base >> 32;
869
870 mask_lo = mask & ((1ULL<<32) - 1);
871 mask_hi = mask >> 32;
872
873 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
874}
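
The arithmetic above builds the PhysBase/PhysMask pair for one variable MTRR: the base carries the memory type in its low bits, the mask keeps only the address bits above the region size, and bit 11 (0x800) marks the register valid. A standalone sketch of the same computation on assumed numbers, a 256MB write-back range at 0 with 36 physical address bits; the MTRR_VALID_BIT name is invented for the example:

#include <stdio.h>
#include <stdint.h>

#define MTRR_TYPE_WRBACK 6
#define MTRR_VALID_BIT   0x800ULL	/* bit 11 of PhysMask */

int main(void)
{
	unsigned long basek = 0, sizek = 256 * 1024;	/* 256 MB at 0 */
	unsigned int address_bits = 36;

	uint64_t mask = (1ULL << address_bits) - 1;
	mask &= ~(((uint64_t)sizek << 10) - 1);		/* clear the size bits */

	uint64_t base = ((uint64_t)basek << 10) | MTRR_TYPE_WRBACK;
	mask |= MTRR_VALID_BIT;

	printf("PhysBase = %#018llx\n", (unsigned long long)base);
	printf("PhysMask = %#018llx\n", (unsigned long long)mask);
	return 0;
}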
875
876static void __init
877save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
878 unsigned char type)
879{
880 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
881 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
882 range_state[reg].type = type;
883}
884
885static void __init
886set_var_mtrr_all(unsigned int address_bits)
887{
888 unsigned long basek, sizek;
889 unsigned char type;
890 unsigned int reg;
891
892 for (reg = 0; reg < num_var_ranges; reg++) {
893 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
894 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
895 type = range_state[reg].type;
896
897 set_var_mtrr(reg, basek, sizek, type, address_bits);
898 }
899}
900
901static unsigned int __init
902range_to_mtrr(unsigned int reg, unsigned long range_startk,
903 unsigned long range_sizek, unsigned char type)
904{
905 if (!range_sizek || (reg >= num_var_ranges))
906 return reg;
907
908 while (range_sizek) {
909 unsigned long max_align, align;
910 unsigned long sizek;
911
912 /* Compute the maximum size I can make a range */
913 if (range_startk)
914 max_align = ffs(range_startk) - 1;
915 else
916 max_align = 32;
917 align = fls(range_sizek) - 1;
918 if (align > max_align)
919 align = max_align;
920
921 sizek = 1 << align;
922 if (debug_print)
923 printk(KERN_DEBUG "Setting variable MTRR %d, "
924 "base: %ldMB, range: %ldMB, type %s\n",
925 reg, range_startk >> 10, sizek >> 10,
926 (type == MTRR_TYPE_UNCACHABLE)?"UC":
927 ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
928 );
929 save_var_mtrr(reg++, range_startk, sizek, type);
930 range_startk += sizek;
931 range_sizek -= sizek;
932 if (reg >= num_var_ranges)
933 break;
934 }
935 return reg;
936}
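
range_to_mtrr() carves a range into power-of-two blocks, each no larger than its own alignment allows: the alignment limit comes from ffs() of the current base, the size limit from fls() of what is left. The sketch below replays that greedy split for a hypothetical 3.25GB range and ends up with 2048MB + 1024MB + 256MB, i.e. three registers (the ffs/fls stand-ins are local to the example):

#include <stdio.h>

/* Minimal ffs()/fls() stand-ins so the sketch is self-contained. */
static int my_ffs(unsigned long x)
{
	int n = 1;

	if (!x)
		return 0;
	while (!(x & 1)) {
		x >>= 1;
		n++;
	}
	return n;
}

static int my_fls(unsigned long x)
{
	int n = 0;

	while (x) {
		x >>= 1;
		n++;
	}
	return n;
}

/* Greedy power-of-two split used by range_to_mtrr(), sizes in KB. */
int main(void)
{
	unsigned long startk = 0, sizek = 3328UL * 1024;	/* 3.25 GB */

	while (sizek) {
		int max_align = startk ? my_ffs(startk) - 1 : 32;
		int align = my_fls(sizek) - 1;
		unsigned long chunk;

		if (align > max_align)
			align = max_align;
		chunk = 1UL << align;

		printf("MTRR: base %5luMB size %5luMB\n",
		       startk >> 10, chunk >> 10);
		startk += chunk;
		sizek -= chunk;
	}
	return 0;	/* prints 2048MB, 1024MB, 256MB */
}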
937
938static unsigned __init
939range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
940 unsigned long sizek)
941{
942 unsigned long hole_basek, hole_sizek;
943 unsigned long second_basek, second_sizek;
944 unsigned long range0_basek, range0_sizek;
945 unsigned long range_basek, range_sizek;
946 unsigned long chunk_sizek;
947 unsigned long gran_sizek;
948
949 hole_basek = 0;
950 hole_sizek = 0;
951 second_basek = 0;
952 second_sizek = 0;
953 chunk_sizek = state->chunk_sizek;
954 gran_sizek = state->gran_sizek;
955
956 /* align to the granule size to prevent small blocks from using up MTRRs */
957 range_basek = ALIGN(state->range_startk, gran_sizek);
958 if ((range_basek > basek) && basek)
959 return second_sizek;
960 state->range_sizek -= (range_basek - state->range_startk);
961 range_sizek = ALIGN(state->range_sizek, gran_sizek);
962
963 while (range_sizek > state->range_sizek) {
964 range_sizek -= gran_sizek;
965 if (!range_sizek)
966 return 0;
967 }
968 state->range_sizek = range_sizek;
969
970 /* try to append some small hole */
971 range0_basek = state->range_startk;
972 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
973 if (range0_sizek == state->range_sizek) {
974 if (debug_print)
975 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
976 range0_basek<<10,
977 (range0_basek + state->range_sizek)<<10);
978 state->reg = range_to_mtrr(state->reg, range0_basek,
979 state->range_sizek, MTRR_TYPE_WRBACK);
980 return 0;
981 }
982
983 range0_sizek -= chunk_sizek;
984 if (range0_sizek && sizek) {
985 while (range0_basek + range0_sizek > (basek + sizek)) {
986 range0_sizek -= chunk_sizek;
987 if (!range0_sizek)
988 break;
989 }
990 }
991
992 if (range0_sizek) {
993 if (debug_print)
994 printk(KERN_DEBUG "range0: %016lx - %016lx\n",
995 range0_basek<<10,
996 (range0_basek + range0_sizek)<<10);
997 state->reg = range_to_mtrr(state->reg, range0_basek,
998 range0_sizek, MTRR_TYPE_WRBACK);
999
1000 }
1001
1002 range_basek = range0_basek + range0_sizek;
1003 range_sizek = chunk_sizek;
1004
1005 if (range_basek + range_sizek > basek &&
1006 range_basek + range_sizek <= (basek + sizek)) {
1007 /* one hole */
1008 second_basek = basek;
1009 second_sizek = range_basek + range_sizek - basek;
1010 }
1011
1012 /* if this is the last piece, only one hole near the end is allowed */
1013 if ((second_basek || !basek) &&
1014 range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
1015 (chunk_sizek >> 1)) {
1016 /*
1017 * one hole in the middle (second_sizek is not 0) or at the
1018 * end (second_sizek is 0)
1019 */
1020 hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
1021 - second_sizek;
1022 hole_basek = range_basek + range_sizek - hole_sizek
1023 - second_sizek;
1024 } else {
1025 /* fallback for big hole, or several holes */
1026 range_sizek = state->range_sizek - range0_sizek;
1027 second_basek = 0;
1028 second_sizek = 0;
1029 }
1030
1031 if (debug_print)
1032 printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
1033 (range_basek + range_sizek)<<10);
1034 state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
1035 MTRR_TYPE_WRBACK);
1036 if (hole_sizek) {
1037 if (debug_print)
1038 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
1039 hole_basek<<10, (hole_basek + hole_sizek)<<10);
1040 state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
1041 MTRR_TYPE_UNCACHABLE);
1042
1043 }
1044
1045 return second_sizek;
1046}
1047
1048static void __init
1049set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
1050 unsigned long size_pfn)
1051{
1052 unsigned long basek, sizek;
1053 unsigned long second_sizek = 0;
1054
1055 if (state->reg >= num_var_ranges)
1056 return;
1057
1058 basek = base_pfn << (PAGE_SHIFT - 10);
1059 sizek = size_pfn << (PAGE_SHIFT - 10);
1060
1061 /* See if I can merge with the last range */
1062 if ((basek <= 1024) ||
1063 (state->range_startk + state->range_sizek == basek)) {
1064 unsigned long endk = basek + sizek;
1065 state->range_sizek = endk - state->range_startk;
1066 return;
1067 }
1068 /* Write the range mtrrs */
1069 if (state->range_sizek != 0)
1070 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
1071
1072 /* Allocate an msr */
1073 state->range_startk = basek + second_sizek;
1074 state->range_sizek = sizek - second_sizek;
1075}
1076
1077/* minimum size of mtrr block that can take a hole */
1078static u64 mtrr_chunk_size __initdata = (256ULL<<20);
1079
1080static int __init parse_mtrr_chunk_size_opt(char *p)
1081{
1082 if (!p)
1083 return -EINVAL;
1084 mtrr_chunk_size = memparse(p, &p);
1085 return 0;
1086}
1087early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
1088
1089/* granularity of an mtrr block */
1090static u64 mtrr_gran_size __initdata;
1091
1092static int __init parse_mtrr_gran_size_opt(char *p)
1093{
1094 if (!p)
1095 return -EINVAL;
1096 mtrr_gran_size = memparse(p, &p);
1097 return 0;
1098}
1099early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
1100
1101static int nr_mtrr_spare_reg __initdata =
1102 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
1103
1104static int __init parse_mtrr_spare_reg(char *arg)
1105{
1106 if (arg)
1107 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
1108 return 0;
1109}
1110
1111early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
1112
1113static int __init
1114x86_setup_var_mtrrs(struct res_range *range, int nr_range,
1115 u64 chunk_size, u64 gran_size)
1116{
1117 struct var_mtrr_state var_state;
1118 int i;
1119 int num_reg;
1120
1121 var_state.range_startk = 0;
1122 var_state.range_sizek = 0;
1123 var_state.reg = 0;
1124 var_state.chunk_sizek = chunk_size >> 10;
1125 var_state.gran_sizek = gran_size >> 10;
1126
1127 memset(range_state, 0, sizeof(range_state));
1128
1129 /* Write the range etc */
1130 for (i = 0; i < nr_range; i++)
1131 set_var_mtrr_range(&var_state, range[i].start,
1132 range[i].end - range[i].start + 1);
1133
1134 /* Write the last range */
1135 if (var_state.range_sizek != 0)
1136 range_to_mtrr_with_hole(&var_state, 0, 0);
1137
1138 num_reg = var_state.reg;
1139 /* Clear out the extra MTRR's */
1140 while (var_state.reg < num_var_ranges) {
1141 save_var_mtrr(var_state.reg, 0, 0, 0);
1142 var_state.reg++;
1143 }
1144
1145 return num_reg;
1146}
1147
1148struct mtrr_cleanup_result {
1149 unsigned long gran_sizek;
1150 unsigned long chunk_sizek;
1151 unsigned long lose_cover_sizek;
1152 unsigned int num_reg;
1153 int bad;
1154};
1155
1156/*
1157 * gran_size: 1M, 2M, ..., 2G
1158 * chunk size: gran_size, ..., 4G
1159 * so we need (2 + 13) * 12 / 2 = 90 result slots
1160 */
1161#define NUM_RESULT 90
1162#define PSHIFT (PAGE_SHIFT - 10)
1163
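
The bound above can be checked directly: gran_size takes 12 values (1M..2G) and chunk_size runs from gran_size up to 4G, giving 13 + 12 + ... + 2 = 90 combinations, which is where NUM_RESULT comes from. A throwaway verification loop (illustration only):

#include <stdio.h>

int main(void)
{
	unsigned long long gran, chunk;
	int combos = 0;

	for (gran = 1ULL << 20; gran < (1ULL << 32); gran <<= 1)	/* 1M .. 2G */
		for (chunk = gran; chunk < (1ULL << 33); chunk <<= 1)	/* .. up to 4G */
			combos++;

	printf("%d\n", combos);		/* 90 == NUM_RESULT */
	return 0;
}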
1164static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1165static struct res_range __initdata range_new[RANGE_NUM];
1166static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1167
1168static int __init mtrr_cleanup(unsigned address_bits)
1169{
1170 unsigned long extra_remove_base, extra_remove_size;
1171 unsigned long i, base, size, def, dummy;
1172 mtrr_type type;
1173 int nr_range, nr_range_new;
1174 u64 chunk_size, gran_size;
1175 unsigned long range_sums, range_sums_new;
1176 int index_good;
1177 int num_reg_good;
1178
1179 /* extra one for all 0 */
1180 int num[MTRR_NUM_TYPES + 1];
1181
1182 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1183 return 0;
1184 rdmsr(MTRRdefType_MSR, def, dummy);
1185 def &= 0xff;
1186 if (def != MTRR_TYPE_UNCACHABLE)
1187 return 0;
1188
1189 /* get it and store it aside */
1190 memset(range_state, 0, sizeof(range_state));
1191 for (i = 0; i < num_var_ranges; i++) {
1192 mtrr_if->get(i, &base, &size, &type);
1193 range_state[i].base_pfn = base;
1194 range_state[i].size_pfn = size;
1195 range_state[i].type = type;
1196 }
1197
1198 /* check entries number */
1199 memset(num, 0, sizeof(num));
1200 for (i = 0; i < num_var_ranges; i++) {
1201 type = range_state[i].type;
1202 size = range_state[i].size_pfn;
1203 if (type >= MTRR_NUM_TYPES)
1204 continue;
1205 if (!size)
1206 type = MTRR_NUM_TYPES;
1207 num[type]++;
1208 }
1209
1210 /* check if we got UC entries */
1211 if (!num[MTRR_TYPE_UNCACHABLE])
1212 return 0;
1213
1214 /* check if we only had WB and UC */
1215 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1216 num_var_ranges - num[MTRR_NUM_TYPES])
1217 return 0;
1218
1219 memset(range, 0, sizeof(range));
1220 extra_remove_size = 0;
1221 if (mtrr_tom2) {
1222 extra_remove_base = 1 << (32 - PAGE_SHIFT);
1223 extra_remove_size =
1224 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1225 }
1226 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
1227 extra_remove_size);
1228 range_sums = sum_ranges(range, nr_range);
1229 printk(KERN_INFO "total RAM covered: %ldM\n",
1230 range_sums >> (20 - PAGE_SHIFT));
1231
1232 if (mtrr_chunk_size && mtrr_gran_size) {
1233 int num_reg;
1234
1235 debug_print = 1;
1236 /* convert ranges to var ranges state */
1237 num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
1238 mtrr_gran_size);
1239
1240 /* we got new setting in range_state, check it */
1241 memset(range_new, 0, sizeof(range_new));
1242 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1243 extra_remove_base,
1244 extra_remove_size);
1245 range_sums_new = sum_ranges(range_new, nr_range_new);
1246
1247 i = 0;
1248 result[i].chunk_sizek = mtrr_chunk_size >> 10;
1249 result[i].gran_sizek = mtrr_gran_size >> 10;
1250 result[i].num_reg = num_reg;
1251 if (range_sums < range_sums_new) {
1252 result[i].lose_cover_sizek =
1253 (range_sums_new - range_sums) << PSHIFT;
1254 result[i].bad = 1;
1255 } else
1256 result[i].lose_cover_sizek =
1257 (range_sums - range_sums_new) << PSHIFT;
1258
1259 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
1260 result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
1261 result[i].chunk_sizek >> 10);
1262 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
1263 result[i].num_reg, result[i].bad?"-":"",
1264 result[i].lose_cover_sizek >> 10);
1265 if (!result[i].bad) {
1266 set_var_mtrr_all(address_bits);
1267 return 1;
1268 }
1269 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1270 "will find an optimal one\n");
1271 debug_print = 0;
1272 memset(result, 0, sizeof(result[0]));
1273 }
1274
1275 i = 0;
1276 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1277 memset(result, 0, sizeof(result));
1278 for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
1279 for (chunk_size = gran_size; chunk_size < (1ULL<<33);
1280 chunk_size <<= 1) {
1281 int num_reg;
1282
1283 if (debug_print)
1284 printk(KERN_INFO
1285 "\ngran_size: %lldM chunk_size: %lldM\n",
1286 gran_size >> 20, chunk_size >> 20);
1287 if (i >= NUM_RESULT)
1288 continue;
1289
1290 /* convert ranges to var ranges state */
1291 num_reg = x86_setup_var_mtrrs(range, nr_range,
1292 chunk_size, gran_size);
1293
1294 /* we got new setting in range_state, check it */
1295 memset(range_new, 0, sizeof(range_new));
1296 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1297 extra_remove_base, extra_remove_size);
1298 range_sums_new = sum_ranges(range_new, nr_range_new);
1299
1300 result[i].chunk_sizek = chunk_size >> 10;
1301 result[i].gran_sizek = gran_size >> 10;
1302 result[i].num_reg = num_reg;
1303 if (range_sums < range_sums_new) {
1304 result[i].lose_cover_sizek =
1305 (range_sums_new - range_sums) << PSHIFT;
1306 result[i].bad = 1;
1307 } else
1308 result[i].lose_cover_sizek =
1309 (range_sums - range_sums_new) << PSHIFT;
1310
1311 /* double check it */
1312 if (!result[i].bad && !result[i].lose_cover_sizek) {
1313 if (nr_range_new != nr_range ||
1314 memcmp(range, range_new, sizeof(range)))
1315 result[i].bad = 1;
1316 }
1317
1318 if (!result[i].bad && (range_sums - range_sums_new <
1319 min_loss_pfn[num_reg])) {
1320 min_loss_pfn[num_reg] =
1321 range_sums - range_sums_new;
1322 }
1323 i++;
1324 }
1325 }
1326
1327 /* print out all */
1328 for (i = 0; i < NUM_RESULT; i++) {
1329 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
1330 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
1331 result[i].chunk_sizek >> 10);
1332 printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
1333 result[i].num_reg, result[i].bad?"-":"",
1334 result[i].lose_cover_sizek >> 10);
1335 }
1336
1337 /* try to find the optimal index */
1338 if (nr_mtrr_spare_reg >= num_var_ranges)
1339 nr_mtrr_spare_reg = num_var_ranges - 1;
1340 num_reg_good = -1;
1341 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1342 if (!min_loss_pfn[i]) {
1343 num_reg_good = i;
1344 break;
1345 }
1346 }
1347
1348 index_good = -1;
1349 if (num_reg_good != -1) {
1350 for (i = 0; i < NUM_RESULT; i++) {
1351 if (!result[i].bad &&
1352 result[i].num_reg == num_reg_good &&
1353 !result[i].lose_cover_sizek) {
1354 index_good = i;
1355 break;
1356 }
1357 }
1358 }
1359
1360 if (index_good != -1) {
1361 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1362 i = index_good;
1363 printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
1364 result[i].gran_sizek >> 10,
1365 result[i].chunk_sizek >> 10);
1366 printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
1367 result[i].num_reg,
1368 result[i].lose_cover_sizek >> 10);
1369 /* convert ranges to var ranges state */
1370 chunk_size = result[i].chunk_sizek;
1371 chunk_size <<= 10;
1372 gran_size = result[i].gran_sizek;
1373 gran_size <<= 10;
1374 debug_print = 1;
1375 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1376 set_var_mtrr_all(address_bits);
1377 return 1;
1378 }
1379
1380 printk(KERN_INFO "mtrr_cleanup: cannot find an optimal value\n");
1381 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
1382
1383 return 0;
1384}
1385#else
1386static int __init mtrr_cleanup(unsigned address_bits)
1387{
1388 return 0;
1389}
1390#endif
1391
1392static int __initdata changed_by_mtrr_cleanup;
1393
612static int disable_mtrr_trim; 1394static int disable_mtrr_trim;
613 1395
614static int __init disable_mtrr_trim_setup(char *str) 1396static int __init disable_mtrr_trim_setup(char *str)
@@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void)
648 return 0; 1430 return 0;
649} 1431}
650 1432
1433static u64 __init real_trim_memory(unsigned long start_pfn,
1434 unsigned long limit_pfn)
1435{
1436 u64 trim_start, trim_size;
1437 trim_start = start_pfn;
1438 trim_start <<= PAGE_SHIFT;
1439 trim_size = limit_pfn;
1440 trim_size <<= PAGE_SHIFT;
1441 trim_size -= trim_start;
1442
1443 return e820_update_range(trim_start, trim_size, E820_RAM,
1444 E820_RESERVED);
1445}
651/** 1446/**
652 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs 1447 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
653 * @end_pfn: ending page frame number 1448 * @end_pfn: ending page frame number
@@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
663{ 1458{
664 unsigned long i, base, size, highest_pfn = 0, def, dummy; 1459 unsigned long i, base, size, highest_pfn = 0, def, dummy;
665 mtrr_type type; 1460 mtrr_type type;
666 u64 trim_start, trim_size; 1461 int nr_range;
1462 u64 total_trim_size;
667 1463
1464 /* extra one for all 0 */
1465 int num[MTRR_NUM_TYPES + 1];
668 /* 1466 /*
669 * Make sure we only trim uncachable memory on machines that 1467 * Make sure we only trim uncachable memory on machines that
670 * support the Intel MTRR architecture: 1468 * support the Intel MTRR architecture:
@@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
676 if (def != MTRR_TYPE_UNCACHABLE) 1474 if (def != MTRR_TYPE_UNCACHABLE)
677 return 0; 1475 return 0;
678 1476
679 if (amd_special_default_mtrr()) 1477 /* get it and store it aside */
680 return 0; 1478 memset(range_state, 0, sizeof(range_state));
1479 for (i = 0; i < num_var_ranges; i++) {
1480 mtrr_if->get(i, &base, &size, &type);
1481 range_state[i].base_pfn = base;
1482 range_state[i].size_pfn = size;
1483 range_state[i].type = type;
1484 }
681 1485
682 /* Find highest cached pfn */ 1486 /* Find highest cached pfn */
683 for (i = 0; i < num_var_ranges; i++) { 1487 for (i = 0; i < num_var_ranges; i++) {
684 mtrr_if->get(i, &base, &size, &type); 1488 type = range_state[i].type;
685 if (type != MTRR_TYPE_WRBACK) 1489 if (type != MTRR_TYPE_WRBACK)
686 continue; 1490 continue;
1491 base = range_state[i].base_pfn;
1492 size = range_state[i].size_pfn;
687 if (highest_pfn < base + size) 1493 if (highest_pfn < base + size)
688 highest_pfn = base + size; 1494 highest_pfn = base + size;
689 } 1495 }
@@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
698 return 0; 1504 return 0;
699 } 1505 }
700 1506
701 if (highest_pfn < end_pfn) { 1507 /* check entries number */
1508 memset(num, 0, sizeof(num));
1509 for (i = 0; i < num_var_ranges; i++) {
1510 type = range_state[i].type;
1511 if (type >= MTRR_NUM_TYPES)
1512 continue;
1513 size = range_state[i].size_pfn;
1514 if (!size)
1515 type = MTRR_NUM_TYPES;
1516 num[type]++;
1517 }
1518
1519 /* no entry for WB? */
1520 if (!num[MTRR_TYPE_WRBACK])
1521 return 0;
1522
1523 /* check if we only had WB and UC */
1524 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1525 num_var_ranges - num[MTRR_NUM_TYPES])
1526 return 0;
1527
1528 memset(range, 0, sizeof(range));
1529 nr_range = 0;
1530 if (mtrr_tom2) {
1531 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1532 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1533 if (highest_pfn < range[nr_range].end + 1)
1534 highest_pfn = range[nr_range].end + 1;
1535 nr_range++;
1536 }
1537 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1538
1539 total_trim_size = 0;
1540 /* check the head */
1541 if (range[0].start)
1542 total_trim_size += real_trim_memory(0, range[0].start);
1543 /* check the holes */
1544 for (i = 0; i < nr_range - 1; i++) {
1545 if (range[i].end + 1 < range[i+1].start)
1546 total_trim_size += real_trim_memory(range[i].end + 1,
1547 range[i+1].start);
1548 }
1549 /* check the top */
1550 i = nr_range - 1;
1551 if (range[i].end + 1 < end_pfn)
1552 total_trim_size += real_trim_memory(range[i].end + 1,
1553 end_pfn);
1554
1555 if (total_trim_size) {
702 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" 1556 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
703 " all of memory, losing %luMB of RAM.\n", 1557 " all of memory, losing %lluMB of RAM.\n",
704 (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); 1558 total_trim_size >> 20);
705 1559
706 WARN_ON(1); 1560 if (!changed_by_mtrr_cleanup)
1561 WARN_ON(1);
707 1562
708 printk(KERN_INFO "update e820 for mtrr\n"); 1563 printk(KERN_INFO "update e820 for mtrr\n");
709 trim_start = highest_pfn;
710 trim_start <<= PAGE_SHIFT;
711 trim_size = end_pfn;
712 trim_size <<= PAGE_SHIFT;
713 trim_size -= trim_start;
714 update_memory_range(trim_start, trim_size, E820_RAM,
715 E820_RESERVED);
716 update_e820(); 1564 update_e820();
1565
717 return 1; 1566 return 1;
718 } 1567 }
719 1568
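
The trimming in the hunk above reserves everything the sorted, merged write-back ranges do not cover: the gap before the first range, the gaps between neighbours, and the tail up to end_pfn. The sketch below repeats that accounting with made-up pfn ranges and a plain counter in place of the e820 updates; it assumes 4K pages for the MB conversion:

#include <stdio.h>

struct pfn_range { unsigned long start, end; };	/* inclusive pfns, sorted */

int main(void)
{
	/* Made-up WB coverage: [0, 0x9ffff] and [0x100000, 0x3fffff]. */
	struct pfn_range range[] = {
		{ 0x00000, 0x9ffff },
		{ 0x100000, 0x3fffff },
	};
	int nr_range = 2;
	unsigned long end_pfn = 0x440000;	/* highest RAM pfn + 1 */
	unsigned long trimmed = 0;
	int i;

	if (range[0].start)				/* head */
		trimmed += range[0].start;
	for (i = 0; i < nr_range - 1; i++)		/* holes */
		if (range[i].end + 1 < range[i + 1].start)
			trimmed += range[i + 1].start - (range[i].end + 1);
	if (range[nr_range - 1].end + 1 < end_pfn)	/* top */
		trimmed += end_pfn - (range[nr_range - 1].end + 1);

	printf("would trim %lu pages (%luMB)\n",
	       trimmed, trimmed >> (20 - 12));		/* 4K pages -> MB */
	return 0;
}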
@@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
729 */ 1578 */
730void __init mtrr_bp_init(void) 1579void __init mtrr_bp_init(void)
731{ 1580{
1581 u32 phys_addr;
732 init_ifs(); 1582 init_ifs();
733 1583
1584 phys_addr = 32;
1585
734 if (cpu_has_mtrr) { 1586 if (cpu_has_mtrr) {
735 mtrr_if = &generic_mtrr_ops; 1587 mtrr_if = &generic_mtrr_ops;
736 size_or_mask = 0xff000000; /* 36 bits */ 1588 size_or_mask = 0xff000000; /* 36 bits */
737 size_and_mask = 0x00f00000; 1589 size_and_mask = 0x00f00000;
1590 phys_addr = 36;
738 1591
739 /* This is an AMD specific MSR, but we assume(hope?) that 1592 /* This is an AMD specific MSR, but we assume(hope?) that
740 Intel will implement it to when they extend the address 1593 Intel will implement it to when they extend the address
741 bus of the Xeon. */ 1594 bus of the Xeon. */
742 if (cpuid_eax(0x80000000) >= 0x80000008) { 1595 if (cpuid_eax(0x80000000) >= 0x80000008) {
743 u32 phys_addr;
744 phys_addr = cpuid_eax(0x80000008) & 0xff; 1596 phys_addr = cpuid_eax(0x80000008) & 0xff;
745 /* CPUID workaround for Intel 0F33/0F34 CPU */ 1597 /* CPUID workaround for Intel 0F33/0F34 CPU */
746 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1598 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void)
758 don't support PAE */ 1610 don't support PAE */
759 size_or_mask = 0xfff00000; /* 32 bits */ 1611 size_or_mask = 0xfff00000; /* 32 bits */
760 size_and_mask = 0; 1612 size_and_mask = 0;
1613 phys_addr = 32;
761 } 1614 }
762 } else { 1615 } else {
763 switch (boot_cpu_data.x86_vendor) { 1616 switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void)
791 if (mtrr_if) { 1644 if (mtrr_if) {
792 set_num_var_ranges(); 1645 set_num_var_ranges();
793 init_table(); 1646 init_table();
794 if (use_intel()) 1647 if (use_intel()) {
795 get_mtrr_state(); 1648 get_mtrr_state();
1649
1650 if (mtrr_cleanup(phys_addr)) {
1651 changed_by_mtrr_cleanup = 1;
1652 mtrr_if->set_all();
1653 }
1654
1655 }
796 } 1656 }
797} 1657}
798 1658
@@ -822,16 +1682,17 @@ void mtrr_ap_init(void)
822 */ 1682 */
823void mtrr_save_state(void) 1683void mtrr_save_state(void)
824{ 1684{
825 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); 1685 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
826} 1686}
827 1687
828static int __init mtrr_init_finialize(void) 1688static int __init mtrr_init_finialize(void)
829{ 1689{
830 if (!mtrr_if) 1690 if (!mtrr_if)
831 return 0; 1691 return 0;
832 if (use_intel()) 1692 if (use_intel()) {
833 mtrr_state_warn(); 1693 if (!changed_by_mtrr_cleanup)
834 else { 1694 mtrr_state_warn();
1695 } else {
835 /* The CPUs haven't MTRR and seem to not support SMP. They have 1696 /* The CPUs haven't MTRR and seem to not support SMP. They have
836 * specific drivers, we use a tricky method to support 1697 * specific drivers, we use a tricky method to support
837 * suspend/resume for them. 1698 * suspend/resume for them.
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2cc77eb6fea3..2dc4ec656b23 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
83 83
84void fill_mtrr_var_range(unsigned int index,
85 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
84void get_mtrr_state(void); 86void get_mtrr_state(void);
85 87
86extern void set_mtrr_ops(struct mtrr_ops * ops); 88extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
92#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 94#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
93 95
94extern unsigned int num_var_ranges; 96extern unsigned int num_var_ranges;
97extern u64 mtrr_tom2;
95 98
96void mtrr_state_warn(void); 99void mtrr_state_warn(void);
97const char *mtrr_attrib_to_str(int x); 100const char *mtrr_attrib_to_str(int x);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f9ae93adffe5..de7439f82b92 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -1,11 +1,15 @@
1/* local apic based NMI watchdog for various CPUs. 1/*
2 This file also handles reservation of performance counters for coordination 2 * local apic based NMI watchdog for various CPUs.
3 with other users (like oprofile). 3 *
4 4 * This file also handles reservation of performance counters for coordination
5 Note that these events normally don't tick when the CPU idles. This means 5 * with other users (like oprofile).
6 the frequency varies with CPU load. 6 *
7 7 * Note that these events normally don't tick when the CPU idles. This means
8 Original code for K7/P6 written by Keith Owens */ 8 * the frequency varies with CPU load.
9 *
10 * Original code for K7/P6 written by Keith Owens
11 *
12 */
9 13
10#include <linux/percpu.h> 14#include <linux/percpu.h>
11#include <linux/module.h> 15#include <linux/module.h>
@@ -36,12 +40,16 @@ struct wd_ops {
36 40
37static const struct wd_ops *wd_ops; 41static const struct wd_ops *wd_ops;
38 42
39/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 43/*
40 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) 44 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0.
46 *
47 * It will be the max for all platforms (for now)
41 */ 48 */
42#define NMI_MAX_COUNTER_BITS 66 49#define NMI_MAX_COUNTER_BITS 66
43 50
44/* perfctr_nmi_owner tracks the ownership of the perfctr registers: 51/*
52 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
45 * evtsel_nmi_owner tracks the ownership of the event selection 53 * evtsel_nmi_owner tracks the ownership of the event selection
46 * - different performance counters/ event selection may be reserved for 54 * - different performance counters/ event selection may be reserved for
47 * different subsystems this reservation system just tries to coordinate 55 * different subsystems this reservation system just tries to coordinate
@@ -73,8 +81,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
73 return 0; 81 return 0;
74} 82}
75 83
76/* converts an msr to an appropriate reservation bit */ 84/*
77/* returns the bit offset of the event selection register */ 85 * converts an msr to an appropriate reservation bit
86 * returns the bit offset of the event selection register
87 */
78static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) 88static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
79{ 89{
80 /* returns the bit offset of the event selection register */ 90 /* returns the bit offset of the event selection register */
@@ -114,6 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
114 124
115 return (!test_bit(counter, perfctr_nmi_owner)); 125 return (!test_bit(counter, perfctr_nmi_owner));
116} 126}
127EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
117 128
118int reserve_perfctr_nmi(unsigned int msr) 129int reserve_perfctr_nmi(unsigned int msr)
119{ 130{
@@ -128,6 +139,7 @@ int reserve_perfctr_nmi(unsigned int msr)
128 return 1; 139 return 1;
129 return 0; 140 return 0;
130} 141}
142EXPORT_SYMBOL(reserve_perfctr_nmi);
131 143
132void release_perfctr_nmi(unsigned int msr) 144void release_perfctr_nmi(unsigned int msr)
133{ 145{
@@ -140,6 +152,7 @@ void release_perfctr_nmi(unsigned int msr)
140 152
141 clear_bit(counter, perfctr_nmi_owner); 153 clear_bit(counter, perfctr_nmi_owner);
142} 154}
155EXPORT_SYMBOL(release_perfctr_nmi);
143 156
144int reserve_evntsel_nmi(unsigned int msr) 157int reserve_evntsel_nmi(unsigned int msr)
145{ 158{
@@ -154,6 +167,7 @@ int reserve_evntsel_nmi(unsigned int msr)
154 return 1; 167 return 1;
155 return 0; 168 return 0;
156} 169}
170EXPORT_SYMBOL(reserve_evntsel_nmi);
157 171
158void release_evntsel_nmi(unsigned int msr) 172void release_evntsel_nmi(unsigned int msr)
159{ 173{
@@ -166,11 +180,6 @@ void release_evntsel_nmi(unsigned int msr)
166 180
167 clear_bit(counter, evntsel_nmi_owner); 181 clear_bit(counter, evntsel_nmi_owner);
168} 182}
169
170EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
171EXPORT_SYMBOL(reserve_perfctr_nmi);
172EXPORT_SYMBOL(release_perfctr_nmi);
173EXPORT_SYMBOL(reserve_evntsel_nmi);
174EXPORT_SYMBOL(release_evntsel_nmi); 183EXPORT_SYMBOL(release_evntsel_nmi);
175 184
176void disable_lapic_nmi_watchdog(void) 185void disable_lapic_nmi_watchdog(void)
@@ -180,8 +189,10 @@ void disable_lapic_nmi_watchdog(void)
180 if (atomic_read(&nmi_active) <= 0) 189 if (atomic_read(&nmi_active) <= 0)
181 return; 190 return;
182 191
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); 192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184 wd_ops->unreserve(); 193
194 if (wd_ops)
195 wd_ops->unreserve();
185 196
186 BUG_ON(atomic_read(&nmi_active) != 0); 197 BUG_ON(atomic_read(&nmi_active) != 0);
187} 198}
@@ -202,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
202 return; 213 return;
203 } 214 }
204 215
205 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); 216 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
206 touch_nmi_watchdog(); 217 touch_nmi_watchdog();
207} 218}
208 219
@@ -232,31 +243,32 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
232 return retval; 243 return retval;
233} 244}
234 245
235static void 246static void write_watchdog_counter(unsigned int perfctr_msr,
236write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) 247 const char *descr, unsigned nmi_hz)
237{ 248{
238 u64 count = (u64)cpu_khz * 1000; 249 u64 count = (u64)cpu_khz * 1000;
239 250
240 do_div(count, nmi_hz); 251 do_div(count, nmi_hz);
241 if(descr) 252 if(descr)
242 Dprintk("setting %s to -0x%08Lx\n", descr, count); 253 pr_debug("setting %s to -0x%08Lx\n", descr, count);
243 wrmsrl(perfctr_msr, 0 - count); 254 wrmsrl(perfctr_msr, 0 - count);
244} 255}
245 256
246static void write_watchdog_counter32(unsigned int perfctr_msr, 257static void write_watchdog_counter32(unsigned int perfctr_msr,
247 const char *descr, unsigned nmi_hz) 258 const char *descr, unsigned nmi_hz)
248{ 259{
249 u64 count = (u64)cpu_khz * 1000; 260 u64 count = (u64)cpu_khz * 1000;
250 261
251 do_div(count, nmi_hz); 262 do_div(count, nmi_hz);
252 if(descr) 263 if(descr)
253 Dprintk("setting %s to -0x%08Lx\n", descr, count); 264 pr_debug("setting %s to -0x%08Lx\n", descr, count);
254 wrmsr(perfctr_msr, (u32)(-count), 0); 265 wrmsr(perfctr_msr, (u32)(-count), 0);
255} 266}
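
Both helpers above program the performance counter with the negative of cpu_khz * 1000 / nmi_hz, so it overflows, and raises the NMI, roughly nmi_hz times per second; the 32-bit variant matches counters that only accept 32-bit writes (see the P6/ARCH_PERFMON comment further down). A small sketch of the arithmetic with assumed numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cpu_khz = 2400000;	/* assume a 2.4 GHz clock */
	unsigned int nmi_hz = 60;	/* desired watchdog rate  */

	uint64_t count = cpu_khz * 1000 / nmi_hz;	/* events per NMI period */

	/* The MSR is written with -count so it overflows after count events. */
	printf("setting counter to -0x%08llx\n", (unsigned long long)count);
	printf("raw 32-bit value written: 0x%08x\n", (uint32_t)(0 - count));
	return 0;
}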
256 267
257/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface 268/*
258 nicely stable so there is not much variety */ 269 * AMD K7/K8/Family10h/Family11h support.
259 270 * AMD keeps this interface nicely stable so there is not much variety
271 */
260#define K7_EVNTSEL_ENABLE (1 << 22) 272#define K7_EVNTSEL_ENABLE (1 << 22)
261#define K7_EVNTSEL_INT (1 << 20) 273#define K7_EVNTSEL_INT (1 << 20)
262#define K7_EVNTSEL_OS (1 << 17) 274#define K7_EVNTSEL_OS (1 << 17)
@@ -289,7 +301,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
289 301
290 wd->perfctr_msr = perfctr_msr; 302 wd->perfctr_msr = perfctr_msr;
291 wd->evntsel_msr = evntsel_msr; 303 wd->evntsel_msr = evntsel_msr;
292 wd->cccr_msr = 0; //unused 304 wd->cccr_msr = 0; /* unused */
293 return 1; 305 return 1;
294} 306}
295 307
@@ -325,18 +337,19 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
325} 337}
326 338
327static const struct wd_ops k7_wd_ops = { 339static const struct wd_ops k7_wd_ops = {
328 .reserve = single_msr_reserve, 340 .reserve = single_msr_reserve,
329 .unreserve = single_msr_unreserve, 341 .unreserve = single_msr_unreserve,
330 .setup = setup_k7_watchdog, 342 .setup = setup_k7_watchdog,
331 .rearm = single_msr_rearm, 343 .rearm = single_msr_rearm,
332 .stop = single_msr_stop_watchdog, 344 .stop = single_msr_stop_watchdog,
333 .perfctr = MSR_K7_PERFCTR0, 345 .perfctr = MSR_K7_PERFCTR0,
334 .evntsel = MSR_K7_EVNTSEL0, 346 .evntsel = MSR_K7_EVNTSEL0,
335 .checkbit = 1ULL<<47, 347 .checkbit = 1ULL << 47,
336}; 348};
337 349
338/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ 350/*
339 351 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
352 */
340#define P6_EVNTSEL0_ENABLE (1 << 22) 353#define P6_EVNTSEL0_ENABLE (1 << 22)
341#define P6_EVNTSEL_INT (1 << 20) 354#define P6_EVNTSEL_INT (1 << 20)
342#define P6_EVNTSEL_OS (1 << 17) 355#define P6_EVNTSEL_OS (1 << 17)
@@ -372,52 +385,58 @@ static int setup_p6_watchdog(unsigned nmi_hz)
372 385
373 wd->perfctr_msr = perfctr_msr; 386 wd->perfctr_msr = perfctr_msr;
374 wd->evntsel_msr = evntsel_msr; 387 wd->evntsel_msr = evntsel_msr;
375 wd->cccr_msr = 0; //unused 388 wd->cccr_msr = 0; /* unused */
376 return 1; 389 return 1;
377} 390}
378 391
379static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 392static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
380{ 393{
381 /* P6 based Pentium M need to re-unmask 394 /*
395 * P6 based Pentium M need to re-unmask
382 * the apic vector but it doesn't hurt 396 * the apic vector but it doesn't hurt
383 * other P6 variant. 397 * other P6 variant.
384 * ArchPerfom/Core Duo also needs this */ 398 * ArchPerfom/Core Duo also needs this
399 */
385 apic_write(APIC_LVTPC, APIC_DM_NMI); 400 apic_write(APIC_LVTPC, APIC_DM_NMI);
401
386 /* P6/ARCH_PERFMON has 32 bit counter write */ 402 /* P6/ARCH_PERFMON has 32 bit counter write */
387 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); 403 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
388} 404}
389 405
390static const struct wd_ops p6_wd_ops = { 406static const struct wd_ops p6_wd_ops = {
391 .reserve = single_msr_reserve, 407 .reserve = single_msr_reserve,
392 .unreserve = single_msr_unreserve, 408 .unreserve = single_msr_unreserve,
393 .setup = setup_p6_watchdog, 409 .setup = setup_p6_watchdog,
394 .rearm = p6_rearm, 410 .rearm = p6_rearm,
395 .stop = single_msr_stop_watchdog, 411 .stop = single_msr_stop_watchdog,
396 .perfctr = MSR_P6_PERFCTR0, 412 .perfctr = MSR_P6_PERFCTR0,
397 .evntsel = MSR_P6_EVNTSEL0, 413 .evntsel = MSR_P6_EVNTSEL0,
398 .checkbit = 1ULL<<39, 414 .checkbit = 1ULL << 39,
399}; 415};
400 416
401/* Intel P4 performance counters. By far the most complicated of all. */ 417/*
402 418 * Intel P4 performance counters.
403#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 419 * By far the most complicated of all.
404#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) 420 */
405#define P4_ESCR_OS (1<<3) 421#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
406#define P4_ESCR_USR (1<<2) 422#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
407#define P4_CCCR_OVF_PMI0 (1<<26) 423#define P4_ESCR_OS (1 << 3)
408#define P4_CCCR_OVF_PMI1 (1<<27) 424#define P4_ESCR_USR (1 << 2)
409#define P4_CCCR_THRESHOLD(N) ((N)<<20) 425#define P4_CCCR_OVF_PMI0 (1 << 26)
410#define P4_CCCR_COMPLEMENT (1<<19) 426#define P4_CCCR_OVF_PMI1 (1 << 27)
411#define P4_CCCR_COMPARE (1<<18) 427#define P4_CCCR_THRESHOLD(N) ((N) << 20)
412#define P4_CCCR_REQUIRED (3<<16) 428#define P4_CCCR_COMPLEMENT (1 << 19)
413#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) 429#define P4_CCCR_COMPARE (1 << 18)
414#define P4_CCCR_ENABLE (1<<12) 430#define P4_CCCR_REQUIRED (3 << 16)
415#define P4_CCCR_OVF (1<<31) 431#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
416 432#define P4_CCCR_ENABLE (1 << 12)
417/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter 433#define P4_CCCR_OVF (1 << 31)
418 CRU_ESCR0 (with any non-null event selector) through a complemented
419 max threshold. [IA32-Vol3, Section 14.9.9] */
420 434
435/*
436 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
437 * CRU_ESCR0 (with any non-null event selector) through a complemented
438 * max threshold. [IA32-Vol3, Section 14.9.9]
439 */
421static int setup_p4_watchdog(unsigned nmi_hz) 440static int setup_p4_watchdog(unsigned nmi_hz)
422{ 441{
423 unsigned int perfctr_msr, evntsel_msr, cccr_msr; 442 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
@@ -442,7 +461,8 @@ static int setup_p4_watchdog(unsigned nmi_hz)
442#endif 461#endif
443 ht_num = 0; 462 ht_num = 0;
444 463
445 /* performance counters are shared resources 464 /*
465 * performance counters are shared resources
446 * assign each hyperthread its own set 466 * assign each hyperthread its own set
447 * (re-use the ESCR0 register, seems safe 467 * (re-use the ESCR0 register, seems safe
448 * and keeps the cccr_val the same) 468 * and keeps the cccr_val the same)
@@ -540,20 +560,21 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
540} 560}
541 561
542static const struct wd_ops p4_wd_ops = { 562static const struct wd_ops p4_wd_ops = {
543 .reserve = p4_reserve, 563 .reserve = p4_reserve,
544 .unreserve = p4_unreserve, 564 .unreserve = p4_unreserve,
545 .setup = setup_p4_watchdog, 565 .setup = setup_p4_watchdog,
546 .rearm = p4_rearm, 566 .rearm = p4_rearm,
547 .stop = stop_p4_watchdog, 567 .stop = stop_p4_watchdog,
548 /* RED-PEN this is wrong for the other sibling */ 568 /* RED-PEN this is wrong for the other sibling */
549 .perfctr = MSR_P4_BPU_PERFCTR0, 569 .perfctr = MSR_P4_BPU_PERFCTR0,
550 .evntsel = MSR_P4_BSU_ESCR0, 570 .evntsel = MSR_P4_BSU_ESCR0,
551 .checkbit = 1ULL<<39, 571 .checkbit = 1ULL << 39,
552}; 572};
553 573
554/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully 574/*
555 all future Intel CPUs. */ 575 * Watchdog using the Intel architected PerfMon.
556 576 * Used for Core2 and hopefully all future Intel CPUs.
577 */
557#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 578#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
558#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK 579#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
559 580
@@ -599,19 +620,19 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
599 620
600 wd->perfctr_msr = perfctr_msr; 621 wd->perfctr_msr = perfctr_msr;
601 wd->evntsel_msr = evntsel_msr; 622 wd->evntsel_msr = evntsel_msr;
602 wd->cccr_msr = 0; //unused 623 wd->cccr_msr = 0; /* unused */
603 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 624 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
604 return 1; 625 return 1;
605} 626}
606 627
607static struct wd_ops intel_arch_wd_ops __read_mostly = { 628static struct wd_ops intel_arch_wd_ops __read_mostly = {
608 .reserve = single_msr_reserve, 629 .reserve = single_msr_reserve,
609 .unreserve = single_msr_unreserve, 630 .unreserve = single_msr_unreserve,
610 .setup = setup_intel_arch_watchdog, 631 .setup = setup_intel_arch_watchdog,
611 .rearm = p6_rearm, 632 .rearm = p6_rearm,
612 .stop = single_msr_stop_watchdog, 633 .stop = single_msr_stop_watchdog,
613 .perfctr = MSR_ARCH_PERFMON_PERFCTR1, 634 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
614 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, 635 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
615}; 636};
616 637
617static void probe_nmi_watchdog(void) 638static void probe_nmi_watchdog(void)
@@ -624,8 +645,10 @@ static void probe_nmi_watchdog(void)
624 wd_ops = &k7_wd_ops; 645 wd_ops = &k7_wd_ops;
625 break; 646 break;
626 case X86_VENDOR_INTEL: 647 case X86_VENDOR_INTEL:
627 /* Work around Core Duo (Yonah) errata AE49 where perfctr1 648 /*
628 doesn't have a working enable bit. */ 649 * Work around Core Duo (Yonah) errata AE49 where perfctr1
650 * doesn't have a working enable bit.
651 */
629 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { 652 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
630 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; 653 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
631 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; 654 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
@@ -636,7 +659,7 @@ static void probe_nmi_watchdog(void)
636 } 659 }
637 switch (boot_cpu_data.x86) { 660 switch (boot_cpu_data.x86) {
638 case 6: 661 case 6:
639 if (boot_cpu_data.x86_model > 0xd) 662 if (boot_cpu_data.x86_model > 13)
640 return; 663 return;
641 664
642 wd_ops = &p6_wd_ops; 665 wd_ops = &p6_wd_ops;
@@ -697,10 +720,11 @@ int lapic_wd_event(unsigned nmi_hz)
697{ 720{
698 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 721 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
699 u64 ctr; 722 u64 ctr;
723
700 rdmsrl(wd->perfctr_msr, ctr); 724 rdmsrl(wd->perfctr_msr, ctr);
701 if (ctr & wd_ops->checkbit) { /* perfctr still running? */ 725 if (ctr & wd_ops->checkbit) /* perfctr still running? */
702 return 0; 726 return 0;
703 } 727
704 wd_ops->rearm(wd, nmi_hz); 728 wd_ops->rearm(wd, nmi_hz);
705 return 1; 729 return 1;
706} 730}
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 0d0d9057e7c0..a26c480b9491 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
160{ 160{
161 if (*pos == 0) /* just in case, cpu 0 is not the first */ 161 if (*pos == 0) /* just in case, cpu 0 is not the first */
162 *pos = first_cpu(cpu_online_map); 162 *pos = first_cpu(cpu_online_map);
163 if ((*pos) < NR_CPUS && cpu_online(*pos)) 163 if ((*pos) < nr_cpu_ids && cpu_online(*pos))
164 return &cpu_data(*pos); 164 return &cpu_data(*pos);
165 return NULL; 165 return NULL;
166} 166}