aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/apic_32.c437
-rw-r--r--arch/x86/kernel/apic_64.c626
-rw-r--r--arch/x86/kernel/cpu/Makefile34
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c88
-rw-r--r--arch/x86/kernel/cpu/amd.c548
-rw-r--r--arch/x86/kernel/cpu/amd_64.c224
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c6
-rw-r--r--arch/x86/kernel/cpu/cmpxchg.c72
-rw-r--r--arch/x86/kernel/cpu/common.c973
-rw-r--r--arch/x86/kernel/cpu/common_64.c763
-rw-r--r--arch/x86/kernel/cpu/cpu.h19
-rw-r--r--arch/x86/kernel/cpu/cyrix.c23
-rw-r--r--arch/x86/kernel/cpu/feature_names.c84
-rw-r--r--arch/x86/kernel/cpu/intel.c364
-rw-r--r--arch/x86/kernel/cpu/intel_64.c95
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c169
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c2
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl32
-rw-r--r--arch/x86/kernel/cpu/powerflags.c20
-rw-r--r--arch/x86/kernel/cpu/transmeta.c32
-rw-r--r--arch/x86/kernel/cpu/umc.c3
-rw-r--r--arch/x86/kernel/e820.c28
-rw-r--r--arch/x86/kernel/es7000_32.c345
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c159
-rw-r--r--arch/x86/kernel/genx2apic_phys.c154
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c70
-rw-r--r--arch/x86/kernel/i387.c154
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c47
-rw-r--r--arch/x86/kernel/io_apic_64.c639
-rw-r--r--arch/x86/kernel/irqinit_32.c49
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process.c1
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sigframe.h14
-rw-r--r--arch/x86/kernel/signal_32.c45
-rw-r--r--arch/x86/kernel/signal_64.c95
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/traps_64.c6
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S9
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S9
-rw-r--r--arch/x86/kernel/xsave.c316
52 files changed, 4536 insertions, 2467 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..c9be69fedb70 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o
38 38
39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
40obj-y += process.o 40obj-y += process.o
41obj-y += i387.o 41obj-y += i387.o xsave.o
42obj-y += ptrace.o 42obj-y += ptrace.o
43obj-y += ds.o 43obj-y += ds.o
44obj-$(CONFIG_X86_32) += tls.o 44obj-$(CONFIG_X86_32) += tls.o
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
71obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 71obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
72obj-$(CONFIG_X86_ES7000) += es7000_32.o
72obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o 73obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
73obj-y += vsmp_64.o 74obj-y += vsmp_64.o
74obj-$(CONFIG_KPROBES) += kprobes.o 75obj-$(CONFIG_KPROBES) += kprobes.o
@@ -104,6 +105,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 105ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 106 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 107 obj-y += bios_uv.o
108 obj-y += genx2apic_cluster.o
109 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 110 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 111 obj-$(CONFIG_AUDIT) += audit_64.o
109 112
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7d40ef7b36e3..c2ac1b4515a0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -252,10 +252,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
252 return; 252 return;
253 } 253 }
254 254
255#ifdef CONFIG_X86_32
256 if (boot_cpu_physical_apicid != -1U) 255 if (boot_cpu_physical_apicid != -1U)
257 ver = apic_version[boot_cpu_physical_apicid]; 256 ver = apic_version[boot_cpu_physical_apicid];
258#endif
259 257
260 generic_processor_info(id, ver); 258 generic_processor_info(id, ver);
261} 259}
@@ -774,11 +772,9 @@ static void __init acpi_register_lapic_address(unsigned long address)
774 772
775 set_fixmap_nocache(FIX_APIC_BASE, address); 773 set_fixmap_nocache(FIX_APIC_BASE, address);
776 if (boot_cpu_physical_apicid == -1U) { 774 if (boot_cpu_physical_apicid == -1U) {
777 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 775 boot_cpu_physical_apicid = read_apic_id();
778#ifdef CONFIG_X86_32
779 apic_version[boot_cpu_physical_apicid] = 776 apic_version[boot_cpu_physical_apicid] =
780 GET_APIC_VERSION(apic_read(APIC_LVR)); 777 GET_APIC_VERSION(apic_read(APIC_LVR));
781#endif
782 } 778 }
783} 779}
784 780
@@ -1350,7 +1346,9 @@ static void __init acpi_process_madt(void)
1350 acpi_ioapic = 1; 1346 acpi_ioapic = 1;
1351 1347
1352 smp_found_config = 1; 1348 smp_found_config = 1;
1349#ifdef CONFIG_X86_32
1353 setup_apic_routing(); 1350 setup_apic_routing();
1351#endif
1354 } 1352 }
1355 } 1353 }
1356 if (error == -EINVAL) { 1354 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d982b0..a91c57cb666a 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -60,10 +60,8 @@ unsigned long mp_lapic_addr;
60static int force_enable_local_apic; 60static int force_enable_local_apic;
61int disable_apic; 61int disable_apic;
62 62
63/* Local APIC timer verification ok */
64static int local_apic_timer_verify_ok;
65/* Disable local APIC timer from the kernel commandline or via dmi quirk */ 63/* Disable local APIC timer from the kernel commandline or via dmi quirk */
66static int local_apic_timer_disabled; 64static int disable_apic_timer __cpuinitdata;
67/* Local APIC timer works in C2 */ 65/* Local APIC timer works in C2 */
68int local_apic_timer_c2_ok; 66int local_apic_timer_c2_ok;
69EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 67EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -130,7 +128,11 @@ static inline int lapic_get_version(void)
130 */ 128 */
131static inline int lapic_is_integrated(void) 129static inline int lapic_is_integrated(void)
132{ 130{
131#ifdef CONFIG_X86_64
132 return 1;
133#else
133 return APIC_INTEGRATED(lapic_get_version()); 134 return APIC_INTEGRATED(lapic_get_version());
135#endif
134} 136}
135 137
136/* 138/*
@@ -145,13 +147,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 147 return lapic_get_version() >= 0x14;
146} 148}
147 149
148void apic_wait_icr_idle(void) 150/*
151 * Paravirt kernels also might be using these below ops. So we still
152 * use generic apic_read()/apic_write(), which might be pointing to different
153 * ops in PARAVIRT case.
154 */
155void xapic_wait_icr_idle(void)
149{ 156{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 157 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 158 cpu_relax();
152} 159}
153 160
154u32 safe_apic_wait_icr_idle(void) 161u32 safe_xapic_wait_icr_idle(void)
155{ 162{
156 u32 send_status; 163 u32 send_status;
157 int timeout; 164 int timeout;
@@ -167,16 +174,48 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 174 return send_status;
168} 175}
169 176
177void xapic_icr_write(u32 low, u32 id)
178{
179 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
180 apic_write(APIC_ICR, low);
181}
182
183u64 xapic_icr_read(void)
184{
185 u32 icr1, icr2;
186
187 icr2 = apic_read(APIC_ICR2);
188 icr1 = apic_read(APIC_ICR);
189
190 return icr1 | ((u64)icr2 << 32);
191}
192
193static struct apic_ops xapic_ops = {
194 .read = native_apic_mem_read,
195 .write = native_apic_mem_write,
196 .icr_read = xapic_icr_read,
197 .icr_write = xapic_icr_write,
198 .wait_icr_idle = xapic_wait_icr_idle,
199 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
200};
201
202struct apic_ops __read_mostly *apic_ops = &xapic_ops;
203EXPORT_SYMBOL_GPL(apic_ops);
204
170/** 205/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 206 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 207 */
173void __cpuinit enable_NMI_through_LVT0(void) 208void __cpuinit enable_NMI_through_LVT0(void)
174{ 209{
175 unsigned int v = APIC_DM_NMI; 210 unsigned int v;
176 211
177 /* Level triggered for 82489DX */ 212 /* unmask and set to NMI */
213 v = APIC_DM_NMI;
214
215 /* Level triggered for 82489DX (32bit mode) */
178 if (!lapic_is_integrated()) 216 if (!lapic_is_integrated())
179 v |= APIC_LVT_LEVEL_TRIGGER; 217 v |= APIC_LVT_LEVEL_TRIGGER;
218
180 apic_write(APIC_LVT0, v); 219 apic_write(APIC_LVT0, v);
181} 220}
182 221
@@ -193,9 +232,13 @@ int get_physical_broadcast(void)
193 */ 232 */
194int lapic_get_maxlvt(void) 233int lapic_get_maxlvt(void)
195{ 234{
196 unsigned int v = apic_read(APIC_LVR); 235 unsigned int v;
197 236
198 /* 82489DXs do not report # of LVT entries. */ 237 v = apic_read(APIC_LVR);
238 /*
239 * - we always have APIC integrated on 64bit mode
240 * - 82489DXs do not report # of LVT entries
241 */
199 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; 242 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
200} 243}
201 244
@@ -203,8 +246,12 @@ int lapic_get_maxlvt(void)
203 * Local APIC timer 246 * Local APIC timer
204 */ 247 */
205 248
206/* Clock divisor is set to 16 */ 249/* Clock divisor */
250#ifdef CONFG_X86_64
251#define APIC_DIVISOR 1
252#else
207#define APIC_DIVISOR 16 253#define APIC_DIVISOR 16
254#endif
208 255
209/* 256/*
210 * This function sets up the local APIC timer, with a timeout of 257 * This function sets up the local APIC timer, with a timeout of
@@ -212,6 +259,9 @@ int lapic_get_maxlvt(void)
212 * this function twice on the boot CPU, once with a bogus timeout 259 * this function twice on the boot CPU, once with a bogus timeout
213 * value, second time for real. The other (noncalibrating) CPUs 260 * value, second time for real. The other (noncalibrating) CPUs
214 * call this function only once, with the real, calibrated value. 261 * call this function only once, with the real, calibrated value.
262 *
263 * We do reads before writes even if unnecessary, to get around the
264 * P5 APIC double write bug.
215 */ 265 */
216static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 266static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
217{ 267{
@@ -233,14 +283,44 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
233 */ 283 */
234 tmp_value = apic_read(APIC_TDCR); 284 tmp_value = apic_read(APIC_TDCR);
235 apic_write(APIC_TDCR, 285 apic_write(APIC_TDCR,
236 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 286 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
237 APIC_TDR_DIV_16); 287 APIC_TDR_DIV_16);
238 288
239 if (!oneshot) 289 if (!oneshot)
240 apic_write(APIC_TMICT, clocks / APIC_DIVISOR); 290 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
241} 291}
242 292
243/* 293/*
294 * Setup extended LVT, AMD specific (K8, family 10h)
295 *
296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
297 * MCE interrupts are supported. Thus MCE offset must be set to 0.
298 */
299
300#define APIC_EILVT_LVTOFF_MCE 0
301#define APIC_EILVT_LVTOFF_IBS 1
302
303static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
304{
305 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
306 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
307
308 apic_write(reg, v);
309}
310
311u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
312{
313 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
314 return APIC_EILVT_LVTOFF_MCE;
315}
316
317u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
318{
319 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
320 return APIC_EILVT_LVTOFF_IBS;
321}
322
323/*
244 * Program the next event, relative to now 324 * Program the next event, relative to now
245 */ 325 */
246static int lapic_next_event(unsigned long delta, 326static int lapic_next_event(unsigned long delta,
@@ -259,8 +339,8 @@ static void lapic_timer_setup(enum clock_event_mode mode,
259 unsigned long flags; 339 unsigned long flags;
260 unsigned int v; 340 unsigned int v;
261 341
262 /* Lapic used for broadcast ? */ 342 /* Lapic used as dummy for broadcast ? */
263 if (!local_apic_timer_verify_ok) 343 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
264 return; 344 return;
265 345
266 local_irq_save(flags); 346 local_irq_save(flags);
@@ -473,7 +553,7 @@ static int __init calibrate_APIC_clock(void)
473 return -1; 553 return -1;
474 } 554 }
475 555
476 local_apic_timer_verify_ok = 1; 556 levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
477 557
478 /* We trust the pm timer based calibration */ 558 /* We trust the pm timer based calibration */
479 if (!pm_referenced) { 559 if (!pm_referenced) {
@@ -507,11 +587,11 @@ static int __init calibrate_APIC_clock(void)
507 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) 587 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
508 apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); 588 apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
509 else 589 else
510 local_apic_timer_verify_ok = 0; 590 levt->features |= CLOCK_EVT_FEAT_DUMMY;
511 } else 591 } else
512 local_irq_enable(); 592 local_irq_enable();
513 593
514 if (!local_apic_timer_verify_ok) { 594 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
515 printk(KERN_WARNING 595 printk(KERN_WARNING
516 "APIC timer disabled due to verification failure.\n"); 596 "APIC timer disabled due to verification failure.\n");
517 return -1; 597 return -1;
@@ -533,7 +613,8 @@ void __init setup_boot_APIC_clock(void)
533 * timer as a dummy clock event source on SMP systems, so the 613 * timer as a dummy clock event source on SMP systems, so the
534 * broadcast mechanism is used. On UP systems simply ignore it. 614 * broadcast mechanism is used. On UP systems simply ignore it.
535 */ 615 */
536 if (local_apic_timer_disabled) { 616 if (disable_apic_timer) {
617 printk(KERN_INFO "Disabling APIC timer\n");
537 /* No broadcast on UP ! */ 618 /* No broadcast on UP ! */
538 if (num_possible_cpus() > 1) { 619 if (num_possible_cpus() > 1) {
539 lapic_clockevent.mult = 1; 620 lapic_clockevent.mult = 1;
@@ -602,7 +683,11 @@ static void local_apic_timer_interrupt(void)
602 /* 683 /*
603 * the NMI deadlock-detector uses this. 684 * the NMI deadlock-detector uses this.
604 */ 685 */
686#ifdef CONFIG_X86_64
687 add_pda(apic_timer_irqs, 1);
688#else
605 per_cpu(irq_stat, cpu).apic_timer_irqs++; 689 per_cpu(irq_stat, cpu).apic_timer_irqs++;
690#endif
606 691
607 evt->event_handler(evt); 692 evt->event_handler(evt);
608} 693}
@@ -642,35 +727,6 @@ int setup_profiling_timer(unsigned int multiplier)
642} 727}
643 728
644/* 729/*
645 * Setup extended LVT, AMD specific (K8, family 10h)
646 *
647 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
648 * MCE interrupts are supported. Thus MCE offset must be set to 0.
649 */
650
651#define APIC_EILVT_LVTOFF_MCE 0
652#define APIC_EILVT_LVTOFF_IBS 1
653
654static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
655{
656 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
657 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
658 apic_write(reg, v);
659}
660
661u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
662{
663 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
664 return APIC_EILVT_LVTOFF_MCE;
665}
666
667u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
668{
669 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
670 return APIC_EILVT_LVTOFF_IBS;
671}
672
673/*
674 * Local APIC start and shutdown 730 * Local APIC start and shutdown
675 */ 731 */
676 732
@@ -715,7 +771,7 @@ void clear_local_APIC(void)
715 } 771 }
716 772
717 /* lets not touch this if we didn't frob it */ 773 /* lets not touch this if we didn't frob it */
718#ifdef CONFIG_X86_MCE_P4THERMAL 774#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
719 if (maxlvt >= 5) { 775 if (maxlvt >= 5) {
720 v = apic_read(APIC_LVTTHMR); 776 v = apic_read(APIC_LVTTHMR);
721 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); 777 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -732,10 +788,6 @@ void clear_local_APIC(void)
732 if (maxlvt >= 4) 788 if (maxlvt >= 4)
733 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 789 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
734 790
735#ifdef CONFIG_X86_MCE_P4THERMAL
736 if (maxlvt >= 5)
737 apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
738#endif
739 /* Integrated APIC (!82489DX) ? */ 791 /* Integrated APIC (!82489DX) ? */
740 if (lapic_is_integrated()) { 792 if (lapic_is_integrated()) {
741 if (maxlvt > 3) 793 if (maxlvt > 3)
@@ -750,7 +802,7 @@ void clear_local_APIC(void)
750 */ 802 */
751void disable_local_APIC(void) 803void disable_local_APIC(void)
752{ 804{
753 unsigned long value; 805 unsigned int value;
754 806
755 clear_local_APIC(); 807 clear_local_APIC();
756 808
@@ -762,6 +814,7 @@ void disable_local_APIC(void)
762 value &= ~APIC_SPIV_APIC_ENABLED; 814 value &= ~APIC_SPIV_APIC_ENABLED;
763 apic_write(APIC_SPIV, value); 815 apic_write(APIC_SPIV, value);
764 816
817#ifdef CONFIG_X86_32
765 /* 818 /*
766 * When LAPIC was disabled by the BIOS and enabled by the kernel, 819 * When LAPIC was disabled by the BIOS and enabled by the kernel,
767 * restore the disabled state. 820 * restore the disabled state.
@@ -773,6 +826,7 @@ void disable_local_APIC(void)
773 l &= ~MSR_IA32_APICBASE_ENABLE; 826 l &= ~MSR_IA32_APICBASE_ENABLE;
774 wrmsr(MSR_IA32_APICBASE, l, h); 827 wrmsr(MSR_IA32_APICBASE, l, h);
775 } 828 }
829#endif
776} 830}
777 831
778/* 832/*
@@ -789,11 +843,15 @@ void lapic_shutdown(void)
789 return; 843 return;
790 844
791 local_irq_save(flags); 845 local_irq_save(flags);
792 clear_local_APIC();
793 846
794 if (enabled_via_apicbase) 847#ifdef CONFIG_X86_32
848 if (!enabled_via_apicbase)
849 clear_local_APIC();
850 else
851#endif
795 disable_local_APIC(); 852 disable_local_APIC();
796 853
854
797 local_irq_restore(flags); 855 local_irq_restore(flags);
798} 856}
799 857
@@ -838,6 +896,12 @@ int __init verify_local_APIC(void)
838 */ 896 */
839 reg0 = apic_read(APIC_ID); 897 reg0 = apic_read(APIC_ID);
840 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 898 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
899 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
900 reg1 = apic_read(APIC_ID);
901 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
902 apic_write(APIC_ID, reg0);
903 if (reg1 != (reg0 ^ APIC_ID_MASK))
904 return 0;
841 905
842 /* 906 /*
843 * The next two are just to see if we have sane values. 907 * The next two are just to see if we have sane values.
@@ -863,14 +927,15 @@ void __init sync_Arb_IDs(void)
863 */ 927 */
864 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 928 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
865 return; 929 return;
930
866 /* 931 /*
867 * Wait for idle. 932 * Wait for idle.
868 */ 933 */
869 apic_wait_icr_idle(); 934 apic_wait_icr_idle();
870 935
871 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 936 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
872 apic_write(APIC_ICR, 937 apic_write(APIC_ICR, APIC_DEST_ALLINC |
873 APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); 938 APIC_INT_LEVELTRIG | APIC_DM_INIT);
874} 939}
875 940
876/* 941/*
@@ -878,7 +943,7 @@ void __init sync_Arb_IDs(void)
878 */ 943 */
879void __init init_bsp_APIC(void) 944void __init init_bsp_APIC(void)
880{ 945{
881 unsigned long value; 946 unsigned int value;
882 947
883 /* 948 /*
884 * Don't do the setup now if we have a SMP BIOS as the 949 * Don't do the setup now if we have a SMP BIOS as the
@@ -899,11 +964,13 @@ void __init init_bsp_APIC(void)
899 value &= ~APIC_VECTOR_MASK; 964 value &= ~APIC_VECTOR_MASK;
900 value |= APIC_SPIV_APIC_ENABLED; 965 value |= APIC_SPIV_APIC_ENABLED;
901 966
967#ifdef CONFIG_X86_32
902 /* This bit is reserved on P4/Xeon and should be cleared */ 968 /* This bit is reserved on P4/Xeon and should be cleared */
903 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 969 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
904 (boot_cpu_data.x86 == 15)) 970 (boot_cpu_data.x86 == 15))
905 value &= ~APIC_SPIV_FOCUS_DISABLED; 971 value &= ~APIC_SPIV_FOCUS_DISABLED;
906 else 972 else
973#endif
907 value |= APIC_SPIV_FOCUS_DISABLED; 974 value |= APIC_SPIV_FOCUS_DISABLED;
908 value |= SPURIOUS_APIC_VECTOR; 975 value |= SPURIOUS_APIC_VECTOR;
909 apic_write(APIC_SPIV, value); 976 apic_write(APIC_SPIV, value);
@@ -922,6 +989,16 @@ static void __cpuinit lapic_setup_esr(void)
922{ 989{
923 unsigned long oldvalue, value, maxlvt; 990 unsigned long oldvalue, value, maxlvt;
924 if (lapic_is_integrated() && !esr_disable) { 991 if (lapic_is_integrated() && !esr_disable) {
992 if (esr_disable) {
993 /*
994 * Something untraceable is creating bad interrupts on
995 * secondary quads ... for the moment, just leave the
996 * ESR disabled - we can't do anything useful with the
997 * errors anyway - mbligh
998 */
999 printk(KERN_INFO "Leaving ESR disabled.\n");
1000 return;
1001 }
925 /* !82489DX */ 1002 /* !82489DX */
926 maxlvt = lapic_get_maxlvt(); 1003 maxlvt = lapic_get_maxlvt();
927 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 1004 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
@@ -942,16 +1019,7 @@ static void __cpuinit lapic_setup_esr(void)
942 "vector: 0x%08lx after: 0x%08lx\n", 1019 "vector: 0x%08lx after: 0x%08lx\n",
943 oldvalue, value); 1020 oldvalue, value);
944 } else { 1021 } else {
945 if (esr_disable) 1022 printk(KERN_INFO "No ESR for 82489DX.\n");
946 /*
947 * Something untraceable is creating bad interrupts on
948 * secondary quads ... for the moment, just leave the
949 * ESR disabled - we can't do anything useful with the
950 * errors anyway - mbligh
951 */
952 printk(KERN_INFO "Leaving ESR disabled.\n");
953 else
954 printk(KERN_INFO "No ESR for 82489DX.\n");
955 } 1023 }
956} 1024}
957 1025
@@ -1089,13 +1157,17 @@ void __cpuinit setup_local_APIC(void)
1089 1157
1090void __cpuinit end_local_APIC_setup(void) 1158void __cpuinit end_local_APIC_setup(void)
1091{ 1159{
1092 unsigned long value;
1093
1094 lapic_setup_esr(); 1160 lapic_setup_esr();
1095 /* Disable the local apic timer */ 1161
1096 value = apic_read(APIC_LVTT); 1162#ifdef CONFIG_X86_32
1097 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 1163 {
1098 apic_write(APIC_LVTT, value); 1164 unsigned int value;
1165 /* Disable the local apic timer */
1166 value = apic_read(APIC_LVTT);
1167 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1168 apic_write(APIC_LVTT, value);
1169 }
1170#endif
1099 1171
1100 setup_apic_nmi_watchdog(NULL); 1172 setup_apic_nmi_watchdog(NULL);
1101 apic_pm_activate(); 1173 apic_pm_activate();
@@ -1205,7 +1277,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1277 * default configuration (or the MP table is broken).
1206 */ 1278 */
1207 if (boot_cpu_physical_apicid == -1U) 1279 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1280 boot_cpu_physical_apicid = read_apic_id();
1209 1281
1210} 1282}
1211 1283
@@ -1242,7 +1314,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1314 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1315 */
1244#ifdef CONFIG_CRASH_DUMP 1316#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1317 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1318#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1319 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1320
@@ -1321,59 +1393,12 @@ void smp_error_interrupt(struct pt_regs *regs)
1321 irq_exit(); 1393 irq_exit();
1322} 1394}
1323 1395
1324#ifdef CONFIG_SMP
1325void __init smp_intr_init(void)
1326{
1327 /*
1328 * IRQ0 must be given a fixed assignment and initialized,
1329 * because it's used before the IO-APIC is set up.
1330 */
1331 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
1332
1333 /*
1334 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
1335 * IPI, driven by wakeup.
1336 */
1337 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
1338
1339 /* IPI for invalidation */
1340 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1341
1342 /* IPI for generic function call */
1343 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1344
1345 /* IPI for single call function */
1346 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
1347 call_function_single_interrupt);
1348}
1349#endif
1350
1351/*
1352 * Initialize APIC interrupts
1353 */
1354void __init apic_intr_init(void)
1355{
1356#ifdef CONFIG_SMP
1357 smp_intr_init();
1358#endif
1359 /* self generated IPI for local APIC timer */
1360 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1361
1362 /* IPI vectors for APIC spurious and error interrupts */
1363 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1364 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1365
1366 /* thermal monitor LVT interrupt */
1367#ifdef CONFIG_X86_MCE_P4THERMAL
1368 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1369#endif
1370}
1371
1372/** 1396/**
1373 * connect_bsp_APIC - attach the APIC to the interrupt system 1397 * connect_bsp_APIC - attach the APIC to the interrupt system
1374 */ 1398 */
1375void __init connect_bsp_APIC(void) 1399void __init connect_bsp_APIC(void)
1376{ 1400{
1401#ifdef CONFIG_X86_32
1377 if (pic_mode) { 1402 if (pic_mode) {
1378 /* 1403 /*
1379 * Do not trust the local APIC being empty at bootup. 1404 * Do not trust the local APIC being empty at bootup.
@@ -1388,6 +1413,7 @@ void __init connect_bsp_APIC(void)
1388 outb(0x70, 0x22); 1413 outb(0x70, 0x22);
1389 outb(0x01, 0x23); 1414 outb(0x01, 0x23);
1390 } 1415 }
1416#endif
1391 enable_apic_mode(); 1417 enable_apic_mode();
1392} 1418}
1393 1419
@@ -1400,6 +1426,9 @@ void __init connect_bsp_APIC(void)
1400 */ 1426 */
1401void disconnect_bsp_APIC(int virt_wire_setup) 1427void disconnect_bsp_APIC(int virt_wire_setup)
1402{ 1428{
1429 unsigned int value;
1430
1431#ifdef CONFIG_X86_32
1403 if (pic_mode) { 1432 if (pic_mode) {
1404 /* 1433 /*
1405 * Put the board back into PIC mode (has an effect only on 1434 * Put the board back into PIC mode (has an effect only on
@@ -1411,54 +1440,53 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1411 "entering PIC mode.\n"); 1440 "entering PIC mode.\n");
1412 outb(0x70, 0x22); 1441 outb(0x70, 0x22);
1413 outb(0x00, 0x23); 1442 outb(0x00, 0x23);
1414 } else { 1443 return;
1415 /* Go back to Virtual Wire compatibility mode */ 1444 }
1416 unsigned long value; 1445#endif
1417 1446
1418 /* For the spurious interrupt use vector F, and enable it */ 1447 /* Go back to Virtual Wire compatibility mode */
1419 value = apic_read(APIC_SPIV);
1420 value &= ~APIC_VECTOR_MASK;
1421 value |= APIC_SPIV_APIC_ENABLED;
1422 value |= 0xf;
1423 apic_write(APIC_SPIV, value);
1424 1448
1425 if (!virt_wire_setup) { 1449 /* For the spurious interrupt use vector F, and enable it */
1426 /* 1450 value = apic_read(APIC_SPIV);
1427 * For LVT0 make it edge triggered, active high, 1451 value &= ~APIC_VECTOR_MASK;
1428 * external and enabled 1452 value |= APIC_SPIV_APIC_ENABLED;
1429 */ 1453 value |= 0xf;
1430 value = apic_read(APIC_LVT0); 1454 apic_write(APIC_SPIV, value);
1431 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1432 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1433 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1434 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1435 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1436 apic_write(APIC_LVT0, value);
1437 } else {
1438 /* Disable LVT0 */
1439 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1440 }
1441 1455
1456 if (!virt_wire_setup) {
1442 /* 1457 /*
1443 * For LVT1 make it edge triggered, active high, nmi and 1458 * For LVT0 make it edge triggered, active high,
1444 * enabled 1459 * external and enabled
1445 */ 1460 */
1446 value = apic_read(APIC_LVT1); 1461 value = apic_read(APIC_LVT0);
1447 value &= ~( 1462 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1448 APIC_MODE_MASK | APIC_SEND_PENDING |
1449 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1463 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1450 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 1464 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1451 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 1465 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1452 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); 1466 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1453 apic_write(APIC_LVT1, value); 1467 apic_write(APIC_LVT0, value);
1468 } else {
1469 /* Disable LVT0 */
1470 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1454 } 1471 }
1472
1473 /*
1474 * For LVT1 make it edge triggered, active high,
1475 * nmi and enabled
1476 */
1477 value = apic_read(APIC_LVT1);
1478 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1479 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1480 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1481 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1482 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1483 apic_write(APIC_LVT1, value);
1455} 1484}
1456 1485
1457void __cpuinit generic_processor_info(int apicid, int version) 1486void __cpuinit generic_processor_info(int apicid, int version)
1458{ 1487{
1459 int cpu; 1488 int cpu;
1460 cpumask_t tmp_map; 1489 cpumask_t tmp_map;
1461 physid_mask_t phys_cpu;
1462 1490
1463 /* 1491 /*
1464 * Validate version 1492 * Validate version
@@ -1471,9 +1499,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1471 } 1499 }
1472 apic_version[apicid] = version; 1500 apic_version[apicid] = version;
1473 1501
1474 phys_cpu = apicid_to_cpu_present(apicid);
1475 physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
1476
1477 if (num_processors >= NR_CPUS) { 1502 if (num_processors >= NR_CPUS) {
1478 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1503 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1479 " Processor ignored.\n", NR_CPUS); 1504 " Processor ignored.\n", NR_CPUS);
@@ -1484,17 +1509,19 @@ void __cpuinit generic_processor_info(int apicid, int version)
1484 cpus_complement(tmp_map, cpu_present_map); 1509 cpus_complement(tmp_map, cpu_present_map);
1485 cpu = first_cpu(tmp_map); 1510 cpu = first_cpu(tmp_map);
1486 1511
1487 if (apicid == boot_cpu_physical_apicid) 1512 physid_set(apicid, phys_cpu_present_map);
1513 if (apicid == boot_cpu_physical_apicid) {
1488 /* 1514 /*
1489 * x86_bios_cpu_apicid is required to have processors listed 1515 * x86_bios_cpu_apicid is required to have processors listed
1490 * in same order as logical cpu numbers. Hence the first 1516 * in same order as logical cpu numbers. Hence the first
1491 * entry is BSP, and so on. 1517 * entry is BSP, and so on.
1492 */ 1518 */
1493 cpu = 0; 1519 cpu = 0;
1494 1520 }
1495 if (apicid > max_physical_apicid) 1521 if (apicid > max_physical_apicid)
1496 max_physical_apicid = apicid; 1522 max_physical_apicid = apicid;
1497 1523
1524#ifdef CONFIG_X86_32
1498 /* 1525 /*
1499 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y 1526 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1500 * but we need to work other dependencies like SMP_SUSPEND etc 1527 * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1514,7 +1541,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1514 def_to_bigsmp = 1; 1541 def_to_bigsmp = 1;
1515 } 1542 }
1516 } 1543 }
1517#ifdef CONFIG_SMP 1544#endif
1545
1546#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1518 /* are we being called early in kernel startup? */ 1547 /* are we being called early in kernel startup? */
1519 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1548 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1520 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1549 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1527,6 +1556,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
1527 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1556 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1528 } 1557 }
1529#endif 1558#endif
1559
1530 cpu_set(cpu, cpu_possible_map); 1560 cpu_set(cpu, cpu_possible_map);
1531 cpu_set(cpu, cpu_present_map); 1561 cpu_set(cpu, cpu_present_map);
1532} 1562}
@@ -1537,6 +1567,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1537#ifdef CONFIG_PM 1567#ifdef CONFIG_PM
1538 1568
1539static struct { 1569static struct {
1570 /*
1571 * 'active' is true if the local APIC was enabled by us and
1572 * not the BIOS; this signifies that we are also responsible
1573 * for disabling it before entering apm/acpi suspend
1574 */
1540 int active; 1575 int active;
1541 /* r/w apic fields */ 1576 /* r/w apic fields */
1542 unsigned int apic_id; 1577 unsigned int apic_id;
@@ -1577,7 +1612,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1577 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1612 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1578 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1613 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1579 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1614 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1580#ifdef CONFIG_X86_MCE_P4THERMAL 1615#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1581 if (maxlvt >= 5) 1616 if (maxlvt >= 5)
1582 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1617 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1583#endif 1618#endif
@@ -1601,16 +1636,23 @@ static int lapic_resume(struct sys_device *dev)
1601 1636
1602 local_irq_save(flags); 1637 local_irq_save(flags);
1603 1638
1604 /* 1639#ifdef CONFIG_X86_64
1605 * Make sure the APICBASE points to the right address 1640 if (x2apic)
1606 * 1641 enable_x2apic();
1607 * FIXME! This will be wrong if we ever support suspend on 1642 else
1608 * SMP! We'll need to do this as part of the CPU restore! 1643#endif
1609 */ 1644 {
1610 rdmsr(MSR_IA32_APICBASE, l, h); 1645 /*
1611 l &= ~MSR_IA32_APICBASE_BASE; 1646 * Make sure the APICBASE points to the right address
1612 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1647 *
1613 wrmsr(MSR_IA32_APICBASE, l, h); 1648 * FIXME! This will be wrong if we ever support suspend on
1649 * SMP! We'll need to do this as part of the CPU restore!
1650 */
1651 rdmsr(MSR_IA32_APICBASE, l, h);
1652 l &= ~MSR_IA32_APICBASE_BASE;
1653 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1654 wrmsr(MSR_IA32_APICBASE, l, h);
1655 }
1614 1656
1615 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1657 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1616 apic_write(APIC_ID, apic_pm_state.apic_id); 1658 apic_write(APIC_ID, apic_pm_state.apic_id);
@@ -1620,7 +1662,7 @@ static int lapic_resume(struct sys_device *dev)
1620 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1662 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1621 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1663 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1622 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1664 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1623#ifdef CONFIG_X86_MCE_P4THERMAL 1665#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1624 if (maxlvt >= 5) 1666 if (maxlvt >= 5)
1625 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1667 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1626#endif 1668#endif
@@ -1634,7 +1676,9 @@ static int lapic_resume(struct sys_device *dev)
1634 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1676 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1635 apic_write(APIC_ESR, 0); 1677 apic_write(APIC_ESR, 0);
1636 apic_read(APIC_ESR); 1678 apic_read(APIC_ESR);
1679
1637 local_irq_restore(flags); 1680 local_irq_restore(flags);
1681
1638 return 0; 1682 return 0;
1639} 1683}
1640 1684
@@ -1690,20 +1734,20 @@ static int __init parse_lapic(char *arg)
1690} 1734}
1691early_param("lapic", parse_lapic); 1735early_param("lapic", parse_lapic);
1692 1736
1693static int __init parse_nolapic(char *arg) 1737static int __init setup_disableapic(char *arg)
1694{ 1738{
1695 disable_apic = 1; 1739 disable_apic = 1;
1696 setup_clear_cpu_cap(X86_FEATURE_APIC); 1740 setup_clear_cpu_cap(X86_FEATURE_APIC);
1697 return 0; 1741 return 0;
1698} 1742}
1699early_param("nolapic", parse_nolapic); 1743early_param("disableapic", setup_disableapic);
1700 1744
1701static int __init parse_disable_lapic_timer(char *arg) 1745/* same as disableapic, for compatibility */
1746static int __init setup_nolapic(char *arg)
1702{ 1747{
1703 local_apic_timer_disabled = 1; 1748 return setup_disableapic(arg);
1704 return 0;
1705} 1749}
1706early_param("nolapic_timer", parse_disable_lapic_timer); 1750early_param("nolapic", setup_nolapic);
1707 1751
1708static int __init parse_lapic_timer_c2_ok(char *arg) 1752static int __init parse_lapic_timer_c2_ok(char *arg)
1709{ 1753{
@@ -1712,15 +1756,40 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1712} 1756}
1713early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1757early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1714 1758
1759static int __init parse_disable_apic_timer(char *arg)
1760{
1761 disable_apic_timer = 1;
1762 return 0;
1763}
1764early_param("noapictimer", parse_disable_apic_timer);
1765
1766static int __init parse_nolapic_timer(char *arg)
1767{
1768 disable_apic_timer = 1;
1769 return 0;
1770}
1771early_param("nolapic_timer", parse_nolapic_timer);
1772
1715static int __init apic_set_verbosity(char *arg) 1773static int __init apic_set_verbosity(char *arg)
1716{ 1774{
1717 if (!arg) 1775 if (!arg) {
1776#ifdef CONFIG_X86_64
1777 skip_ioapic_setup = 0;
1778 ioapic_force = 1;
1779 return 0;
1780#endif
1718 return -EINVAL; 1781 return -EINVAL;
1782 }
1719 1783
1720 if (strcmp(arg, "debug") == 0) 1784 if (strcmp("debug", arg) == 0)
1721 apic_verbosity = APIC_DEBUG; 1785 apic_verbosity = APIC_DEBUG;
1722 else if (strcmp(arg, "verbose") == 0) 1786 else if (strcmp("verbose", arg) == 0)
1723 apic_verbosity = APIC_VERBOSE; 1787 apic_verbosity = APIC_VERBOSE;
1788 else {
1789 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1790 " use apic=verbose or apic=debug\n", arg);
1791 return -EINVAL;
1792 }
1724 1793
1725 return 0; 1794 return 0;
1726} 1795}
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062e831c..53898b65a6ae 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,13 +40,20 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
45 47
48/* Disable local APIC timer from the kernel commandline or via dmi quirk */
46static int disable_apic_timer __cpuinitdata; 49static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 50static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 51int disable_apic;
52int disable_x2apic;
53int x2apic;
54
55/* x2apic enabled before OS handover */
56int x2apic_preenabled;
49 57
50/* Local APIC timer works in C2 */ 58/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 59int local_apic_timer_c2_ok;
@@ -73,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode,
73static void lapic_timer_broadcast(cpumask_t mask); 81static void lapic_timer_broadcast(cpumask_t mask);
74static void apic_pm_activate(void); 82static void apic_pm_activate(void);
75 83
84/*
85 * The local apic timer can be used for any function which is CPU local.
86 */
76static struct clock_event_device lapic_clockevent = { 87static struct clock_event_device lapic_clockevent = {
77 .name = "lapic", 88 .name = "lapic",
78 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT 89 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
@@ -99,11 +110,15 @@ static inline int lapic_get_version(void)
99} 110}
100 111
101/* 112/*
102 * Check, if the APIC is integrated or a seperate chip 113 * Check, if the APIC is integrated or a separate chip
103 */ 114 */
104static inline int lapic_is_integrated(void) 115static inline int lapic_is_integrated(void)
105{ 116{
117#ifdef CONFIG_X86_64
106 return 1; 118 return 1;
119#else
120 return APIC_INTEGRATED(lapic_get_version());
121#endif
107} 122}
108 123
109/* 124/*
@@ -118,13 +133,18 @@ static int modern_apic(void)
118 return lapic_get_version() >= 0x14; 133 return lapic_get_version() >= 0x14;
119} 134}
120 135
121void apic_wait_icr_idle(void) 136/*
137 * Paravirt kernels also might be using these below ops. So we still
138 * use generic apic_read()/apic_write(), which might be pointing to different
139 * ops in PARAVIRT case.
140 */
141void xapic_wait_icr_idle(void)
122{ 142{
123 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 143 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
124 cpu_relax(); 144 cpu_relax();
125} 145}
126 146
127u32 safe_apic_wait_icr_idle(void) 147u32 safe_xapic_wait_icr_idle(void)
128{ 148{
129 u32 send_status; 149 u32 send_status;
130 int timeout; 150 int timeout;
@@ -140,6 +160,68 @@ u32 safe_apic_wait_icr_idle(void)
140 return send_status; 160 return send_status;
141} 161}
142 162
163void xapic_icr_write(u32 low, u32 id)
164{
165 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
166 apic_write(APIC_ICR, low);
167}
168
169u64 xapic_icr_read(void)
170{
171 u32 icr1, icr2;
172
173 icr2 = apic_read(APIC_ICR2);
174 icr1 = apic_read(APIC_ICR);
175
176 return icr1 | ((u64)icr2 << 32);
177}
178
179static struct apic_ops xapic_ops = {
180 .read = native_apic_mem_read,
181 .write = native_apic_mem_write,
182 .icr_read = xapic_icr_read,
183 .icr_write = xapic_icr_write,
184 .wait_icr_idle = xapic_wait_icr_idle,
185 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
186};
187
188struct apic_ops __read_mostly *apic_ops = &xapic_ops;
189EXPORT_SYMBOL_GPL(apic_ops);
190
191static void x2apic_wait_icr_idle(void)
192{
193 /* no need to wait for icr idle in x2apic */
194 return;
195}
196
197static u32 safe_x2apic_wait_icr_idle(void)
198{
199 /* no need to wait for icr idle in x2apic */
200 return 0;
201}
202
203void x2apic_icr_write(u32 low, u32 id)
204{
205 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
206}
207
208u64 x2apic_icr_read(void)
209{
210 unsigned long val;
211
212 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
213 return val;
214}
215
216static struct apic_ops x2apic_ops = {
217 .read = native_apic_msr_read,
218 .write = native_apic_msr_write,
219 .icr_read = x2apic_icr_read,
220 .icr_write = x2apic_icr_write,
221 .wait_icr_idle = x2apic_wait_icr_idle,
222 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
223};
224
143/** 225/**
144 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 226 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
145 */ 227 */
@@ -149,6 +231,11 @@ void __cpuinit enable_NMI_through_LVT0(void)
149 231
150 /* unmask and set to NMI */ 232 /* unmask and set to NMI */
151 v = APIC_DM_NMI; 233 v = APIC_DM_NMI;
234
235 /* Level triggered for 82489DX (32bit mode) */
236 if (!lapic_is_integrated())
237 v |= APIC_LVT_LEVEL_TRIGGER;
238
152 apic_write(APIC_LVT0, v); 239 apic_write(APIC_LVT0, v);
153} 240}
154 241
@@ -157,14 +244,28 @@ void __cpuinit enable_NMI_through_LVT0(void)
157 */ 244 */
158int lapic_get_maxlvt(void) 245int lapic_get_maxlvt(void)
159{ 246{
160 unsigned int v, maxlvt; 247 unsigned int v;
161 248
162 v = apic_read(APIC_LVR); 249 v = apic_read(APIC_LVR);
163 maxlvt = GET_APIC_MAXLVT(v); 250 /*
164 return maxlvt; 251 * - we always have APIC integrated on 64bit mode
252 * - 82489DXs do not report # of LVT entries
253 */
254 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
165} 255}
166 256
167/* 257/*
258 * Local APIC timer
259 */
260
261/* Clock divisor */
262#ifdef CONFG_X86_64
263#define APIC_DIVISOR 1
264#else
265#define APIC_DIVISOR 16
266#endif
267
268/*
168 * This function sets up the local APIC timer, with a timeout of 269 * This function sets up the local APIC timer, with a timeout of
169 * 'clocks' APIC bus clock. During calibration we actually call 270 * 'clocks' APIC bus clock. During calibration we actually call
170 * this function twice on the boot CPU, once with a bogus timeout 271 * this function twice on the boot CPU, once with a bogus timeout
@@ -174,7 +275,6 @@ int lapic_get_maxlvt(void)
174 * We do reads before writes even if unnecessary, to get around the 275 * We do reads before writes even if unnecessary, to get around the
175 * P5 APIC double write bug. 276 * P5 APIC double write bug.
176 */ 277 */
177
178static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 278static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
179{ 279{
180 unsigned int lvtt_value, tmp_value; 280 unsigned int lvtt_value, tmp_value;
@@ -182,6 +282,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
182 lvtt_value = LOCAL_TIMER_VECTOR; 282 lvtt_value = LOCAL_TIMER_VECTOR;
183 if (!oneshot) 283 if (!oneshot)
184 lvtt_value |= APIC_LVT_TIMER_PERIODIC; 284 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
285 if (!lapic_is_integrated())
286 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
287
185 if (!irqen) 288 if (!irqen)
186 lvtt_value |= APIC_LVT_MASKED; 289 lvtt_value |= APIC_LVT_MASKED;
187 290
@@ -191,12 +294,12 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
191 * Divide PICLK by 16 294 * Divide PICLK by 16
192 */ 295 */
193 tmp_value = apic_read(APIC_TDCR); 296 tmp_value = apic_read(APIC_TDCR);
194 apic_write(APIC_TDCR, (tmp_value 297 apic_write(APIC_TDCR,
195 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) 298 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
196 | APIC_TDR_DIV_16); 299 APIC_TDR_DIV_16);
197 300
198 if (!oneshot) 301 if (!oneshot)
199 apic_write(APIC_TMICT, clocks); 302 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
200} 303}
201 304
202/* 305/*
@@ -366,7 +469,7 @@ static int __init calibrate_APIC_clock(void)
366 lapic_clockevent.min_delta_ns = 469 lapic_clockevent.min_delta_ns =
367 clockevent_delta2ns(0xF, &lapic_clockevent); 470 clockevent_delta2ns(0xF, &lapic_clockevent);
368 471
369 calibration_result = result / HZ; 472 calibration_result = (result * APIC_DIVISOR) / HZ;
370 473
371 /* 474 /*
372 * Do a sanity check on the APIC calibration result 475 * Do a sanity check on the APIC calibration result
@@ -388,10 +491,10 @@ static int __init calibrate_APIC_clock(void)
388void __init setup_boot_APIC_clock(void) 491void __init setup_boot_APIC_clock(void)
389{ 492{
390 /* 493 /*
391 * The local apic timer can be disabled via the kernel commandline. 494 * The local apic timer can be disabled via the kernel
392 * Register the lapic timer as a dummy clock event source on SMP 495 * commandline or from the CPU detection code. Register the lapic
393 * systems, so the broadcast mechanism is used. On UP systems simply 496 * timer as a dummy clock event source on SMP systems, so the
394 * ignore it. 497 * broadcast mechanism is used. On UP systems simply ignore it.
395 */ 498 */
396 if (disable_apic_timer) { 499 if (disable_apic_timer) {
397 printk(KERN_INFO "Disabling APIC timer\n"); 500 printk(KERN_INFO "Disabling APIC timer\n");
@@ -403,7 +506,9 @@ void __init setup_boot_APIC_clock(void)
403 return; 506 return;
404 } 507 }
405 508
406 printk(KERN_INFO "Using local APIC timer interrupts.\n"); 509 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
510 "calibrating APIC timer ...\n");
511
407 if (calibrate_APIC_clock()) { 512 if (calibrate_APIC_clock()) {
408 /* No broadcast on UP ! */ 513 /* No broadcast on UP ! */
409 if (num_possible_cpus() > 1) 514 if (num_possible_cpus() > 1)
@@ -422,6 +527,7 @@ void __init setup_boot_APIC_clock(void)
422 printk(KERN_WARNING "APIC timer registered as dummy," 527 printk(KERN_WARNING "APIC timer registered as dummy,"
423 " due to nmi_watchdog=%d!\n", nmi_watchdog); 528 " due to nmi_watchdog=%d!\n", nmi_watchdog);
424 529
530 /* Setup the lapic or request the broadcast */
425 setup_APIC_timer(); 531 setup_APIC_timer();
426} 532}
427 533
@@ -460,7 +566,11 @@ static void local_apic_timer_interrupt(void)
460 /* 566 /*
461 * the NMI deadlock-detector uses this. 567 * the NMI deadlock-detector uses this.
462 */ 568 */
569#ifdef CONFIG_X86_64
463 add_pda(apic_timer_irqs, 1); 570 add_pda(apic_timer_irqs, 1);
571#else
572 per_cpu(irq_stat, cpu).apic_timer_irqs++;
573#endif
464 574
465 evt->event_handler(evt); 575 evt->event_handler(evt);
466} 576}
@@ -491,6 +601,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
491 irq_enter(); 601 irq_enter();
492 local_apic_timer_interrupt(); 602 local_apic_timer_interrupt();
493 irq_exit(); 603 irq_exit();
604
494 set_irq_regs(old_regs); 605 set_irq_regs(old_regs);
495} 606}
496 607
@@ -544,6 +655,13 @@ void clear_local_APIC(void)
544 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); 655 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
545 } 656 }
546 657
658 /* lets not touch this if we didn't frob it */
659#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
660 if (maxlvt >= 5) {
661 v = apic_read(APIC_LVTTHMR);
662 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
663 }
664#endif
547 /* 665 /*
548 * Clean APIC state for other OSs: 666 * Clean APIC state for other OSs:
549 */ 667 */
@@ -554,8 +672,14 @@ void clear_local_APIC(void)
554 apic_write(APIC_LVTERR, APIC_LVT_MASKED); 672 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
555 if (maxlvt >= 4) 673 if (maxlvt >= 4)
556 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 674 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
557 apic_write(APIC_ESR, 0); 675
558 apic_read(APIC_ESR); 676 /* Integrated APIC (!82489DX) ? */
677 if (lapic_is_integrated()) {
678 if (maxlvt > 3)
679 /* Clear ESR due to Pentium errata 3AP and 11AP */
680 apic_write(APIC_ESR, 0);
681 apic_read(APIC_ESR);
682 }
559} 683}
560 684
561/** 685/**
@@ -574,8 +698,28 @@ void disable_local_APIC(void)
574 value = apic_read(APIC_SPIV); 698 value = apic_read(APIC_SPIV);
575 value &= ~APIC_SPIV_APIC_ENABLED; 699 value &= ~APIC_SPIV_APIC_ENABLED;
576 apic_write(APIC_SPIV, value); 700 apic_write(APIC_SPIV, value);
701
702#ifdef CONFIG_X86_32
703 /*
704 * When LAPIC was disabled by the BIOS and enabled by the kernel,
705 * restore the disabled state.
706 */
707 if (enabled_via_apicbase) {
708 unsigned int l, h;
709
710 rdmsr(MSR_IA32_APICBASE, l, h);
711 l &= ~MSR_IA32_APICBASE_ENABLE;
712 wrmsr(MSR_IA32_APICBASE, l, h);
713 }
714#endif
577} 715}
578 716
717/*
718 * If Linux enabled the LAPIC against the BIOS default disable it down before
719 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
720 * not power-off. Additionally clear all LVT entries before disable_local_APIC
721 * for the case where Linux didn't enable the LAPIC.
722 */
579void lapic_shutdown(void) 723void lapic_shutdown(void)
580{ 724{
581 unsigned long flags; 725 unsigned long flags;
@@ -585,7 +729,13 @@ void lapic_shutdown(void)
585 729
586 local_irq_save(flags); 730 local_irq_save(flags);
587 731
588 disable_local_APIC(); 732#ifdef CONFIG_X86_32
733 if (!enabled_via_apicbase)
734 clear_local_APIC();
735 else
736#endif
737 disable_local_APIC();
738
589 739
590 local_irq_restore(flags); 740 local_irq_restore(flags);
591} 741}
@@ -629,10 +779,10 @@ int __init verify_local_APIC(void)
629 /* 779 /*
630 * The ID register is read/write in a real APIC. 780 * The ID register is read/write in a real APIC.
631 */ 781 */
632 reg0 = read_apic_id(); 782 reg0 = apic_read(APIC_ID);
633 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 783 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
634 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 784 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
635 reg1 = read_apic_id(); 785 reg1 = apic_read(APIC_ID);
636 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 786 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
637 apic_write(APIC_ID, reg0); 787 apic_write(APIC_ID, reg0);
638 if (reg1 != (reg0 ^ APIC_ID_MASK)) 788 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -656,8 +806,11 @@ int __init verify_local_APIC(void)
656 */ 806 */
657void __init sync_Arb_IDs(void) 807void __init sync_Arb_IDs(void)
658{ 808{
659 /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ 809 /*
660 if (modern_apic()) 810 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
811 * needed on AMD.
812 */
813 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
661 return; 814 return;
662 815
663 /* 816 /*
@@ -666,8 +819,8 @@ void __init sync_Arb_IDs(void)
666 apic_wait_icr_idle(); 819 apic_wait_icr_idle();
667 820
668 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 821 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
669 apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG 822 apic_write(APIC_ICR, APIC_DEST_ALLINC |
670 | APIC_DM_INIT); 823 APIC_INT_LEVELTRIG | APIC_DM_INIT);
671} 824}
672 825
673/* 826/*
@@ -684,8 +837,6 @@ void __init init_bsp_APIC(void)
684 if (smp_found_config || !cpu_has_apic) 837 if (smp_found_config || !cpu_has_apic)
685 return; 838 return;
686 839
687 value = apic_read(APIC_LVR);
688
689 /* 840 /*
690 * Do not trust the local APIC being empty at bootup. 841 * Do not trust the local APIC being empty at bootup.
691 */ 842 */
@@ -697,7 +848,15 @@ void __init init_bsp_APIC(void)
697 value = apic_read(APIC_SPIV); 848 value = apic_read(APIC_SPIV);
698 value &= ~APIC_VECTOR_MASK; 849 value &= ~APIC_VECTOR_MASK;
699 value |= APIC_SPIV_APIC_ENABLED; 850 value |= APIC_SPIV_APIC_ENABLED;
700 value |= APIC_SPIV_FOCUS_DISABLED; 851
852#ifdef CONFIG_X86_32
853 /* This bit is reserved on P4/Xeon and should be cleared */
854 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
855 (boot_cpu_data.x86 == 15))
856 value &= ~APIC_SPIV_FOCUS_DISABLED;
857 else
858#endif
859 value |= APIC_SPIV_FOCUS_DISABLED;
701 value |= SPURIOUS_APIC_VECTOR; 860 value |= SPURIOUS_APIC_VECTOR;
702 apic_write(APIC_SPIV, value); 861 apic_write(APIC_SPIV, value);
703 862
@@ -706,9 +865,50 @@ void __init init_bsp_APIC(void)
706 */ 865 */
707 apic_write(APIC_LVT0, APIC_DM_EXTINT); 866 apic_write(APIC_LVT0, APIC_DM_EXTINT);
708 value = APIC_DM_NMI; 867 value = APIC_DM_NMI;
868 if (!lapic_is_integrated()) /* 82489DX */
869 value |= APIC_LVT_LEVEL_TRIGGER;
709 apic_write(APIC_LVT1, value); 870 apic_write(APIC_LVT1, value);
710} 871}
711 872
873static void __cpuinit lapic_setup_esr(void)
874{
875 unsigned long oldvalue, value, maxlvt;
876 if (lapic_is_integrated() && !esr_disable) {
877 if (esr_disable) {
878 /*
879 * Something untraceable is creating bad interrupts on
880 * secondary quads ... for the moment, just leave the
881 * ESR disabled - we can't do anything useful with the
882 * errors anyway - mbligh
883 */
884 printk(KERN_INFO "Leaving ESR disabled.\n");
885 return;
886 }
887 /* !82489DX */
888 maxlvt = lapic_get_maxlvt();
889 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
890 apic_write(APIC_ESR, 0);
891 oldvalue = apic_read(APIC_ESR);
892
893 /* enables sending errors */
894 value = ERROR_APIC_VECTOR;
895 apic_write(APIC_LVTERR, value);
896 /*
897 * spec says clear errors after enabling vector.
898 */
899 if (maxlvt > 3)
900 apic_write(APIC_ESR, 0);
901 value = apic_read(APIC_ESR);
902 if (value != oldvalue)
903 apic_printk(APIC_VERBOSE, "ESR value before enabling "
904 "vector: 0x%08lx after: 0x%08lx\n",
905 oldvalue, value);
906 } else {
907 printk(KERN_INFO "No ESR for 82489DX.\n");
908 }
909}
910
911
712/** 912/**
713 * setup_local_APIC - setup the local APIC 913 * setup_local_APIC - setup the local APIC
714 */ 914 */
@@ -814,25 +1014,143 @@ void __cpuinit setup_local_APIC(void)
814 preempt_enable(); 1014 preempt_enable();
815} 1015}
816 1016
817static void __cpuinit lapic_setup_esr(void)
818{
819 unsigned maxlvt = lapic_get_maxlvt();
820
821 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
822 /*
823 * spec says clear errors after enabling vector.
824 */
825 if (maxlvt > 3)
826 apic_write(APIC_ESR, 0);
827}
828
829void __cpuinit end_local_APIC_setup(void) 1017void __cpuinit end_local_APIC_setup(void)
830{ 1018{
831 lapic_setup_esr(); 1019 lapic_setup_esr();
1020
1021#ifdef CONFIG_X86_32
1022 {
1023 unsigned int value;
1024 /* Disable the local apic timer */
1025 value = apic_read(APIC_LVTT);
1026 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1027 apic_write(APIC_LVTT, value);
1028 }
1029#endif
1030
832 setup_apic_nmi_watchdog(NULL); 1031 setup_apic_nmi_watchdog(NULL);
833 apic_pm_activate(); 1032 apic_pm_activate();
834} 1033}
835 1034
1035void check_x2apic(void)
1036{
1037 int msr, msr2;
1038
1039 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1040
1041 if (msr & X2APIC_ENABLE) {
1042 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
1043 x2apic_preenabled = x2apic = 1;
1044 apic_ops = &x2apic_ops;
1045 }
1046}
1047
1048void enable_x2apic(void)
1049{
1050 int msr, msr2;
1051
1052 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1053 if (!(msr & X2APIC_ENABLE)) {
1054 printk("Enabling x2apic\n");
1055 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1056 }
1057}
1058
1059void enable_IR_x2apic(void)
1060{
1061#ifdef CONFIG_INTR_REMAP
1062 int ret;
1063 unsigned long flags;
1064
1065 if (!cpu_has_x2apic)
1066 return;
1067
1068 if (!x2apic_preenabled && disable_x2apic) {
1069 printk(KERN_INFO
1070 "Skipped enabling x2apic and Interrupt-remapping "
1071 "because of nox2apic\n");
1072 return;
1073 }
1074
1075 if (x2apic_preenabled && disable_x2apic)
1076 panic("Bios already enabled x2apic, can't enforce nox2apic");
1077
1078 if (!x2apic_preenabled && skip_ioapic_setup) {
1079 printk(KERN_INFO
1080 "Skipped enabling x2apic and Interrupt-remapping "
1081 "because of skipping io-apic setup\n");
1082 return;
1083 }
1084
1085 ret = dmar_table_init();
1086 if (ret) {
1087 printk(KERN_INFO
1088 "dmar_table_init() failed with %d:\n", ret);
1089
1090 if (x2apic_preenabled)
1091 panic("x2apic enabled by bios. But IR enabling failed");
1092 else
1093 printk(KERN_INFO
1094 "Not enabling x2apic,Intr-remapping\n");
1095 return;
1096 }
1097
1098 local_irq_save(flags);
1099 mask_8259A();
1100 save_mask_IO_APIC_setup();
1101
1102 ret = enable_intr_remapping(1);
1103
1104 if (ret && x2apic_preenabled) {
1105 local_irq_restore(flags);
1106 panic("x2apic enabled by bios. But IR enabling failed");
1107 }
1108
1109 if (ret)
1110 goto end;
1111
1112 if (!x2apic) {
1113 x2apic = 1;
1114 apic_ops = &x2apic_ops;
1115 enable_x2apic();
1116 }
1117end:
1118 if (ret)
1119 /*
1120 * IR enabling failed
1121 */
1122 restore_IO_APIC_setup();
1123 else
1124 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1125
1126 unmask_8259A();
1127 local_irq_restore(flags);
1128
1129 if (!ret) {
1130 if (!x2apic_preenabled)
1131 printk(KERN_INFO
1132 "Enabled x2apic and interrupt-remapping\n");
1133 else
1134 printk(KERN_INFO
1135 "Enabled Interrupt-remapping\n");
1136 } else
1137 printk(KERN_ERR
1138 "Failed to enable Interrupt-remapping and x2apic\n");
1139#else
1140 if (!cpu_has_x2apic)
1141 return;
1142
1143 if (x2apic_preenabled)
1144 panic("x2apic enabled prior OS handover,"
1145 " enable CONFIG_INTR_REMAP");
1146
1147 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1148 " and x2apic\n");
1149#endif
1150
1151 return;
1152}
1153
836/* 1154/*
837 * Detect and enable local APICs on non-SMP boards. 1155 * Detect and enable local APICs on non-SMP boards.
838 * Original code written by Keir Fraser. 1156 * Original code written by Keir Fraser.
@@ -872,7 +1190,7 @@ void __init early_init_lapic_mapping(void)
872 * Fetch the APIC ID of the BSP in case we have a 1190 * Fetch the APIC ID of the BSP in case we have a
873 * default configuration (or the MP table is broken). 1191 * default configuration (or the MP table is broken).
874 */ 1192 */
875 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1193 boot_cpu_physical_apicid = read_apic_id();
876} 1194}
877 1195
878/** 1196/**
@@ -880,6 +1198,11 @@ void __init early_init_lapic_mapping(void)
880 */ 1198 */
881void __init init_apic_mappings(void) 1199void __init init_apic_mappings(void)
882{ 1200{
1201 if (x2apic) {
1202 boot_cpu_physical_apicid = read_apic_id();
1203 return;
1204 }
1205
883 /* 1206 /*
884 * If no local APIC can be found then set up a fake all 1207 * If no local APIC can be found then set up a fake all
885 * zeroes page to simulate the local APIC and another 1208 * zeroes page to simulate the local APIC and another
@@ -899,13 +1222,15 @@ void __init init_apic_mappings(void)
899 * Fetch the APIC ID of the BSP in case we have a 1222 * Fetch the APIC ID of the BSP in case we have a
900 * default configuration (or the MP table is broken). 1223 * default configuration (or the MP table is broken).
901 */ 1224 */
902 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1225 boot_cpu_physical_apicid = read_apic_id();
903} 1226}
904 1227
905/* 1228/*
906 * This initializes the IO-APIC and APIC hardware if this is 1229 * This initializes the IO-APIC and APIC hardware if this is
907 * a UP kernel. 1230 * a UP kernel.
908 */ 1231 */
1232int apic_version[MAX_APICS];
1233
909int __init APIC_init_uniprocessor(void) 1234int __init APIC_init_uniprocessor(void)
910{ 1235{
911 if (disable_apic) { 1236 if (disable_apic) {
@@ -918,6 +1243,9 @@ int __init APIC_init_uniprocessor(void)
918 return -1; 1243 return -1;
919 } 1244 }
920 1245
1246 enable_IR_x2apic();
1247 setup_apic_routing();
1248
921 verify_local_APIC(); 1249 verify_local_APIC();
922 1250
923 connect_bsp_APIC(); 1251 connect_bsp_APIC();
@@ -1004,17 +1332,57 @@ asmlinkage void smp_error_interrupt(void)
1004} 1332}
1005 1333
1006/** 1334/**
1007 * * connect_bsp_APIC - attach the APIC to the interrupt system 1335 * connect_bsp_APIC - attach the APIC to the interrupt system
1008 * */ 1336 */
1009void __init connect_bsp_APIC(void) 1337void __init connect_bsp_APIC(void)
1010{ 1338{
1339#ifdef CONFIG_X86_32
1340 if (pic_mode) {
1341 /*
1342 * Do not trust the local APIC being empty at bootup.
1343 */
1344 clear_local_APIC();
1345 /*
1346 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1347 * local APIC to INT and NMI lines.
1348 */
1349 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1350 "enabling APIC mode.\n");
1351 outb(0x70, 0x22);
1352 outb(0x01, 0x23);
1353 }
1354#endif
1011 enable_apic_mode(); 1355 enable_apic_mode();
1012} 1356}
1013 1357
1358/**
1359 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1360 * @virt_wire_setup: indicates, whether virtual wire mode is selected
1361 *
1362 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1363 * APIC is disabled.
1364 */
1014void disconnect_bsp_APIC(int virt_wire_setup) 1365void disconnect_bsp_APIC(int virt_wire_setup)
1015{ 1366{
1367 unsigned int value;
1368
1369#ifdef CONFIG_X86_32
1370 if (pic_mode) {
1371 /*
1372 * Put the board back into PIC mode (has an effect only on
1373 * certain older boards). Note that APIC interrupts, including
1374 * IPIs, won't work beyond this point! The only exception are
1375 * INIT IPIs.
1376 */
1377 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1378 "entering PIC mode.\n");
1379 outb(0x70, 0x22);
1380 outb(0x00, 0x23);
1381 return;
1382 }
1383#endif
1384
1016 /* Go back to Virtual Wire compatibility mode */ 1385 /* Go back to Virtual Wire compatibility mode */
1017 unsigned long value;
1018 1386
1019 /* For the spurious interrupt use vector F, and enable it */ 1387 /* For the spurious interrupt use vector F, and enable it */
1020 value = apic_read(APIC_SPIV); 1388 value = apic_read(APIC_SPIV);
@@ -1040,7 +1408,10 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1040 apic_write(APIC_LVT0, APIC_LVT_MASKED); 1408 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1041 } 1409 }
1042 1410
1043 /* For LVT1 make it edge triggered, active high, nmi and enabled */ 1411 /*
1412 * For LVT1 make it edge triggered, active high,
1413 * nmi and enabled
1414 */
1044 value = apic_read(APIC_LVT1); 1415 value = apic_read(APIC_LVT1);
1045 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 1416 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1046 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1417 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
@@ -1055,9 +1426,20 @@ void __cpuinit generic_processor_info(int apicid, int version)
1055 int cpu; 1426 int cpu;
1056 cpumask_t tmp_map; 1427 cpumask_t tmp_map;
1057 1428
1429 /*
1430 * Validate version
1431 */
1432 if (version == 0x0) {
1433 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
1434 "fixing up to 0x10. (tell your hw vendor)\n",
1435 version);
1436 version = 0x10;
1437 }
1438 apic_version[apicid] = version;
1439
1058 if (num_processors >= NR_CPUS) { 1440 if (num_processors >= NR_CPUS) {
1059 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1441 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1060 " Processor ignored.\n", NR_CPUS); 1442 " Processor ignored.\n", NR_CPUS);
1061 return; 1443 return;
1062 } 1444 }
1063 1445
@@ -1077,6 +1459,29 @@ void __cpuinit generic_processor_info(int apicid, int version)
1077 if (apicid > max_physical_apicid) 1459 if (apicid > max_physical_apicid)
1078 max_physical_apicid = apicid; 1460 max_physical_apicid = apicid;
1079 1461
1462#ifdef CONFIG_X86_32
1463 /*
1464 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1465 * but we need to work other dependencies like SMP_SUSPEND etc
1466 * before this can be done without some confusion.
1467 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1468 * - Ashok Raj <ashok.raj@intel.com>
1469 */
1470 if (max_physical_apicid >= 8) {
1471 switch (boot_cpu_data.x86_vendor) {
1472 case X86_VENDOR_INTEL:
1473 if (!APIC_XAPIC(version)) {
1474 def_to_bigsmp = 0;
1475 break;
1476 }
1477 /* If P4 and above fall through */
1478 case X86_VENDOR_AMD:
1479 def_to_bigsmp = 1;
1480 }
1481 }
1482#endif
1483
1484#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1080 /* are we being called early in kernel startup? */ 1485 /* are we being called early in kernel startup? */
1081 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1486 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1082 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1487 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1088,20 +1493,28 @@ void __cpuinit generic_processor_info(int apicid, int version)
1088 per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1493 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1089 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1494 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1090 } 1495 }
1496#endif
1091 1497
1092 cpu_set(cpu, cpu_possible_map); 1498 cpu_set(cpu, cpu_possible_map);
1093 cpu_set(cpu, cpu_present_map); 1499 cpu_set(cpu, cpu_present_map);
1094} 1500}
1095 1501
1502int hard_smp_processor_id(void)
1503{
1504 return read_apic_id();
1505}
1506
1096/* 1507/*
1097 * Power management 1508 * Power management
1098 */ 1509 */
1099#ifdef CONFIG_PM 1510#ifdef CONFIG_PM
1100 1511
1101static struct { 1512static struct {
1102 /* 'active' is true if the local APIC was enabled by us and 1513 /*
1103 not the BIOS; this signifies that we are also responsible 1514 * 'active' is true if the local APIC was enabled by us and
1104 for disabling it before entering apm/acpi suspend */ 1515 * not the BIOS; this signifies that we are also responsible
1516 * for disabling it before entering apm/acpi suspend
1517 */
1105 int active; 1518 int active;
1106 /* r/w apic fields */ 1519 /* r/w apic fields */
1107 unsigned int apic_id; 1520 unsigned int apic_id;
@@ -1129,7 +1542,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1129 1542
1130 maxlvt = lapic_get_maxlvt(); 1543 maxlvt = lapic_get_maxlvt();
1131 1544
1132 apic_pm_state.apic_id = read_apic_id(); 1545 apic_pm_state.apic_id = apic_read(APIC_ID);
1133 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1546 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1134 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1547 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1135 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1548 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1142,10 +1555,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1142 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1555 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1143 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1556 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1144 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1557 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1145#ifdef CONFIG_X86_MCE_INTEL 1558#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1146 if (maxlvt >= 5) 1559 if (maxlvt >= 5)
1147 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1560 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1148#endif 1561#endif
1562
1149 local_irq_save(flags); 1563 local_irq_save(flags);
1150 disable_local_APIC(); 1564 disable_local_APIC();
1151 local_irq_restore(flags); 1565 local_irq_restore(flags);
@@ -1164,10 +1578,25 @@ static int lapic_resume(struct sys_device *dev)
1164 maxlvt = lapic_get_maxlvt(); 1578 maxlvt = lapic_get_maxlvt();
1165 1579
1166 local_irq_save(flags); 1580 local_irq_save(flags);
1167 rdmsr(MSR_IA32_APICBASE, l, h); 1581
1168 l &= ~MSR_IA32_APICBASE_BASE; 1582#ifdef CONFIG_X86_64
1169 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1583 if (x2apic)
1170 wrmsr(MSR_IA32_APICBASE, l, h); 1584 enable_x2apic();
1585 else
1586#endif
1587 {
1588 /*
1589 * Make sure the APICBASE points to the right address
1590 *
1591 * FIXME! This will be wrong if we ever support suspend on
1592 * SMP! We'll need to do this as part of the CPU restore!
1593 */
1594 rdmsr(MSR_IA32_APICBASE, l, h);
1595 l &= ~MSR_IA32_APICBASE_BASE;
1596 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1597 wrmsr(MSR_IA32_APICBASE, l, h);
1598 }
1599
1171 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1600 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1172 apic_write(APIC_ID, apic_pm_state.apic_id); 1601 apic_write(APIC_ID, apic_pm_state.apic_id);
1173 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1602 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1176,7 +1605,7 @@ static int lapic_resume(struct sys_device *dev)
1176 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1605 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1177 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1606 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1178 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1607 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1179#ifdef CONFIG_X86_MCE_INTEL 1608#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1180 if (maxlvt >= 5) 1609 if (maxlvt >= 5)
1181 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1610 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1182#endif 1611#endif
@@ -1190,10 +1619,17 @@ static int lapic_resume(struct sys_device *dev)
1190 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1619 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1191 apic_write(APIC_ESR, 0); 1620 apic_write(APIC_ESR, 0);
1192 apic_read(APIC_ESR); 1621 apic_read(APIC_ESR);
1622
1193 local_irq_restore(flags); 1623 local_irq_restore(flags);
1624
1194 return 0; 1625 return 0;
1195} 1626}
1196 1627
1628/*
1629 * This device has no shutdown method - fully functioning local APICs
1630 * are needed on every CPU up until machine_halt/restart/poweroff.
1631 */
1632
1197static struct sysdev_class lapic_sysclass = { 1633static struct sysdev_class lapic_sysclass = {
1198 .name = "lapic", 1634 .name = "lapic",
1199 .resume = lapic_resume, 1635 .resume = lapic_resume,
@@ -1307,31 +1743,19 @@ __cpuinit int apic_is_clustered_box(void)
1307 return (clusters > 2); 1743 return (clusters > 2);
1308} 1744}
1309 1745
1310/* 1746static __init int setup_nox2apic(char *str)
1311 * APIC command line parameters
1312 */
1313static int __init apic_set_verbosity(char *str)
1314{ 1747{
1315 if (str == NULL) { 1748 disable_x2apic = 1;
1316 skip_ioapic_setup = 0; 1749 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1317 ioapic_force = 1;
1318 return 0;
1319 }
1320 if (strcmp("debug", str) == 0)
1321 apic_verbosity = APIC_DEBUG;
1322 else if (strcmp("verbose", str) == 0)
1323 apic_verbosity = APIC_VERBOSE;
1324 else {
1325 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1326 " use apic=verbose or apic=debug\n", str);
1327 return -EINVAL;
1328 }
1329
1330 return 0; 1750 return 0;
1331} 1751}
1332early_param("apic", apic_set_verbosity); 1752early_param("nox2apic", setup_nox2apic);
1753
1333 1754
1334static __init int setup_disableapic(char *str) 1755/*
1756 * APIC command line parameters
1757 */
1758static int __init setup_disableapic(char *arg)
1335{ 1759{
1336 disable_apic = 1; 1760 disable_apic = 1;
1337 setup_clear_cpu_cap(X86_FEATURE_APIC); 1761 setup_clear_cpu_cap(X86_FEATURE_APIC);
@@ -1340,9 +1764,9 @@ static __init int setup_disableapic(char *str)
1340early_param("disableapic", setup_disableapic); 1764early_param("disableapic", setup_disableapic);
1341 1765
1342/* same as disableapic, for compatibility */ 1766/* same as disableapic, for compatibility */
1343static __init int setup_nolapic(char *str) 1767static int __init setup_nolapic(char *arg)
1344{ 1768{
1345 return setup_disableapic(str); 1769 return setup_disableapic(arg);
1346} 1770}
1347early_param("nolapic", setup_nolapic); 1771early_param("nolapic", setup_nolapic);
1348 1772
@@ -1353,14 +1777,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1353} 1777}
1354early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1778early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1355 1779
1356static __init int setup_noapictimer(char *str) 1780static int __init parse_disable_apic_timer(char *arg)
1357{ 1781{
1358 if (str[0] != ' ' && str[0] != 0)
1359 return 0;
1360 disable_apic_timer = 1; 1782 disable_apic_timer = 1;
1361 return 1; 1783 return 0;
1362} 1784}
1363__setup("noapictimer", setup_noapictimer); 1785early_param("noapictimer", parse_disable_apic_timer);
1786
1787static int __init parse_nolapic_timer(char *arg)
1788{
1789 disable_apic_timer = 1;
1790 return 0;
1791}
1792early_param("nolapic_timer", parse_nolapic_timer);
1364 1793
1365static __init int setup_apicpmtimer(char *s) 1794static __init int setup_apicpmtimer(char *s)
1366{ 1795{
@@ -1370,6 +1799,31 @@ static __init int setup_apicpmtimer(char *s)
1370} 1799}
1371__setup("apicpmtimer", setup_apicpmtimer); 1800__setup("apicpmtimer", setup_apicpmtimer);
1372 1801
1802static int __init apic_set_verbosity(char *arg)
1803{
1804 if (!arg) {
1805#ifdef CONFIG_X86_64
1806 skip_ioapic_setup = 0;
1807 ioapic_force = 1;
1808 return 0;
1809#endif
1810 return -EINVAL;
1811 }
1812
1813 if (strcmp("debug", arg) == 0)
1814 apic_verbosity = APIC_DEBUG;
1815 else if (strcmp("verbose", arg) == 0)
1816 apic_verbosity = APIC_VERBOSE;
1817 else {
1818 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1819 " use apic=verbose or apic=debug\n", arg);
1820 return -EINVAL;
1821 }
1822
1823 return 0;
1824}
1825early_param("apic", apic_set_verbosity);
1826
1373static int __init lapic_insert_resource(void) 1827static int __init lapic_insert_resource(void)
1374{ 1828{
1375 if (!apic_phys) 1829 if (!apic_phys)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee76eaad3001..7f0b45a5d788 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,22 +3,30 @@
3# 3#
4 4
5obj-y := intel_cacheinfo.o addon_cpuid_features.o 5obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o feature_names.o 6obj-y += proc.o capflags.o powerflags.o common.o
7 7
8obj-$(CONFIG_X86_32) += common.o bugs.o 8obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
9obj-$(CONFIG_X86_64) += common_64.o bugs_64.o 9obj-$(CONFIG_X86_64) += bugs_64.o
10obj-$(CONFIG_X86_32) += amd.o 10
11obj-$(CONFIG_X86_64) += amd_64.o 11obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
12obj-$(CONFIG_X86_32) += cyrix.o 12obj-$(CONFIG_CPU_SUP_AMD) += amd.o
13obj-$(CONFIG_X86_32) += centaur.o 13obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
14obj-$(CONFIG_X86_64) += centaur_64.o 14obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o
15obj-$(CONFIG_X86_32) += transmeta.o 15obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
16obj-$(CONFIG_X86_32) += intel.o 16obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
17obj-$(CONFIG_X86_64) += intel_64.o 17obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
18obj-$(CONFIG_X86_32) += umc.o
19 18
20obj-$(CONFIG_X86_MCE) += mcheck/ 19obj-$(CONFIG_X86_MCE) += mcheck/
21obj-$(CONFIG_MTRR) += mtrr/ 20obj-$(CONFIG_MTRR) += mtrr/
22obj-$(CONFIG_CPU_FREQ) += cpufreq/ 21obj-$(CONFIG_CPU_FREQ) += cpufreq/
23 22
24obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 23obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
24
25quiet_cmd_mkcapflags = MKCAP $@
26 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
27
28cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h
29
30targets += capflags.c
31$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
32 $(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index a6ef672adbba..0d9c993aa93e 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,6 +7,8 @@
7#include <asm/pat.h> 7#include <asm/pat.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9 9
10#include <mach_apic.h>
11
10struct cpuid_bit { 12struct cpuid_bit {
11 u16 feature; 13 u16 feature;
12 u8 reg; 14 u8 reg;
@@ -48,6 +50,92 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
48 } 50 }
49} 51}
50 52
53/* leaf 0xb SMT level */
54#define SMT_LEVEL 0
55
56/* leaf 0xb sub-leaf types */
57#define INVALID_TYPE 0
58#define SMT_TYPE 1
59#define CORE_TYPE 2
60
61#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
62#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
63#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
64
65/*
66 * Check for extended topology enumeration cpuid leaf 0xb and if it
67 * exists, use it for populating initial_apicid and cpu topology
68 * detection.
69 */
70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
71{
72#ifdef CONFIG_SMP
73 unsigned int eax, ebx, ecx, edx, sub_index;
74 unsigned int ht_mask_width, core_plus_mask_width;
75 unsigned int core_select_mask, core_level_siblings;
76
77 if (c->cpuid_level < 0xb)
78 return;
79
80 cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
81
82 /*
83 * check if the cpuid leaf 0xb is actually implemented.
84 */
85 if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
86 return;
87
88 set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
89
90 /*
91 * initial apic id, which also represents 32-bit extended x2apic id.
92 */
93 c->initial_apicid = edx;
94
95 /*
96 * Populate HT related information from sub-leaf level 0.
97 */
98 core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
99 core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
100
101 sub_index = 1;
102 do {
103 cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
104
105 /*
106 * Check for the Core type in the implemented sub leaves.
107 */
108 if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
109 core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
110 core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
111 break;
112 }
113
114 sub_index++;
115 } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
116
117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
118
119#ifdef CONFIG_X86_32
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123#else
124 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
125 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
126#endif
127 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
128
129
130 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
131 c->phys_proc_id);
132 if (c->x86_max_cores > 1)
133 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
134 c->cpu_core_id);
135 return;
136#endif
137}
138
51#ifdef CONFIG_X86_PAT 139#ifdef CONFIG_X86_PAT
52void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) 140void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
53{ 141{
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 18514ed26104..32e73520adf7 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,13 +1,22 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/bitops.h> 2#include <linux/bitops.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4
4#include <asm/io.h> 5#include <asm/io.h>
5#include <asm/processor.h> 6#include <asm/processor.h>
6#include <asm/apic.h> 7#include <asm/apic.h>
7 8
9#ifdef CONFIG_X86_64
10# include <asm/numa_64.h>
11# include <asm/mmconfig.h>
12# include <asm/cacheflush.h>
13#endif
14
8#include <mach_apic.h> 15#include <mach_apic.h>
16
9#include "cpu.h" 17#include "cpu.h"
10 18
19#ifdef CONFIG_X86_32
11/* 20/*
12 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause 21 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
13 * misexecution of code under Linux. Owners of such processors should 22 * misexecution of code under Linux. Owners of such processors should
@@ -24,26 +33,273 @@
24extern void vide(void); 33extern void vide(void);
25__asm__(".align 4\nvide: ret"); 34__asm__(".align 4\nvide: ret");
26 35
27static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 36static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
28{ 37{
29 if (cpuid_eax(0x80000000) >= 0x80000007) { 38/*
30 c->x86_power = cpuid_edx(0x80000007); 39 * General Systems BIOSen alias the cpu frequency registers
31 if (c->x86_power & (1<<8)) 40 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
32 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 41 * drivers subsequently pokes it, and changes the CPU speed.
42 * Workaround : Remove the unneeded alias.
43 */
44#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
45#define CBAR_ENB (0x80000000)
46#define CBAR_KEY (0X000000CB)
47 if (c->x86_model == 9 || c->x86_model == 10) {
48 if (inl (CBAR) & CBAR_ENB)
49 outl (0 | CBAR_KEY, CBAR);
33 } 50 }
34
35 /* Set MTRR capability flag if appropriate */
36 if (c->x86_model == 13 || c->x86_model == 9 ||
37 (c->x86_model == 8 && c->x86_mask >= 8))
38 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
39} 51}
40 52
41static void __cpuinit init_amd(struct cpuinfo_x86 *c) 53
54static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
42{ 55{
43 u32 l, h; 56 u32 l, h;
44 int mbytes = num_physpages >> (20-PAGE_SHIFT); 57 int mbytes = num_physpages >> (20-PAGE_SHIFT);
45 int r;
46 58
59 if (c->x86_model < 6) {
60 /* Based on AMD doc 20734R - June 2000 */
61 if (c->x86_model == 0) {
62 clear_cpu_cap(c, X86_FEATURE_APIC);
63 set_cpu_cap(c, X86_FEATURE_PGE);
64 }
65 return;
66 }
67
68 if (c->x86_model == 6 && c->x86_mask == 1) {
69 const int K6_BUG_LOOP = 1000000;
70 int n;
71 void (*f_vide)(void);
72 unsigned long d, d2;
73
74 printk(KERN_INFO "AMD K6 stepping B detected - ");
75
76 /*
77 * It looks like AMD fixed the 2.6.2 bug and improved indirect
78 * calls at the same time.
79 */
80
81 n = K6_BUG_LOOP;
82 f_vide = vide;
83 rdtscl(d);
84 while (n--)
85 f_vide();
86 rdtscl(d2);
87 d = d2-d;
88
89 if (d > 20*K6_BUG_LOOP)
90 printk("system stability may be impaired when more than 32 MB are used.\n");
91 else
92 printk("probably OK (after B9730xxxx).\n");
93 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
94 }
95
96 /* K6 with old style WHCR */
97 if (c->x86_model < 8 ||
98 (c->x86_model == 8 && c->x86_mask < 8)) {
99 /* We can only write allocate on the low 508Mb */
100 if (mbytes > 508)
101 mbytes = 508;
102
103 rdmsr(MSR_K6_WHCR, l, h);
104 if ((l&0x0000FFFF) == 0) {
105 unsigned long flags;
106 l = (1<<0)|((mbytes/4)<<1);
107 local_irq_save(flags);
108 wbinvd();
109 wrmsr(MSR_K6_WHCR, l, h);
110 local_irq_restore(flags);
111 printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
112 mbytes);
113 }
114 return;
115 }
116
117 if ((c->x86_model == 8 && c->x86_mask > 7) ||
118 c->x86_model == 9 || c->x86_model == 13) {
119 /* The more serious chips .. */
120
121 if (mbytes > 4092)
122 mbytes = 4092;
123
124 rdmsr(MSR_K6_WHCR, l, h);
125 if ((l&0xFFFF0000) == 0) {
126 unsigned long flags;
127 l = ((mbytes>>2)<<22)|(1<<16);
128 local_irq_save(flags);
129 wbinvd();
130 wrmsr(MSR_K6_WHCR, l, h);
131 local_irq_restore(flags);
132 printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
133 mbytes);
134 }
135
136 return;
137 }
138
139 if (c->x86_model == 10) {
140 /* AMD Geode LX is model 10 */
141 /* placeholder for any needed mods */
142 return;
143 }
144}
145
146static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
147{
148 u32 l, h;
149
150 /*
151 * Bit 15 of Athlon specific MSR 15, needs to be 0
152 * to enable SSE on Palomino/Morgan/Barton CPU's.
153 * If the BIOS didn't enable it already, enable it here.
154 */
155 if (c->x86_model >= 6 && c->x86_model <= 10) {
156 if (!cpu_has(c, X86_FEATURE_XMM)) {
157 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
158 rdmsr(MSR_K7_HWCR, l, h);
159 l &= ~0x00008000;
160 wrmsr(MSR_K7_HWCR, l, h);
161 set_cpu_cap(c, X86_FEATURE_XMM);
162 }
163 }
164
165 /*
166 * It's been determined by AMD that Athlons since model 8 stepping 1
167 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
168 * As per AMD technical note 27212 0.2
169 */
170 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
171 rdmsr(MSR_K7_CLK_CTL, l, h);
172 if ((l & 0xfff00000) != 0x20000000) {
173 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
174 ((l & 0x000fffff)|0x20000000));
175 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
176 }
177 }
178
179 set_cpu_cap(c, X86_FEATURE_K7);
180}
181#endif
182
183#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
184static int __cpuinit nearby_node(int apicid)
185{
186 int i, node;
187
188 for (i = apicid - 1; i >= 0; i--) {
189 node = apicid_to_node[i];
190 if (node != NUMA_NO_NODE && node_online(node))
191 return node;
192 }
193 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
194 node = apicid_to_node[i];
195 if (node != NUMA_NO_NODE && node_online(node))
196 return node;
197 }
198 return first_node(node_online_map); /* Shouldn't happen */
199}
200#endif
201
202/*
203 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
204 * Assumes number of cores is a power of two.
205 */
206static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
207{
208#ifdef CONFIG_X86_HT
209 unsigned bits;
210
211 bits = c->x86_coreid_bits;
212
213 /* Low order bits define the core id (index of core in socket) */
214 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
215 /* Convert the initial APIC ID into the socket ID */
216 c->phys_proc_id = c->initial_apicid >> bits;
217#endif
218}
219
220static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
221{
222#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
223 int cpu = smp_processor_id();
224 int node;
225 unsigned apicid = hard_smp_processor_id();
226
227 node = c->phys_proc_id;
228 if (apicid_to_node[apicid] != NUMA_NO_NODE)
229 node = apicid_to_node[apicid];
230 if (!node_online(node)) {
231 /* Two possibilities here:
232 - The CPU is missing memory and no node was created.
233 In that case try picking one from a nearby CPU
234 - The APIC IDs differ from the HyperTransport node IDs
235 which the K8 northbridge parsing fills in.
236 Assume they are all increased by a constant offset,
237 but in the same order as the HT nodeids.
238 If that doesn't result in a usable node fall back to the
239 path for the previous case. */
240
241 int ht_nodeid = c->initial_apicid;
242
243 if (ht_nodeid >= 0 &&
244 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
245 node = apicid_to_node[ht_nodeid];
246 /* Pick a nearby node */
247 if (!node_online(node))
248 node = nearby_node(apicid);
249 }
250 numa_set_node(cpu, node);
251
252 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
253#endif
254}
255
256static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
257{
258#ifdef CONFIG_X86_HT
259 unsigned bits, ecx;
260
261 /* Multi core CPU? */
262 if (c->extended_cpuid_level < 0x80000008)
263 return;
264
265 ecx = cpuid_ecx(0x80000008);
266
267 c->x86_max_cores = (ecx & 0xff) + 1;
268
269 /* CPU telling us the core id bits shift? */
270 bits = (ecx >> 12) & 0xF;
271
272 /* Otherwise recompute */
273 if (bits == 0) {
274 while ((1 << bits) < c->x86_max_cores)
275 bits++;
276 }
277
278 c->x86_coreid_bits = bits;
279#endif
280}
281
282static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
283{
284 early_init_amd_mc(c);
285
286 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
287 if (c->x86_power & (1<<8))
288 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
289
290#ifdef CONFIG_X86_64
291 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
292#else
293 /* Set MTRR capability flag if appropriate */
294 if (c->x86 == 5)
295 if (c->x86_model == 13 || c->x86_model == 9 ||
296 (c->x86_model == 8 && c->x86_mask >= 8))
297 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
298#endif
299}
300
301static void __cpuinit init_amd(struct cpuinfo_x86 *c)
302{
47#ifdef CONFIG_SMP 303#ifdef CONFIG_SMP
48 unsigned long long value; 304 unsigned long long value;
49 305
@@ -54,7 +310,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
54 * Errata 63 for SH-B3 steppings 310 * Errata 63 for SH-B3 steppings
55 * Errata 122 for all steppings (F+ have it disabled by default) 311 * Errata 122 for all steppings (F+ have it disabled by default)
56 */ 312 */
57 if (c->x86 == 15) { 313 if (c->x86 == 0xf) {
58 rdmsrl(MSR_K7_HWCR, value); 314 rdmsrl(MSR_K7_HWCR, value);
59 value |= 1 << 6; 315 value |= 1 << 6;
60 wrmsrl(MSR_K7_HWCR, value); 316 wrmsrl(MSR_K7_HWCR, value);
@@ -64,209 +320,119 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
64 early_init_amd(c); 320 early_init_amd(c);
65 321
66 /* 322 /*
67 * FIXME: We should handle the K5 here. Set up the write
68 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
69 * no bus pipeline)
70 */
71
72 /*
73 * Bit 31 in normal CPUID used for nonstandard 3DNow ID; 323 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
74 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 324 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
75 */ 325 */
76 clear_cpu_cap(c, 0*32+31); 326 clear_cpu_cap(c, 0*32+31);
77 327
78 r = get_model_name(c); 328#ifdef CONFIG_X86_64
329 /* On C+ stepping K8 rep microcode works well for copy/memset */
330 if (c->x86 == 0xf) {
331 u32 level;
79 332
80 switch (c->x86) { 333 level = cpuid_eax(1);
81 case 4: 334 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
82 /* 335 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
83 * General Systems BIOSen alias the cpu frequency registers
84 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
85 * drivers subsequently pokes it, and changes the CPU speed.
86 * Workaround : Remove the unneeded alias.
87 */
88#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
89#define CBAR_ENB (0x80000000)
90#define CBAR_KEY (0X000000CB)
91 if (c->x86_model == 9 || c->x86_model == 10) {
92 if (inl (CBAR) & CBAR_ENB)
93 outl (0 | CBAR_KEY, CBAR);
94 }
95 break;
96 case 5:
97 if (c->x86_model < 6) {
98 /* Based on AMD doc 20734R - June 2000 */
99 if (c->x86_model == 0) {
100 clear_cpu_cap(c, X86_FEATURE_APIC);
101 set_cpu_cap(c, X86_FEATURE_PGE);
102 }
103 break;
104 }
105
106 if (c->x86_model == 6 && c->x86_mask == 1) {
107 const int K6_BUG_LOOP = 1000000;
108 int n;
109 void (*f_vide)(void);
110 unsigned long d, d2;
111
112 printk(KERN_INFO "AMD K6 stepping B detected - ");
113
114 /*
115 * It looks like AMD fixed the 2.6.2 bug and improved indirect
116 * calls at the same time.
117 */
118
119 n = K6_BUG_LOOP;
120 f_vide = vide;
121 rdtscl(d);
122 while (n--)
123 f_vide();
124 rdtscl(d2);
125 d = d2-d;
126
127 if (d > 20*K6_BUG_LOOP)
128 printk("system stability may be impaired when more than 32 MB are used.\n");
129 else
130 printk("probably OK (after B9730xxxx).\n");
131 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
132 }
133
134 /* K6 with old style WHCR */
135 if (c->x86_model < 8 ||
136 (c->x86_model == 8 && c->x86_mask < 8)) {
137 /* We can only write allocate on the low 508Mb */
138 if (mbytes > 508)
139 mbytes = 508;
140
141 rdmsr(MSR_K6_WHCR, l, h);
142 if ((l&0x0000FFFF) == 0) {
143 unsigned long flags;
144 l = (1<<0)|((mbytes/4)<<1);
145 local_irq_save(flags);
146 wbinvd();
147 wrmsr(MSR_K6_WHCR, l, h);
148 local_irq_restore(flags);
149 printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
150 mbytes);
151 }
152 break;
153 }
154
155 if ((c->x86_model == 8 && c->x86_mask > 7) ||
156 c->x86_model == 9 || c->x86_model == 13) {
157 /* The more serious chips .. */
158
159 if (mbytes > 4092)
160 mbytes = 4092;
161
162 rdmsr(MSR_K6_WHCR, l, h);
163 if ((l&0xFFFF0000) == 0) {
164 unsigned long flags;
165 l = ((mbytes>>2)<<22)|(1<<16);
166 local_irq_save(flags);
167 wbinvd();
168 wrmsr(MSR_K6_WHCR, l, h);
169 local_irq_restore(flags);
170 printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
171 mbytes);
172 }
173
174 break;
175 }
176
177 if (c->x86_model == 10) {
178 /* AMD Geode LX is model 10 */
179 /* placeholder for any needed mods */
180 break;
181 }
182 break;
183 case 6: /* An Athlon/Duron */
184
185 /*
186 * Bit 15 of Athlon specific MSR 15, needs to be 0
187 * to enable SSE on Palomino/Morgan/Barton CPU's.
188 * If the BIOS didn't enable it already, enable it here.
189 */
190 if (c->x86_model >= 6 && c->x86_model <= 10) {
191 if (!cpu_has(c, X86_FEATURE_XMM)) {
192 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
193 rdmsr(MSR_K7_HWCR, l, h);
194 l &= ~0x00008000;
195 wrmsr(MSR_K7_HWCR, l, h);
196 set_cpu_cap(c, X86_FEATURE_XMM);
197 }
198 }
199
200 /*
201 * It's been determined by AMD that Athlons since model 8 stepping 1
202 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
203 * As per AMD technical note 27212 0.2
204 */
205 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
206 rdmsr(MSR_K7_CLK_CTL, l, h);
207 if ((l & 0xfff00000) != 0x20000000) {
208 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
209 ((l & 0x000fffff)|0x20000000));
210 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
211 }
212 }
213 break;
214 } 336 }
337 if (c->x86 == 0x10 || c->x86 == 0x11)
338 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
339#else
340
341 /*
342 * FIXME: We should handle the K5 here. Set up the write
343 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
344 * no bus pipeline)
345 */
215 346
216 switch (c->x86) { 347 switch (c->x86) {
217 case 15: 348 case 4:
218 /* Use K8 tuning for Fam10h and Fam11h */ 349 init_amd_k5(c);
219 case 0x10:
220 case 0x11:
221 set_cpu_cap(c, X86_FEATURE_K8);
222 break; 350 break;
223 case 6: 351 case 5:
224 set_cpu_cap(c, X86_FEATURE_K7); 352 init_amd_k6(c);
353 break;
354 case 6: /* An Athlon/Duron */
355 init_amd_k7(c);
225 break; 356 break;
226 } 357 }
358
359 /* K6s reports MCEs but don't actually have all the MSRs */
360 if (c->x86 < 6)
361 clear_cpu_cap(c, X86_FEATURE_MCE);
362#endif
363
364 /* Enable workaround for FXSAVE leak */
227 if (c->x86 >= 6) 365 if (c->x86 >= 6)
228 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); 366 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
229 367
230 display_cacheinfo(c); 368 if (!c->x86_model_id[0]) {
231 369 switch (c->x86) {
232 if (cpuid_eax(0x80000000) >= 0x80000008) 370 case 0xf:
233 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 371 /* Should distinguish Models here, but this is only
372 a fallback anyways. */
373 strcpy(c->x86_model_id, "Hammer");
374 break;
375 }
376 }
234 377
235#ifdef CONFIG_X86_HT 378 display_cacheinfo(c);
236 /*
237 * On a AMD multi core setup the lower bits of the APIC id
238 * distinguish the cores.
239 */
240 if (c->x86_max_cores > 1) {
241 int cpu = smp_processor_id();
242 unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
243 379
244 if (bits == 0) { 380 /* Multi core CPU? */
245 while ((1 << bits) < c->x86_max_cores) 381 if (c->extended_cpuid_level >= 0x80000008) {
246 bits++; 382 amd_detect_cmp(c);
247 } 383 srat_detect_node(c);
248 c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
249 c->phys_proc_id >>= bits;
250 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
251 cpu, c->x86_max_cores, c->cpu_core_id);
252 } 384 }
385
386#ifdef CONFIG_X86_32
387 detect_ht(c);
253#endif 388#endif
254 389
255 if (cpuid_eax(0x80000000) >= 0x80000006) { 390 if (c->extended_cpuid_level >= 0x80000006) {
256 if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) 391 if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
257 num_cache_leaves = 4; 392 num_cache_leaves = 4;
258 else 393 else
259 num_cache_leaves = 3; 394 num_cache_leaves = 3;
260 } 395 }
261 396
262 /* K6s reports MCEs but don't actually have all the MSRs */ 397 if (c->x86 >= 0xf && c->x86 <= 0x11)
263 if (c->x86 < 6) 398 set_cpu_cap(c, X86_FEATURE_K8);
264 clear_cpu_cap(c, X86_FEATURE_MCE);
265 399
266 if (cpu_has_xmm2) 400 if (cpu_has_xmm2) {
401 /* MFENCE stops RDTSC speculation */
267 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); 402 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
403 }
404
405#ifdef CONFIG_X86_64
406 if (c->x86 == 0x10) {
407 /* do this for boot cpu */
408 if (c == &boot_cpu_data)
409 check_enable_amd_mmconf_dmi();
410
411 fam10h_check_enable_mmcfg();
412 }
413
414 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
415 unsigned long long tseg;
416
417 /*
418 * Split up direct mapping around the TSEG SMM area.
419 * Don't do it for gbpages because there seems very little
420 * benefit in doing so.
421 */
422 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
423 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
424 if ((tseg>>PMD_SHIFT) <
425 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
426 ((tseg>>PMD_SHIFT) <
427 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
428 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
429 set_memory_4k((unsigned long)__va(tseg), 1);
430 }
431 }
432#endif
268} 433}
269 434
435#ifdef CONFIG_X86_32
270static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) 436static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
271{ 437{
272 /* AMD errata T13 (order #21922) */ 438 /* AMD errata T13 (order #21922) */
@@ -279,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
279 } 445 }
280 return size; 446 return size;
281} 447}
448#endif
282 449
283static struct cpu_dev amd_cpu_dev __cpuinitdata = { 450static struct cpu_dev amd_cpu_dev __cpuinitdata = {
284 .c_vendor = "AMD", 451 .c_vendor = "AMD",
285 .c_ident = { "AuthenticAMD" }, 452 .c_ident = { "AuthenticAMD" },
453#ifdef CONFIG_X86_32
286 .c_models = { 454 .c_models = {
287 { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = 455 { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
288 { 456 {
@@ -295,9 +463,11 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
295 } 463 }
296 }, 464 },
297 }, 465 },
466 .c_size_cache = amd_size_cache,
467#endif
298 .c_early_init = early_init_amd, 468 .c_early_init = early_init_amd,
299 .c_init = init_amd, 469 .c_init = init_amd,
300 .c_size_cache = amd_size_cache, 470 .c_x86_vendor = X86_VENDOR_AMD,
301}; 471};
302 472
303cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); 473cpu_dev_register(amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
deleted file mode 100644
index d1692b2a41ff..000000000000
--- a/arch/x86/kernel/cpu/amd_64.c
+++ /dev/null
@@ -1,224 +0,0 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3
4#include <asm/numa_64.h>
5#include <asm/mmconfig.h>
6#include <asm/cacheflush.h>
7
8#include <mach_apic.h>
9
10#include "cpu.h"
11
12int force_mwait __cpuinitdata;
13
14#ifdef CONFIG_NUMA
15static int __cpuinit nearby_node(int apicid)
16{
17 int i, node;
18
19 for (i = apicid - 1; i >= 0; i--) {
20 node = apicid_to_node[i];
21 if (node != NUMA_NO_NODE && node_online(node))
22 return node;
23 }
24 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
25 node = apicid_to_node[i];
26 if (node != NUMA_NO_NODE && node_online(node))
27 return node;
28 }
29 return first_node(node_online_map); /* Shouldn't happen */
30}
31#endif
32
33/*
34 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
35 * Assumes number of cores is a power of two.
36 */
37static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
38{
39#ifdef CONFIG_SMP
40 unsigned bits;
41#ifdef CONFIG_NUMA
42 int cpu = smp_processor_id();
43 int node = 0;
44 unsigned apicid = hard_smp_processor_id();
45#endif
46 bits = c->x86_coreid_bits;
47
48 /* Low order bits define the core id (index of core in socket) */
49 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
50 /* Convert the initial APIC ID into the socket ID */
51 c->phys_proc_id = c->initial_apicid >> bits;
52
53#ifdef CONFIG_NUMA
54 node = c->phys_proc_id;
55 if (apicid_to_node[apicid] != NUMA_NO_NODE)
56 node = apicid_to_node[apicid];
57 if (!node_online(node)) {
58 /* Two possibilities here:
59 - The CPU is missing memory and no node was created.
60 In that case try picking one from a nearby CPU
61 - The APIC IDs differ from the HyperTransport node IDs
62 which the K8 northbridge parsing fills in.
63 Assume they are all increased by a constant offset,
64 but in the same order as the HT nodeids.
65 If that doesn't result in a usable node fall back to the
66 path for the previous case. */
67
68 int ht_nodeid = c->initial_apicid;
69
70 if (ht_nodeid >= 0 &&
71 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
72 node = apicid_to_node[ht_nodeid];
73 /* Pick a nearby node */
74 if (!node_online(node))
75 node = nearby_node(apicid);
76 }
77 numa_set_node(cpu, node);
78
79 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
80#endif
81#endif
82}
83
84static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
85{
86#ifdef CONFIG_SMP
87 unsigned bits, ecx;
88
89 /* Multi core CPU? */
90 if (c->extended_cpuid_level < 0x80000008)
91 return;
92
93 ecx = cpuid_ecx(0x80000008);
94
95 c->x86_max_cores = (ecx & 0xff) + 1;
96
97 /* CPU telling us the core id bits shift? */
98 bits = (ecx >> 12) & 0xF;
99
100 /* Otherwise recompute */
101 if (bits == 0) {
102 while ((1 << bits) < c->x86_max_cores)
103 bits++;
104 }
105
106 c->x86_coreid_bits = bits;
107
108#endif
109}
110
111static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
112{
113 early_init_amd_mc(c);
114
115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
116 if (c->x86_power & (1<<8))
117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
118
119 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
120}
121
122static void __cpuinit init_amd(struct cpuinfo_x86 *c)
123{
124 unsigned level;
125
126#ifdef CONFIG_SMP
127 unsigned long value;
128
129 /*
130 * Disable TLB flush filter by setting HWCR.FFDIS on K8
131 * bit 6 of msr C001_0015
132 *
133 * Errata 63 for SH-B3 steppings
134 * Errata 122 for all steppings (F+ have it disabled by default)
135 */
136 if (c->x86 == 0xf) {
137 rdmsrl(MSR_K8_HWCR, value);
138 value |= 1 << 6;
139 wrmsrl(MSR_K8_HWCR, value);
140 }
141#endif
142
143 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
144 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
145 clear_cpu_cap(c, 0*32+31);
146
147 /* On C+ stepping K8 rep microcode works well for copy/memset */
148 if (c->x86 == 0xf) {
149 level = cpuid_eax(1);
150 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
151 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
152 }
153 if (c->x86 == 0x10 || c->x86 == 0x11)
154 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
155
156 /* Enable workaround for FXSAVE leak */
157 if (c->x86 >= 6)
158 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
159
160 level = get_model_name(c);
161 if (!level) {
162 switch (c->x86) {
163 case 0xf:
164 /* Should distinguish Models here, but this is only
165 a fallback anyways. */
166 strcpy(c->x86_model_id, "Hammer");
167 break;
168 }
169 }
170 display_cacheinfo(c);
171
172 /* Multi core CPU? */
173 if (c->extended_cpuid_level >= 0x80000008)
174 amd_detect_cmp(c);
175
176 if (c->extended_cpuid_level >= 0x80000006 &&
177 (cpuid_edx(0x80000006) & 0xf000))
178 num_cache_leaves = 4;
179 else
180 num_cache_leaves = 3;
181
182 if (c->x86 >= 0xf && c->x86 <= 0x11)
183 set_cpu_cap(c, X86_FEATURE_K8);
184
185 /* MFENCE stops RDTSC speculation */
186 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
187
188 if (c->x86 == 0x10) {
189 /* do this for boot cpu */
190 if (c == &boot_cpu_data)
191 check_enable_amd_mmconf_dmi();
192
193 fam10h_check_enable_mmcfg();
194 }
195
196 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
197 unsigned long long tseg;
198
199 /*
200 * Split up direct mapping around the TSEG SMM area.
201 * Don't do it for gbpages because there seems very little
202 * benefit in doing so.
203 */
204 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
205 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
206 if ((tseg>>PMD_SHIFT) <
207 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
208 ((tseg>>PMD_SHIFT) <
209 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
210 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
211 set_memory_4k((unsigned long)__va(tseg), 1);
212 }
213 }
214}
215
216static struct cpu_dev amd_cpu_dev __cpuinitdata = {
217 .c_vendor = "AMD",
218 .c_ident = { "AuthenticAMD" },
219 .c_early_init = early_init_amd,
220 .c_init = init_amd,
221};
222
223cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
224
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index a0534c04d38a..89bfdd9cacc6 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
289 if (c->x86_model >= 6 && c->x86_model < 9) 289 if (c->x86_model >= 6 && c->x86_model < 9)
290 set_cpu_cap(c, X86_FEATURE_3DNOW); 290 set_cpu_cap(c, X86_FEATURE_3DNOW);
291 291
292 get_model_name(c);
293 display_cacheinfo(c); 292 display_cacheinfo(c);
294} 293}
295 294
@@ -475,6 +474,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
475 .c_early_init = early_init_centaur, 474 .c_early_init = early_init_centaur,
476 .c_init = init_centaur, 475 .c_init = init_centaur,
477 .c_size_cache = centaur_size_cache, 476 .c_size_cache = centaur_size_cache,
477 .c_x86_vendor = X86_VENDOR_CENTAUR,
478}; 478};
479 479
480cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev); 480cpu_dev_register(centaur_cpu_dev);
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 1d181c40e2e1..a1625f5a1e78 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -16,9 +16,10 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
16 16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{ 18{
19 early_init_centaur(c);
20
19 if (c->x86 == 0x6 && c->x86_model >= 0xf) { 21 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
20 c->x86_cache_alignment = c->x86_clflush_size * 2; 22 c->x86_cache_alignment = c->x86_clflush_size * 2;
21 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
22 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 23 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
23 } 24 }
24 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); 25 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
@@ -29,7 +30,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
29 .c_ident = { "CentaurHauls" }, 30 .c_ident = { "CentaurHauls" },
30 .c_early_init = early_init_centaur, 31 .c_early_init = early_init_centaur,
31 .c_init = init_centaur, 32 .c_init = init_centaur,
33 .c_x86_vendor = X86_VENDOR_CENTAUR,
32}; 34};
33 35
34cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev); 36cpu_dev_register(centaur_cpu_dev);
35 37
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
new file mode 100644
index 000000000000..2056ccf572cc
--- /dev/null
+++ b/arch/x86/kernel/cpu/cmpxchg.c
@@ -0,0 +1,72 @@
1/*
2 * cmpxchg*() fallbacks for CPU not supporting these instructions
3 */
4
5#include <linux/kernel.h>
6#include <linux/smp.h>
7#include <linux/module.h>
8
9#ifndef CONFIG_X86_CMPXCHG
10unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
11{
12 u8 prev;
13 unsigned long flags;
14
15 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
16 local_irq_save(flags);
17 prev = *(u8 *)ptr;
18 if (prev == old)
19 *(u8 *)ptr = new;
20 local_irq_restore(flags);
21 return prev;
22}
23EXPORT_SYMBOL(cmpxchg_386_u8);
24
25unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
26{
27 u16 prev;
28 unsigned long flags;
29
30 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
31 local_irq_save(flags);
32 prev = *(u16 *)ptr;
33 if (prev == old)
34 *(u16 *)ptr = new;
35 local_irq_restore(flags);
36 return prev;
37}
38EXPORT_SYMBOL(cmpxchg_386_u16);
39
40unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
41{
42 u32 prev;
43 unsigned long flags;
44
45 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
46 local_irq_save(flags);
47 prev = *(u32 *)ptr;
48 if (prev == old)
49 *(u32 *)ptr = new;
50 local_irq_restore(flags);
51 return prev;
52}
53EXPORT_SYMBOL(cmpxchg_386_u32);
54#endif
55
56#ifndef CONFIG_X86_CMPXCHG64
57unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
58{
59 u64 prev;
60 unsigned long flags;
61
62 /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
63 local_irq_save(flags);
64 prev = *(u64 *)ptr;
65 if (prev == old)
66 *(u64 *)ptr = new;
67 local_irq_restore(flags);
68 return prev;
69}
70EXPORT_SYMBOL(cmpxchg_486_u64);
71#endif
72
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4e456bd955bb..7581b62df184 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,28 +1,62 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
2#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
3#include <linux/delay.h> 10#include <linux/delay.h>
4#include <linux/smp.h> 11#include <linux/smp.h>
5#include <linux/module.h>
6#include <linux/percpu.h> 12#include <linux/percpu.h>
7#include <linux/bootmem.h>
8#include <asm/processor.h>
9#include <asm/i387.h> 13#include <asm/i387.h>
10#include <asm/msr.h> 14#include <asm/msr.h>
11#include <asm/io.h> 15#include <asm/io.h>
16#include <asm/linkage.h>
12#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
13#include <asm/mtrr.h> 18#include <asm/mtrr.h>
14#include <asm/mce.h> 19#include <asm/mce.h>
15#include <asm/pat.h> 20#include <asm/pat.h>
16#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h>
17#ifdef CONFIG_X86_LOCAL_APIC 23#ifdef CONFIG_X86_LOCAL_APIC
18#include <asm/mpspec.h> 24#include <asm/mpspec.h>
19#include <asm/apic.h> 25#include <asm/apic.h>
20#include <mach_apic.h> 26#include <mach_apic.h>
27#include <asm/genapic.h>
21#endif 28#endif
22 29
30#include <asm/pda.h>
31#include <asm/pgtable.h>
32#include <asm/processor.h>
33#include <asm/desc.h>
34#include <asm/atomic.h>
35#include <asm/proto.h>
36#include <asm/sections.h>
37#include <asm/setup.h>
38
23#include "cpu.h" 39#include "cpu.h"
24 40
41static struct cpu_dev *this_cpu __cpuinitdata;
42
43#ifdef CONFIG_X86_64
44/* We need valid kernel segments for data and code in long mode too
45 * IRET will check the segment types kkeil 2000/10/28
46 * Also sysret mandates a special GDT layout
47 */
48/* The TLS descriptors are currently at a different place compared to i386.
49 Hopefully nobody expects them at a fixed place (Wine?) */
25DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 50DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
51 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
52 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
53 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
54 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
55 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
56 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
57} };
58#else
59DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
26 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 60 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
27 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 61 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
28 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 62 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -56,17 +90,150 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
56 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 90 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
57 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 91 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
58} }; 92} };
93#endif
59EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 94EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
60 95
61__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 96#ifdef CONFIG_X86_32
62
63static int cachesize_override __cpuinitdata = -1; 97static int cachesize_override __cpuinitdata = -1;
64static int disable_x86_serial_nr __cpuinitdata = 1; 98static int disable_x86_serial_nr __cpuinitdata = 1;
65 99
66struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 100static int __init cachesize_setup(char *str)
101{
102 get_option(&str, &cachesize_override);
103 return 1;
104}
105__setup("cachesize=", cachesize_setup);
106
107static int __init x86_fxsr_setup(char *s)
108{
109 setup_clear_cpu_cap(X86_FEATURE_FXSR);
110 setup_clear_cpu_cap(X86_FEATURE_XMM);
111 return 1;
112}
113__setup("nofxsr", x86_fxsr_setup);
114
115static int __init x86_sep_setup(char *s)
116{
117 setup_clear_cpu_cap(X86_FEATURE_SEP);
118 return 1;
119}
120__setup("nosep", x86_sep_setup);
121
122/* Standard macro to see if a specific flag is changeable */
123static inline int flag_is_changeable_p(u32 flag)
124{
125 u32 f1, f2;
126
127 asm("pushfl\n\t"
128 "pushfl\n\t"
129 "popl %0\n\t"
130 "movl %0,%1\n\t"
131 "xorl %2,%0\n\t"
132 "pushl %0\n\t"
133 "popfl\n\t"
134 "pushfl\n\t"
135 "popl %0\n\t"
136 "popfl\n\t"
137 : "=&r" (f1), "=&r" (f2)
138 : "ir" (flag));
139
140 return ((f1^f2) & flag) != 0;
141}
142
143/* Probe for the CPUID instruction */
144static int __cpuinit have_cpuid_p(void)
145{
146 return flag_is_changeable_p(X86_EFLAGS_ID);
147}
148
149static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
150{
151 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
152 /* Disable processor serial number */
153 unsigned long lo, hi;
154 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
155 lo |= 0x200000;
156 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
157 printk(KERN_NOTICE "CPU serial number disabled.\n");
158 clear_cpu_cap(c, X86_FEATURE_PN);
159
160 /* Disabling the serial number may affect the cpuid level */
161 c->cpuid_level = cpuid_eax(0);
162 }
163}
164
165static int __init x86_serial_nr_setup(char *s)
166{
167 disable_x86_serial_nr = 0;
168 return 1;
169}
170__setup("serialnumber", x86_serial_nr_setup);
171#else
172static inline int flag_is_changeable_p(u32 flag)
173{
174 return 1;
175}
176/* Probe for the CPUID instruction */
177static inline int have_cpuid_p(void)
178{
179 return 1;
180}
181static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
182{
183}
184#endif
185
186/*
187 * Naming convention should be: <Name> [(<Codename>)]
188 * This table only is used unless init_<vendor>() below doesn't set it;
189 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
190 *
191 */
192
193/* Look up CPU names by table lookup. */
194static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
195{
196 struct cpu_model_info *info;
197
198 if (c->x86_model >= 16)
199 return NULL; /* Range check */
200
201 if (!this_cpu)
202 return NULL;
203
204 info = this_cpu->c_models;
205
206 while (info && info->family) {
207 if (info->family == c->x86)
208 return info->model_names[c->x86_model];
209 info++;
210 }
211 return NULL; /* Not found */
212}
213
214__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
215
216/* Current gdt points %fs at the "master" per-cpu area: after this,
217 * it's on the real one. */
218void switch_to_new_gdt(void)
219{
220 struct desc_ptr gdt_descr;
221
222 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
223 gdt_descr.size = GDT_SIZE - 1;
224 load_gdt(&gdt_descr);
225#ifdef CONFIG_X86_32
226 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
227#endif
228}
229
230static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
67 231
68static void __cpuinit default_init(struct cpuinfo_x86 *c) 232static void __cpuinit default_init(struct cpuinfo_x86 *c)
69{ 233{
234#ifdef CONFIG_X86_64
235 display_cacheinfo(c);
236#else
70 /* Not much we can do here... */ 237 /* Not much we can do here... */
71 /* Check if at least it has cpuid */ 238 /* Check if at least it has cpuid */
72 if (c->cpuid_level == -1) { 239 if (c->cpuid_level == -1) {
@@ -76,28 +243,22 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
76 else if (c->x86 == 3) 243 else if (c->x86 == 3)
77 strcpy(c->x86_model_id, "386"); 244 strcpy(c->x86_model_id, "386");
78 } 245 }
246#endif
79} 247}
80 248
81static struct cpu_dev __cpuinitdata default_cpu = { 249static struct cpu_dev __cpuinitdata default_cpu = {
82 .c_init = default_init, 250 .c_init = default_init,
83 .c_vendor = "Unknown", 251 .c_vendor = "Unknown",
252 .c_x86_vendor = X86_VENDOR_UNKNOWN,
84}; 253};
85static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
86 254
87static int __init cachesize_setup(char *str) 255static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
88{
89 get_option(&str, &cachesize_override);
90 return 1;
91}
92__setup("cachesize=", cachesize_setup);
93
94int __cpuinit get_model_name(struct cpuinfo_x86 *c)
95{ 256{
96 unsigned int *v; 257 unsigned int *v;
97 char *p, *q; 258 char *p, *q;
98 259
99 if (cpuid_eax(0x80000000) < 0x80000004) 260 if (c->extended_cpuid_level < 0x80000004)
100 return 0; 261 return;
101 262
102 v = (unsigned int *) c->x86_model_id; 263 v = (unsigned int *) c->x86_model_id;
103 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 264 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
@@ -116,30 +277,34 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
116 while (q <= &c->x86_model_id[48]) 277 while (q <= &c->x86_model_id[48])
117 *q++ = '\0'; /* Zero-pad the rest */ 278 *q++ = '\0'; /* Zero-pad the rest */
118 } 279 }
119
120 return 1;
121} 280}
122 281
123
124void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) 282void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
125{ 283{
126 unsigned int n, dummy, ecx, edx, l2size; 284 unsigned int n, dummy, ebx, ecx, edx, l2size;
127 285
128 n = cpuid_eax(0x80000000); 286 n = c->extended_cpuid_level;
129 287
130 if (n >= 0x80000005) { 288 if (n >= 0x80000005) {
131 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); 289 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
132 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", 290 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
133 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); 291 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
134 c->x86_cache_size = (ecx>>24)+(edx>>24); 292 c->x86_cache_size = (ecx>>24) + (edx>>24);
293#ifdef CONFIG_X86_64
294 /* On K8 L1 TLB is inclusive, so don't count it */
295 c->x86_tlbsize = 0;
296#endif
135 } 297 }
136 298
137 if (n < 0x80000006) /* Some chips just has a large L1. */ 299 if (n < 0x80000006) /* Some chips just has a large L1. */
138 return; 300 return;
139 301
140 ecx = cpuid_ecx(0x80000006); 302 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
141 l2size = ecx >> 16; 303 l2size = ecx >> 16;
142 304
305#ifdef CONFIG_X86_64
306 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
307#else
143 /* do processor-specific cache resizing */ 308 /* do processor-specific cache resizing */
144 if (this_cpu->c_size_cache) 309 if (this_cpu->c_size_cache)
145 l2size = this_cpu->c_size_cache(c, l2size); 310 l2size = this_cpu->c_size_cache(c, l2size);
@@ -150,116 +315,106 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
150 315
151 if (l2size == 0) 316 if (l2size == 0)
152 return; /* Again, no L2 cache is possible */ 317 return; /* Again, no L2 cache is possible */
318#endif
153 319
154 c->x86_cache_size = l2size; 320 c->x86_cache_size = l2size;
155 321
156 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", 322 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
157 l2size, ecx & 0xFF); 323 l2size, ecx & 0xFF);
158} 324}
159 325
160/* 326void __cpuinit detect_ht(struct cpuinfo_x86 *c)
161 * Naming convention should be: <Name> [(<Codename>)]
162 * This table only is used unless init_<vendor>() below doesn't set it;
163 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
164 *
165 */
166
167/* Look up CPU names by table lookup. */
168static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
169{ 327{
170 struct cpu_model_info *info; 328#ifdef CONFIG_X86_HT
329 u32 eax, ebx, ecx, edx;
330 int index_msb, core_bits;
171 331
172 if (c->x86_model >= 16) 332 if (!cpu_has(c, X86_FEATURE_HT))
173 return NULL; /* Range check */ 333 return;
174 334
175 if (!this_cpu) 335 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
176 return NULL; 336 goto out;
177 337
178 info = this_cpu->c_models; 338 if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
339 return;
179 340
180 while (info && info->family) { 341 cpuid(1, &eax, &ebx, &ecx, &edx);
181 if (info->family == c->x86) 342
182 return info->model_names[c->x86_model]; 343 smp_num_siblings = (ebx & 0xff0000) >> 16;
183 info++; 344
345 if (smp_num_siblings == 1) {
346 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
347 } else if (smp_num_siblings > 1) {
348
349 if (smp_num_siblings > NR_CPUS) {
350 printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
351 smp_num_siblings);
352 smp_num_siblings = 1;
353 return;
354 }
355
356 index_msb = get_count_order(smp_num_siblings);
357#ifdef CONFIG_X86_64
358 c->phys_proc_id = phys_pkg_id(index_msb);
359#else
360 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
361#endif
362
363 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
364
365 index_msb = get_count_order(smp_num_siblings);
366
367 core_bits = get_count_order(c->x86_max_cores);
368
369#ifdef CONFIG_X86_64
370 c->cpu_core_id = phys_pkg_id(index_msb) &
371 ((1 << core_bits) - 1);
372#else
373 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
374 ((1 << core_bits) - 1);
375#endif
184 } 376 }
185 return NULL; /* Not found */
186}
187 377
378out:
379 if ((c->x86_max_cores * smp_num_siblings) > 1) {
380 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
381 c->phys_proc_id);
382 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
383 c->cpu_core_id);
384 }
385#endif
386}
188 387
189static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) 388static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
190{ 389{
191 char *v = c->x86_vendor_id; 390 char *v = c->x86_vendor_id;
192 int i; 391 int i;
193 static int printed; 392 static int printed;
194 393
195 for (i = 0; i < X86_VENDOR_NUM; i++) { 394 for (i = 0; i < X86_VENDOR_NUM; i++) {
196 if (cpu_devs[i]) { 395 if (!cpu_devs[i])
197 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 396 break;
198 (cpu_devs[i]->c_ident[1] && 397
199 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 398 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
200 c->x86_vendor = i; 399 (cpu_devs[i]->c_ident[1] &&
201 if (!early) 400 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
202 this_cpu = cpu_devs[i]; 401 this_cpu = cpu_devs[i];
203 return; 402 c->x86_vendor = this_cpu->c_x86_vendor;
204 } 403 return;
205 } 404 }
206 } 405 }
406
207 if (!printed) { 407 if (!printed) {
208 printed++; 408 printed++;
209 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); 409 printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
210 printk(KERN_ERR "CPU: Your system may be unstable.\n"); 410 printk(KERN_ERR "CPU: Your system may be unstable.\n");
211 } 411 }
412
212 c->x86_vendor = X86_VENDOR_UNKNOWN; 413 c->x86_vendor = X86_VENDOR_UNKNOWN;
213 this_cpu = &default_cpu; 414 this_cpu = &default_cpu;
214} 415}
215 416
216 417void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
217static int __init x86_fxsr_setup(char *s)
218{
219 setup_clear_cpu_cap(X86_FEATURE_FXSR);
220 setup_clear_cpu_cap(X86_FEATURE_XMM);
221 return 1;
222}
223__setup("nofxsr", x86_fxsr_setup);
224
225
226static int __init x86_sep_setup(char *s)
227{
228 setup_clear_cpu_cap(X86_FEATURE_SEP);
229 return 1;
230}
231__setup("nosep", x86_sep_setup);
232
233
234/* Standard macro to see if a specific flag is changeable */
235static inline int flag_is_changeable_p(u32 flag)
236{
237 u32 f1, f2;
238
239 asm("pushfl\n\t"
240 "pushfl\n\t"
241 "popl %0\n\t"
242 "movl %0,%1\n\t"
243 "xorl %2,%0\n\t"
244 "pushl %0\n\t"
245 "popfl\n\t"
246 "pushfl\n\t"
247 "popl %0\n\t"
248 "popfl\n\t"
249 : "=&r" (f1), "=&r" (f2)
250 : "ir" (flag));
251
252 return ((f1^f2) & flag) != 0;
253}
254
255
256/* Probe for the CPUID instruction */
257static int __cpuinit have_cpuid_p(void)
258{
259 return flag_is_changeable_p(X86_EFLAGS_ID);
260}
261
262void __init cpu_detect(struct cpuinfo_x86 *c)
263{ 418{
264 /* Get vendor name */ 419 /* Get vendor name */
265 cpuid(0x00000000, (unsigned int *)&c->cpuid_level, 420 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
@@ -268,50 +423,87 @@ void __init cpu_detect(struct cpuinfo_x86 *c)
268 (unsigned int *)&c->x86_vendor_id[4]); 423 (unsigned int *)&c->x86_vendor_id[4]);
269 424
270 c->x86 = 4; 425 c->x86 = 4;
426 /* Intel-defined flags: level 0x00000001 */
271 if (c->cpuid_level >= 0x00000001) { 427 if (c->cpuid_level >= 0x00000001) {
272 u32 junk, tfms, cap0, misc; 428 u32 junk, tfms, cap0, misc;
273 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 429 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
274 c->x86 = (tfms >> 8) & 15; 430 c->x86 = (tfms >> 8) & 0xf;
275 c->x86_model = (tfms >> 4) & 15; 431 c->x86_model = (tfms >> 4) & 0xf;
432 c->x86_mask = tfms & 0xf;
276 if (c->x86 == 0xf) 433 if (c->x86 == 0xf)
277 c->x86 += (tfms >> 20) & 0xff; 434 c->x86 += (tfms >> 20) & 0xff;
278 if (c->x86 >= 0x6) 435 if (c->x86 >= 0x6)
279 c->x86_model += ((tfms >> 16) & 0xF) << 4; 436 c->x86_model += ((tfms >> 16) & 0xf) << 4;
280 c->x86_mask = tfms & 15;
281 if (cap0 & (1<<19)) { 437 if (cap0 & (1<<19)) {
282 c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
283 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 438 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
439 c->x86_cache_alignment = c->x86_clflush_size;
284 } 440 }
285 } 441 }
286} 442}
287static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) 443
444static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
288{ 445{
289 u32 tfms, xlvl; 446 u32 tfms, xlvl;
290 unsigned int ebx; 447 u32 ebx;
291 448
292 memset(&c->x86_capability, 0, sizeof c->x86_capability); 449 /* Intel-defined flags: level 0x00000001 */
293 if (have_cpuid_p()) { 450 if (c->cpuid_level >= 0x00000001) {
294 /* Intel-defined flags: level 0x00000001 */ 451 u32 capability, excap;
295 if (c->cpuid_level >= 0x00000001) { 452 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
296 u32 capability, excap; 453 c->x86_capability[0] = capability;
297 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 454 c->x86_capability[4] = excap;
298 c->x86_capability[0] = capability; 455 }
299 c->x86_capability[4] = excap;
300 }
301 456
302 /* AMD-defined flags: level 0x80000001 */ 457 /* AMD-defined flags: level 0x80000001 */
303 xlvl = cpuid_eax(0x80000000); 458 xlvl = cpuid_eax(0x80000000);
304 if ((xlvl & 0xffff0000) == 0x80000000) { 459 c->extended_cpuid_level = xlvl;
305 if (xlvl >= 0x80000001) { 460 if ((xlvl & 0xffff0000) == 0x80000000) {
306 c->x86_capability[1] = cpuid_edx(0x80000001); 461 if (xlvl >= 0x80000001) {
307 c->x86_capability[6] = cpuid_ecx(0x80000001); 462 c->x86_capability[1] = cpuid_edx(0x80000001);
308 } 463 c->x86_capability[6] = cpuid_ecx(0x80000001);
309 } 464 }
465 }
310 466
467#ifdef CONFIG_X86_64
468 if (c->extended_cpuid_level >= 0x80000008) {
469 u32 eax = cpuid_eax(0x80000008);
470
471 c->x86_virt_bits = (eax >> 8) & 0xff;
472 c->x86_phys_bits = eax & 0xff;
311 } 473 }
474#endif
475
476 if (c->extended_cpuid_level >= 0x80000007)
477 c->x86_power = cpuid_edx(0x80000007);
312 478
313} 479}
314 480
481static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
482{
483#ifdef CONFIG_X86_32
484 int i;
485
486 /*
487 * First of all, decide if this is a 486 or higher
488 * It's a 486 if we can modify the AC flag
489 */
490 if (flag_is_changeable_p(X86_EFLAGS_AC))
491 c->x86 = 4;
492 else
493 c->x86 = 3;
494
495 for (i = 0; i < X86_VENDOR_NUM; i++)
496 if (cpu_devs[i] && cpu_devs[i]->c_identify) {
497 c->x86_vendor_id[0] = 0;
498 cpu_devs[i]->c_identify(c);
499 if (c->x86_vendor_id[0]) {
500 get_cpu_vendor(c);
501 break;
502 }
503 }
504#endif
505}
506
315/* 507/*
316 * Do minimum CPU detection early. 508 * Do minimum CPU detection early.
317 * Fields really needed: vendor, cpuid_level, family, model, mask, 509 * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -321,25 +513,61 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
321 * WARNING: this function is only called on the BP. Don't add code here 513 * WARNING: this function is only called on the BP. Don't add code here
322 * that is supposed to run on all CPUs. 514 * that is supposed to run on all CPUs.
323 */ 515 */
324static void __init early_cpu_detect(void) 516static void __init early_identify_cpu(struct cpuinfo_x86 *c)
325{ 517{
326 struct cpuinfo_x86 *c = &boot_cpu_data; 518#ifdef CONFIG_X86_64
327 519 c->x86_clflush_size = 64;
328 c->x86_cache_alignment = 32; 520#else
329 c->x86_clflush_size = 32; 521 c->x86_clflush_size = 32;
522#endif
523 c->x86_cache_alignment = c->x86_clflush_size;
524
525 memset(&c->x86_capability, 0, sizeof c->x86_capability);
526 c->extended_cpuid_level = 0;
330 527
331 if (!have_cpuid_p()) 528 if (!have_cpuid_p())
529 identify_cpu_without_cpuid(c);
530
531 /* cyrix could have cpuid enabled via c_identify()*/
532 if (!have_cpuid_p())
332 return; 533 return;
333 534
334 cpu_detect(c); 535 cpu_detect(c);
335 536
336 get_cpu_vendor(c, 1); 537 get_cpu_vendor(c);
337 538
338 early_get_cap(c); 539 get_cpu_cap(c);
339 540
340 if (c->x86_vendor != X86_VENDOR_UNKNOWN && 541 if (this_cpu->c_early_init)
341 cpu_devs[c->x86_vendor]->c_early_init) 542 this_cpu->c_early_init(c);
342 cpu_devs[c->x86_vendor]->c_early_init(c); 543
544 validate_pat_support(c);
545}
546
547void __init early_cpu_init(void)
548{
549 struct cpu_dev **cdev;
550 int count = 0;
551
552 printk("KERNEL supported cpus:\n");
553 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
554 struct cpu_dev *cpudev = *cdev;
555 unsigned int j;
556
557 if (count >= X86_VENDOR_NUM)
558 break;
559 cpu_devs[count] = cpudev;
560 count++;
561
562 for (j = 0; j < 2; j++) {
563 if (!cpudev->c_ident[j])
564 continue;
565 printk(" %s %s\n", cpudev->c_vendor,
566 cpudev->c_ident[j]);
567 }
568 }
569
570 early_identify_cpu(&boot_cpu_data);
343} 571}
344 572
345/* 573/*
@@ -357,86 +585,41 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
357 585
358static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 586static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
359{ 587{
360 u32 tfms, xlvl; 588 c->extended_cpuid_level = 0;
361 unsigned int ebx;
362
363 if (have_cpuid_p()) {
364 /* Get vendor name */
365 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
366 (unsigned int *)&c->x86_vendor_id[0],
367 (unsigned int *)&c->x86_vendor_id[8],
368 (unsigned int *)&c->x86_vendor_id[4]);
369
370 get_cpu_vendor(c, 0);
371 /* Initialize the standard set of capabilities */
372 /* Note that the vendor-specific code below might override */
373 /* Intel-defined flags: level 0x00000001 */
374 if (c->cpuid_level >= 0x00000001) {
375 u32 capability, excap;
376 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
377 c->x86_capability[0] = capability;
378 c->x86_capability[4] = excap;
379 c->x86 = (tfms >> 8) & 15;
380 c->x86_model = (tfms >> 4) & 15;
381 if (c->x86 == 0xf)
382 c->x86 += (tfms >> 20) & 0xff;
383 if (c->x86 >= 0x6)
384 c->x86_model += ((tfms >> 16) & 0xF) << 4;
385 c->x86_mask = tfms & 15;
386 c->initial_apicid = (ebx >> 24) & 0xFF;
387#ifdef CONFIG_X86_HT
388 c->apicid = phys_pkg_id(c->initial_apicid, 0);
389 c->phys_proc_id = c->initial_apicid;
390#else
391 c->apicid = c->initial_apicid;
392#endif
393 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
394 c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
395 } else {
396 /* Have CPUID level 0 only - unheard of */
397 c->x86 = 4;
398 }
399 589
400 /* AMD-defined flags: level 0x80000001 */ 590 if (!have_cpuid_p())
401 xlvl = cpuid_eax(0x80000000); 591 identify_cpu_without_cpuid(c);
402 if ((xlvl & 0xffff0000) == 0x80000000) {
403 if (xlvl >= 0x80000001) {
404 c->x86_capability[1] = cpuid_edx(0x80000001);
405 c->x86_capability[6] = cpuid_ecx(0x80000001);
406 }
407 if (xlvl >= 0x80000004)
408 get_model_name(c); /* Default name */
409 }
410 592
411 init_scattered_cpuid_features(c); 593 /* cyrix could have cpuid enabled via c_identify()*/
412 detect_nopl(c); 594 if (!have_cpuid_p())
413 } 595 return;
414}
415 596
416static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 597 cpu_detect(c);
417{
418 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
419 /* Disable processor serial number */
420 unsigned long lo, hi;
421 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
422 lo |= 0x200000;
423 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
424 printk(KERN_NOTICE "CPU serial number disabled.\n");
425 clear_cpu_cap(c, X86_FEATURE_PN);
426 598
427 /* Disabling the serial number may affect the cpuid level */ 599 get_cpu_vendor(c);
428 c->cpuid_level = cpuid_eax(0);
429 }
430}
431 600
432static int __init x86_serial_nr_setup(char *s) 601 get_cpu_cap(c);
433{
434 disable_x86_serial_nr = 0;
435 return 1;
436}
437__setup("serialnumber", x86_serial_nr_setup);
438 602
603 if (c->cpuid_level >= 0x00000001) {
604 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
605#ifdef CONFIG_X86_32
606# ifdef CONFIG_X86_HT
607 c->apicid = phys_pkg_id(c->initial_apicid, 0);
608# else
609 c->apicid = c->initial_apicid;
610# endif
611#endif
439 612
613#ifdef CONFIG_X86_HT
614 c->phys_proc_id = c->initial_apicid;
615#endif
616 }
617
618 get_model_name(c); /* Default name */
619
620 init_scattered_cpuid_features(c);
621 detect_nopl(c);
622}
440 623
441/* 624/*
442 * This does the hard work of actually picking apart the CPU stuff... 625 * This does the hard work of actually picking apart the CPU stuff...
@@ -448,30 +631,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
448 c->loops_per_jiffy = loops_per_jiffy; 631 c->loops_per_jiffy = loops_per_jiffy;
449 c->x86_cache_size = -1; 632 c->x86_cache_size = -1;
450 c->x86_vendor = X86_VENDOR_UNKNOWN; 633 c->x86_vendor = X86_VENDOR_UNKNOWN;
451 c->cpuid_level = -1; /* CPUID not detected */
452 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 634 c->x86_model = c->x86_mask = 0; /* So far unknown... */
453 c->x86_vendor_id[0] = '\0'; /* Unset */ 635 c->x86_vendor_id[0] = '\0'; /* Unset */
454 c->x86_model_id[0] = '\0'; /* Unset */ 636 c->x86_model_id[0] = '\0'; /* Unset */
455 c->x86_max_cores = 1; 637 c->x86_max_cores = 1;
638 c->x86_coreid_bits = 0;
639#ifdef CONFIG_X86_64
640 c->x86_clflush_size = 64;
641#else
642 c->cpuid_level = -1; /* CPUID not detected */
456 c->x86_clflush_size = 32; 643 c->x86_clflush_size = 32;
644#endif
645 c->x86_cache_alignment = c->x86_clflush_size;
457 memset(&c->x86_capability, 0, sizeof c->x86_capability); 646 memset(&c->x86_capability, 0, sizeof c->x86_capability);
458 647
459 if (!have_cpuid_p()) {
460 /*
461 * First of all, decide if this is a 486 or higher
462 * It's a 486 if we can modify the AC flag
463 */
464 if (flag_is_changeable_p(X86_EFLAGS_AC))
465 c->x86 = 4;
466 else
467 c->x86 = 3;
468 }
469
470 generic_identify(c); 648 generic_identify(c);
471 649
472 if (this_cpu->c_identify) 650 if (this_cpu->c_identify)
473 this_cpu->c_identify(c); 651 this_cpu->c_identify(c);
474 652
653#ifdef CONFIG_X86_64
654 c->apicid = phys_pkg_id(0);
655#endif
656
475 /* 657 /*
476 * Vendor-specific initialization. In this section we 658 * Vendor-specific initialization. In this section we
477 * canonicalize the feature flags, meaning if there are 659 * canonicalize the feature flags, meaning if there are
@@ -505,6 +687,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
505 c->x86, c->x86_model); 687 c->x86, c->x86_model);
506 } 688 }
507 689
690#ifdef CONFIG_X86_64
691 detect_ht(c);
692#endif
693
508 /* 694 /*
509 * On SMP, boot_cpu_data holds the common feature set between 695 * On SMP, boot_cpu_data holds the common feature set between
510 * all CPUs; so make sure that we indicate which features are 696 * all CPUs; so make sure that we indicate which features are
@@ -513,7 +699,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
513 */ 699 */
514 if (c != &boot_cpu_data) { 700 if (c != &boot_cpu_data) {
515 /* AND the already accumulated flags with these */ 701 /* AND the already accumulated flags with these */
516 for (i = 0 ; i < NCAPINTS ; i++) 702 for (i = 0; i < NCAPINTS; i++)
517 boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; 703 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
518 } 704 }
519 705
@@ -521,72 +707,79 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
521 for (i = 0; i < NCAPINTS; i++) 707 for (i = 0; i < NCAPINTS; i++)
522 c->x86_capability[i] &= ~cleared_cpu_caps[i]; 708 c->x86_capability[i] &= ~cleared_cpu_caps[i];
523 709
710#ifdef CONFIG_X86_MCE
524 /* Init Machine Check Exception if available. */ 711 /* Init Machine Check Exception if available. */
525 mcheck_init(c); 712 mcheck_init(c);
713#endif
526 714
527 select_idle_routine(c); 715 select_idle_routine(c);
716
717#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
718 numa_add_cpu(smp_processor_id());
719#endif
528} 720}
529 721
530void __init identify_boot_cpu(void) 722void __init identify_boot_cpu(void)
531{ 723{
532 identify_cpu(&boot_cpu_data); 724 identify_cpu(&boot_cpu_data);
725#ifdef CONFIG_X86_32
533 sysenter_setup(); 726 sysenter_setup();
534 enable_sep_cpu(); 727 enable_sep_cpu();
728#endif
535} 729}
536 730
537void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 731void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
538{ 732{
539 BUG_ON(c == &boot_cpu_data); 733 BUG_ON(c == &boot_cpu_data);
540 identify_cpu(c); 734 identify_cpu(c);
735#ifdef CONFIG_X86_32
541 enable_sep_cpu(); 736 enable_sep_cpu();
737#endif
542 mtrr_ap_init(); 738 mtrr_ap_init();
543} 739}
544 740
545#ifdef CONFIG_X86_HT 741struct msr_range {
546void __cpuinit detect_ht(struct cpuinfo_x86 *c) 742 unsigned min;
547{ 743 unsigned max;
548 u32 eax, ebx, ecx, edx; 744};
549 int index_msb, core_bits;
550
551 cpuid(1, &eax, &ebx, &ecx, &edx);
552
553 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
554 return;
555
556 smp_num_siblings = (ebx & 0xff0000) >> 16;
557 745
558 if (smp_num_siblings == 1) { 746static struct msr_range msr_range_array[] __cpuinitdata = {
559 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 747 { 0x00000000, 0x00000418},
560 } else if (smp_num_siblings > 1) { 748 { 0xc0000000, 0xc000040b},
749 { 0xc0010000, 0xc0010142},
750 { 0xc0011000, 0xc001103b},
751};
561 752
562 if (smp_num_siblings > NR_CPUS) { 753static void __cpuinit print_cpu_msr(void)
563 printk(KERN_WARNING "CPU: Unsupported number of the " 754{
564 "siblings %d", smp_num_siblings); 755 unsigned index;
565 smp_num_siblings = 1; 756 u64 val;
566 return; 757 int i;
758 unsigned index_min, index_max;
759
760 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
761 index_min = msr_range_array[i].min;
762 index_max = msr_range_array[i].max;
763 for (index = index_min; index < index_max; index++) {
764 if (rdmsrl_amd_safe(index, &val))
765 continue;
766 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
567 } 767 }
768 }
769}
568 770
569 index_msb = get_count_order(smp_num_siblings); 771static int show_msr __cpuinitdata;
570 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); 772static __init int setup_show_msr(char *arg)
571 773{
572 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 774 int num;
573 c->phys_proc_id);
574
575 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
576
577 index_msb = get_count_order(smp_num_siblings) ;
578 775
579 core_bits = get_count_order(c->x86_max_cores); 776 get_option(&arg, &num);
580 777
581 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & 778 if (num > 0)
582 ((1 << core_bits) - 1); 779 show_msr = num;
583 780 return 1;
584 if (c->x86_max_cores > 1)
585 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
586 c->cpu_core_id);
587 }
588} 781}
589#endif 782__setup("show_msr=", setup_show_msr);
590 783
591static __init int setup_noclflush(char *arg) 784static __init int setup_noclflush(char *arg)
592{ 785{
@@ -605,17 +798,25 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
605 vendor = c->x86_vendor_id; 798 vendor = c->x86_vendor_id;
606 799
607 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) 800 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
608 printk("%s ", vendor); 801 printk(KERN_CONT "%s ", vendor);
609 802
610 if (!c->x86_model_id[0]) 803 if (c->x86_model_id[0])
611 printk("%d86", c->x86); 804 printk(KERN_CONT "%s", c->x86_model_id);
612 else 805 else
613 printk("%s", c->x86_model_id); 806 printk(KERN_CONT "%d86", c->x86);
614 807
615 if (c->x86_mask || c->cpuid_level >= 0) 808 if (c->x86_mask || c->cpuid_level >= 0)
616 printk(" stepping %02x\n", c->x86_mask); 809 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
617 else 810 else
618 printk("\n"); 811 printk(KERN_CONT "\n");
812
813#ifdef CONFIG_SMP
814 if (c->cpu_index < show_msr)
815 print_cpu_msr();
816#else
817 if (show_msr)
818 print_cpu_msr();
819#endif
619} 820}
620 821
621static __init int setup_disablecpuid(char *arg) 822static __init int setup_disablecpuid(char *arg)
@@ -631,19 +832,89 @@ __setup("clearcpuid=", setup_disablecpuid);
631 832
632cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; 833cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
633 834
634void __init early_cpu_init(void) 835#ifdef CONFIG_X86_64
836struct x8664_pda **_cpu_pda __read_mostly;
837EXPORT_SYMBOL(_cpu_pda);
838
839struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
840
841char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
842
843void __cpuinit pda_init(int cpu)
844{
845 struct x8664_pda *pda = cpu_pda(cpu);
846
847 /* Setup up data that may be needed in __get_free_pages early */
848 loadsegment(fs, 0);
849 loadsegment(gs, 0);
850 /* Memory clobbers used to order PDA accessed */
851 mb();
852 wrmsrl(MSR_GS_BASE, pda);
853 mb();
854
855 pda->cpunumber = cpu;
856 pda->irqcount = -1;
857 pda->kernelstack = (unsigned long)stack_thread_info() -
858 PDA_STACKOFFSET + THREAD_SIZE;
859 pda->active_mm = &init_mm;
860 pda->mmu_state = 0;
861
862 if (cpu == 0) {
863 /* others are initialized in smpboot.c */
864 pda->pcurrent = &init_task;
865 pda->irqstackptr = boot_cpu_stack;
866 pda->irqstackptr += IRQSTACKSIZE - 64;
867 } else {
868 if (!pda->irqstackptr) {
869 pda->irqstackptr = (char *)
870 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
871 if (!pda->irqstackptr)
872 panic("cannot allocate irqstack for cpu %d",
873 cpu);
874 pda->irqstackptr += IRQSTACKSIZE - 64;
875 }
876
877 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
878 pda->nodenumber = cpu_to_node(cpu);
879 }
880}
881
882char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
883 DEBUG_STKSZ] __page_aligned_bss;
884
885extern asmlinkage void ignore_sysret(void);
886
887/* May not be marked __init: used by software suspend */
888void syscall_init(void)
635{ 889{
636 struct cpu_vendor_dev *cvdev; 890 /*
891 * LSTAR and STAR live in a bit strange symbiosis.
892 * They both write to the same internal register. STAR allows to
893 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
894 */
895 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
896 wrmsrl(MSR_LSTAR, system_call);
897 wrmsrl(MSR_CSTAR, ignore_sysret);
637 898
638 for (cvdev = __x86cpuvendor_start ; 899#ifdef CONFIG_IA32_EMULATION
639 cvdev < __x86cpuvendor_end ; 900 syscall32_cpu_init();
640 cvdev++) 901#endif
641 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
642 902
643 early_cpu_detect(); 903 /* Flags to clear on syscall */
644 validate_pat_support(&boot_cpu_data); 904 wrmsrl(MSR_SYSCALL_MASK,
905 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
645} 906}
646 907
908unsigned long kernel_eflags;
909
910/*
911 * Copies of the original ist values from the tss are only accessed during
912 * debugging, no special alignment required.
913 */
914DEFINE_PER_CPU(struct orig_ist, orig_ist);
915
916#else
917
647/* Make sure %fs is initialized properly in idle threads */ 918/* Make sure %fs is initialized properly in idle threads */
648struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 919struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
649{ 920{
@@ -651,25 +922,136 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
651 regs->fs = __KERNEL_PERCPU; 922 regs->fs = __KERNEL_PERCPU;
652 return regs; 923 return regs;
653} 924}
654 925#endif
655/* Current gdt points %fs at the "master" per-cpu area: after this,
656 * it's on the real one. */
657void switch_to_new_gdt(void)
658{
659 struct desc_ptr gdt_descr;
660
661 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
662 gdt_descr.size = GDT_SIZE - 1;
663 load_gdt(&gdt_descr);
664 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
665}
666 926
667/* 927/*
668 * cpu_init() initializes state that is per-CPU. Some data is already 928 * cpu_init() initializes state that is per-CPU. Some data is already
669 * initialized (naturally) in the bootstrap process, such as the GDT 929 * initialized (naturally) in the bootstrap process, such as the GDT
670 * and IDT. We reload them nevertheless, this function acts as a 930 * and IDT. We reload them nevertheless, this function acts as a
671 * 'CPU state barrier', nothing should get across. 931 * 'CPU state barrier', nothing should get across.
932 * A lot of state is already set up in PDA init for 64 bit
672 */ 933 */
934#ifdef CONFIG_X86_64
935void __cpuinit cpu_init(void)
936{
937 int cpu = stack_smp_processor_id();
938 struct tss_struct *t = &per_cpu(init_tss, cpu);
939 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
940 unsigned long v;
941 char *estacks = NULL;
942 struct task_struct *me;
943 int i;
944
945 /* CPU 0 is initialised in head64.c */
946 if (cpu != 0)
947 pda_init(cpu);
948 else
949 estacks = boot_exception_stacks;
950
951 me = current;
952
953 if (cpu_test_and_set(cpu, cpu_initialized))
954 panic("CPU#%d already initialized!\n", cpu);
955
956 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
957
958 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
959
960 /*
961 * Initialize the per-CPU GDT with the boot GDT,
962 * and set up the GDT descriptor:
963 */
964
965 switch_to_new_gdt();
966 load_idt((const struct desc_ptr *)&idt_descr);
967
968 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
969 syscall_init();
970
971 wrmsrl(MSR_FS_BASE, 0);
972 wrmsrl(MSR_KERNEL_GS_BASE, 0);
973 barrier();
974
975 check_efer();
976 if (cpu != 0 && x2apic)
977 enable_x2apic();
978
979 /*
980 * set up and load the per-CPU TSS
981 */
982 if (!orig_ist->ist[0]) {
983 static const unsigned int order[N_EXCEPTION_STACKS] = {
984 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
985 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
986 };
987 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
988 if (cpu) {
989 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
990 if (!estacks)
991 panic("Cannot allocate exception "
992 "stack %ld %d\n", v, cpu);
993 }
994 estacks += PAGE_SIZE << order[v];
995 orig_ist->ist[v] = t->x86_tss.ist[v] =
996 (unsigned long)estacks;
997 }
998 }
999
1000 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1001 /*
1002 * <= is required because the CPU will access up to
1003 * 8 bits beyond the end of the IO permission bitmap.
1004 */
1005 for (i = 0; i <= IO_BITMAP_LONGS; i++)
1006 t->io_bitmap[i] = ~0UL;
1007
1008 atomic_inc(&init_mm.mm_count);
1009 me->active_mm = &init_mm;
1010 if (me->mm)
1011 BUG();
1012 enter_lazy_tlb(&init_mm, me);
1013
1014 load_sp0(t, &current->thread);
1015 set_tss_desc(cpu, t);
1016 load_TR_desc();
1017 load_LDT(&init_mm.context);
1018
1019#ifdef CONFIG_KGDB
1020 /*
1021 * If the kgdb is connected no debug regs should be altered. This
1022 * is only applicable when KGDB and a KGDB I/O module are built
1023 * into the kernel and you are using early debugging with
1024 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
1025 */
1026 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1027 arch_kgdb_ops.correct_hw_break();
1028 else {
1029#endif
1030 /*
1031 * Clear all 6 debug registers:
1032 */
1033
1034 set_debugreg(0UL, 0);
1035 set_debugreg(0UL, 1);
1036 set_debugreg(0UL, 2);
1037 set_debugreg(0UL, 3);
1038 set_debugreg(0UL, 6);
1039 set_debugreg(0UL, 7);
1040#ifdef CONFIG_KGDB
1041 /* If the kgdb is connected no debug regs should be altered. */
1042 }
1043#endif
1044
1045 fpu_init();
1046
1047 raw_local_save_flags(kernel_eflags);
1048
1049 if (is_uv_system())
1050 uv_cpu_init();
1051}
1052
1053#else
1054
673void __cpuinit cpu_init(void) 1055void __cpuinit cpu_init(void)
674{ 1056{
675 int cpu = smp_processor_id(); 1057 int cpu = smp_processor_id();
@@ -723,9 +1105,20 @@ void __cpuinit cpu_init(void)
723 /* 1105 /*
724 * Force FPU initialization: 1106 * Force FPU initialization:
725 */ 1107 */
726 current_thread_info()->status = 0; 1108 if (cpu_has_xsave)
1109 current_thread_info()->status = TS_XSAVE;
1110 else
1111 current_thread_info()->status = 0;
727 clear_used_math(); 1112 clear_used_math();
728 mxcsr_feature_mask_init(); 1113 mxcsr_feature_mask_init();
1114
1115 /*
1116 * Boot processor to setup the FP and extended state context info.
1117 */
1118 if (!smp_processor_id())
1119 init_thread_xstate();
1120
1121 xsave_init();
729} 1122}
730 1123
731#ifdef CONFIG_HOTPLUG_CPU 1124#ifdef CONFIG_HOTPLUG_CPU
@@ -739,3 +1132,5 @@ void __cpuinit cpu_uninit(void)
739 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; 1132 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
740} 1133}
741#endif 1134#endif
1135
1136#endif
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
deleted file mode 100644
index 305b465889b0..000000000000
--- a/arch/x86/kernel/cpu/common_64.c
+++ /dev/null
@@ -1,763 +0,0 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
10#include <linux/delay.h>
11#include <linux/smp.h>
12#include <linux/percpu.h>
13#include <asm/i387.h>
14#include <asm/msr.h>
15#include <asm/io.h>
16#include <asm/linkage.h>
17#include <asm/mmu_context.h>
18#include <asm/mtrr.h>
19#include <asm/mce.h>
20#include <asm/pat.h>
21#include <asm/asm.h>
22#include <asm/numa.h>
23#ifdef CONFIG_X86_LOCAL_APIC
24#include <asm/mpspec.h>
25#include <asm/apic.h>
26#include <mach_apic.h>
27#endif
28#include <asm/pda.h>
29#include <asm/pgtable.h>
30#include <asm/processor.h>
31#include <asm/desc.h>
32#include <asm/atomic.h>
33#include <asm/proto.h>
34#include <asm/sections.h>
35#include <asm/setup.h>
36#include <asm/genapic.h>
37
38#include "cpu.h"
39
40/* We need valid kernel segments for data and code in long mode too
41 * IRET will check the segment types kkeil 2000/10/28
42 * Also sysret mandates a special GDT layout
43 */
44/* The TLS descriptors are currently at a different place compared to i386.
45 Hopefully nobody expects them at a fixed place (Wine?) */
46DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
47 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
48 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
49 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
50 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
51 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
52 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
53} };
54EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
55
56__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
57
58/* Current gdt points %fs at the "master" per-cpu area: after this,
59 * it's on the real one. */
60void switch_to_new_gdt(void)
61{
62 struct desc_ptr gdt_descr;
63
64 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
65 gdt_descr.size = GDT_SIZE - 1;
66 load_gdt(&gdt_descr);
67}
68
69struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
70
71static void __cpuinit default_init(struct cpuinfo_x86 *c)
72{
73 display_cacheinfo(c);
74}
75
76static struct cpu_dev __cpuinitdata default_cpu = {
77 .c_init = default_init,
78 .c_vendor = "Unknown",
79};
80static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
81
82int __cpuinit get_model_name(struct cpuinfo_x86 *c)
83{
84 unsigned int *v;
85
86 if (c->extended_cpuid_level < 0x80000004)
87 return 0;
88
89 v = (unsigned int *) c->x86_model_id;
90 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
91 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
92 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
93 c->x86_model_id[48] = 0;
94 return 1;
95}
96
97
98void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
99{
100 unsigned int n, dummy, ebx, ecx, edx;
101
102 n = c->extended_cpuid_level;
103
104 if (n >= 0x80000005) {
105 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
106 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
107 "D cache %dK (%d bytes/line)\n",
108 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
109 c->x86_cache_size = (ecx>>24) + (edx>>24);
110 /* On K8 L1 TLB is inclusive, so don't count it */
111 c->x86_tlbsize = 0;
112 }
113
114 if (n >= 0x80000006) {
115 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
116 ecx = cpuid_ecx(0x80000006);
117 c->x86_cache_size = ecx >> 16;
118 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
119
120 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
121 c->x86_cache_size, ecx & 0xFF);
122 }
123}
124
125void __cpuinit detect_ht(struct cpuinfo_x86 *c)
126{
127#ifdef CONFIG_SMP
128 u32 eax, ebx, ecx, edx;
129 int index_msb, core_bits;
130
131 cpuid(1, &eax, &ebx, &ecx, &edx);
132
133
134 if (!cpu_has(c, X86_FEATURE_HT))
135 return;
136 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
137 goto out;
138
139 smp_num_siblings = (ebx & 0xff0000) >> 16;
140
141 if (smp_num_siblings == 1) {
142 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
143 } else if (smp_num_siblings > 1) {
144
145 if (smp_num_siblings > NR_CPUS) {
146 printk(KERN_WARNING "CPU: Unsupported number of "
147 "siblings %d", smp_num_siblings);
148 smp_num_siblings = 1;
149 return;
150 }
151
152 index_msb = get_count_order(smp_num_siblings);
153 c->phys_proc_id = phys_pkg_id(index_msb);
154
155 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
156
157 index_msb = get_count_order(smp_num_siblings);
158
159 core_bits = get_count_order(c->x86_max_cores);
160
161 c->cpu_core_id = phys_pkg_id(index_msb) &
162 ((1 << core_bits) - 1);
163 }
164out:
165 if ((c->x86_max_cores * smp_num_siblings) > 1) {
166 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
167 c->phys_proc_id);
168 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
169 c->cpu_core_id);
170 }
171
172#endif
173}
174
175static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
176{
177 char *v = c->x86_vendor_id;
178 int i;
179 static int printed;
180
181 for (i = 0; i < X86_VENDOR_NUM; i++) {
182 if (cpu_devs[i]) {
183 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
184 (cpu_devs[i]->c_ident[1] &&
185 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
186 c->x86_vendor = i;
187 this_cpu = cpu_devs[i];
188 return;
189 }
190 }
191 }
192 if (!printed) {
193 printed++;
194 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
195 printk(KERN_ERR "CPU: Your system may be unstable.\n");
196 }
197 c->x86_vendor = X86_VENDOR_UNKNOWN;
198}
199
200static void __init early_cpu_support_print(void)
201{
202 int i,j;
203 struct cpu_dev *cpu_devx;
204
205 printk("KERNEL supported cpus:\n");
206 for (i = 0; i < X86_VENDOR_NUM; i++) {
207 cpu_devx = cpu_devs[i];
208 if (!cpu_devx)
209 continue;
210 for (j = 0; j < 2; j++) {
211 if (!cpu_devx->c_ident[j])
212 continue;
213 printk(" %s %s\n", cpu_devx->c_vendor,
214 cpu_devx->c_ident[j]);
215 }
216 }
217}
218
219/*
220 * The NOPL instruction is supposed to exist on all CPUs with
221 * family >= 6, unfortunately, that's not true in practice because
222 * of early VIA chips and (more importantly) broken virtualizers that
223 * are not easy to detect. Hence, probe for it based on first
224 * principles.
225 *
226 * Note: no 64-bit chip is known to lack these, but put the code here
227 * for consistency with 32 bits, and to make it utterly trivial to
228 * diagnose the problem should it ever surface.
229 */
230static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
231{
232 const u32 nopl_signature = 0x888c53b1; /* Random number */
233 u32 has_nopl = nopl_signature;
234
235 clear_cpu_cap(c, X86_FEATURE_NOPL);
236 if (c->x86 >= 6) {
237 asm volatile("\n"
238 "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
239 "2:\n"
240 " .section .fixup,\"ax\"\n"
241 "3: xor %0,%0\n"
242 " jmp 2b\n"
243 " .previous\n"
244 _ASM_EXTABLE(1b,3b)
245 : "+a" (has_nopl));
246
247 if (has_nopl == nopl_signature)
248 set_cpu_cap(c, X86_FEATURE_NOPL);
249 }
250}
251
252static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
253
254void __init early_cpu_init(void)
255{
256 struct cpu_vendor_dev *cvdev;
257
258 for (cvdev = __x86cpuvendor_start ;
259 cvdev < __x86cpuvendor_end ;
260 cvdev++)
261 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
262 early_cpu_support_print();
263 early_identify_cpu(&boot_cpu_data);
264}
265
266/* Do some early cpuid on the boot CPU to get some parameter that are
267 needed before check_bugs. Everything advanced is in identify_cpu
268 below. */
269static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
270{
271 u32 tfms, xlvl;
272
273 c->loops_per_jiffy = loops_per_jiffy;
274 c->x86_cache_size = -1;
275 c->x86_vendor = X86_VENDOR_UNKNOWN;
276 c->x86_model = c->x86_mask = 0; /* So far unknown... */
277 c->x86_vendor_id[0] = '\0'; /* Unset */
278 c->x86_model_id[0] = '\0'; /* Unset */
279 c->x86_clflush_size = 64;
280 c->x86_cache_alignment = c->x86_clflush_size;
281 c->x86_max_cores = 1;
282 c->x86_coreid_bits = 0;
283 c->extended_cpuid_level = 0;
284 memset(&c->x86_capability, 0, sizeof c->x86_capability);
285
286 /* Get vendor name */
287 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
288 (unsigned int *)&c->x86_vendor_id[0],
289 (unsigned int *)&c->x86_vendor_id[8],
290 (unsigned int *)&c->x86_vendor_id[4]);
291
292 get_cpu_vendor(c);
293
294 /* Initialize the standard set of capabilities */
295 /* Note that the vendor-specific code below might override */
296
297 /* Intel-defined flags: level 0x00000001 */
298 if (c->cpuid_level >= 0x00000001) {
299 __u32 misc;
300 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
301 &c->x86_capability[0]);
302 c->x86 = (tfms >> 8) & 0xf;
303 c->x86_model = (tfms >> 4) & 0xf;
304 c->x86_mask = tfms & 0xf;
305 if (c->x86 == 0xf)
306 c->x86 += (tfms >> 20) & 0xff;
307 if (c->x86 >= 0x6)
308 c->x86_model += ((tfms >> 16) & 0xF) << 4;
309 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
310 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
311 } else {
312 /* Have CPUID level 0 only - unheard of */
313 c->x86 = 4;
314 }
315
316 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
317#ifdef CONFIG_SMP
318 c->phys_proc_id = c->initial_apicid;
319#endif
320 /* AMD-defined flags: level 0x80000001 */
321 xlvl = cpuid_eax(0x80000000);
322 c->extended_cpuid_level = xlvl;
323 if ((xlvl & 0xffff0000) == 0x80000000) {
324 if (xlvl >= 0x80000001) {
325 c->x86_capability[1] = cpuid_edx(0x80000001);
326 c->x86_capability[6] = cpuid_ecx(0x80000001);
327 }
328 if (xlvl >= 0x80000004)
329 get_model_name(c); /* Default name */
330 }
331
332 /* Transmeta-defined flags: level 0x80860001 */
333 xlvl = cpuid_eax(0x80860000);
334 if ((xlvl & 0xffff0000) == 0x80860000) {
335 /* Don't set x86_cpuid_level here for now to not confuse. */
336 if (xlvl >= 0x80860001)
337 c->x86_capability[2] = cpuid_edx(0x80860001);
338 }
339
340 if (c->extended_cpuid_level >= 0x80000007)
341 c->x86_power = cpuid_edx(0x80000007);
342
343 if (c->extended_cpuid_level >= 0x80000008) {
344 u32 eax = cpuid_eax(0x80000008);
345
346 c->x86_virt_bits = (eax >> 8) & 0xff;
347 c->x86_phys_bits = eax & 0xff;
348 }
349
350 detect_nopl(c);
351
352 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
353 cpu_devs[c->x86_vendor]->c_early_init)
354 cpu_devs[c->x86_vendor]->c_early_init(c);
355
356 validate_pat_support(c);
357}
358
359/*
360 * This does the hard work of actually picking apart the CPU stuff...
361 */
362static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
363{
364 int i;
365
366 early_identify_cpu(c);
367
368 init_scattered_cpuid_features(c);
369
370 c->apicid = phys_pkg_id(0);
371
372 /*
373 * Vendor-specific initialization. In this section we
374 * canonicalize the feature flags, meaning if there are
375 * features a certain CPU supports which CPUID doesn't
376 * tell us, CPUID claiming incorrect flags, or other bugs,
377 * we handle them here.
378 *
379 * At the end of this section, c->x86_capability better
380 * indicate the features this CPU genuinely supports!
381 */
382 if (this_cpu->c_init)
383 this_cpu->c_init(c);
384
385 detect_ht(c);
386
387 /*
388 * On SMP, boot_cpu_data holds the common feature set between
389 * all CPUs; so make sure that we indicate which features are
390 * common between the CPUs. The first time this routine gets
391 * executed, c == &boot_cpu_data.
392 */
393 if (c != &boot_cpu_data) {
394 /* AND the already accumulated flags with these */
395 for (i = 0; i < NCAPINTS; i++)
396 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
397 }
398
399 /* Clear all flags overriden by options */
400 for (i = 0; i < NCAPINTS; i++)
401 c->x86_capability[i] &= ~cleared_cpu_caps[i];
402
403#ifdef CONFIG_X86_MCE
404 mcheck_init(c);
405#endif
406 select_idle_routine(c);
407
408#ifdef CONFIG_NUMA
409 numa_add_cpu(smp_processor_id());
410#endif
411
412}
413
414void __cpuinit identify_boot_cpu(void)
415{
416 identify_cpu(&boot_cpu_data);
417}
418
419void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
420{
421 BUG_ON(c == &boot_cpu_data);
422 identify_cpu(c);
423 mtrr_ap_init();
424}
425
426static __init int setup_noclflush(char *arg)
427{
428 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
429 return 1;
430}
431__setup("noclflush", setup_noclflush);
432
433struct msr_range {
434 unsigned min;
435 unsigned max;
436};
437
438static struct msr_range msr_range_array[] __cpuinitdata = {
439 { 0x00000000, 0x00000418},
440 { 0xc0000000, 0xc000040b},
441 { 0xc0010000, 0xc0010142},
442 { 0xc0011000, 0xc001103b},
443};
444
445static void __cpuinit print_cpu_msr(void)
446{
447 unsigned index;
448 u64 val;
449 int i;
450 unsigned index_min, index_max;
451
452 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
453 index_min = msr_range_array[i].min;
454 index_max = msr_range_array[i].max;
455 for (index = index_min; index < index_max; index++) {
456 if (rdmsrl_amd_safe(index, &val))
457 continue;
458 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
459 }
460 }
461}
462
463static int show_msr __cpuinitdata;
464static __init int setup_show_msr(char *arg)
465{
466 int num;
467
468 get_option(&arg, &num);
469
470 if (num > 0)
471 show_msr = num;
472 return 1;
473}
474__setup("show_msr=", setup_show_msr);
475
476void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
477{
478 if (c->x86_model_id[0])
479 printk(KERN_CONT "%s", c->x86_model_id);
480
481 if (c->x86_mask || c->cpuid_level >= 0)
482 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
483 else
484 printk(KERN_CONT "\n");
485
486#ifdef CONFIG_SMP
487 if (c->cpu_index < show_msr)
488 print_cpu_msr();
489#else
490 if (show_msr)
491 print_cpu_msr();
492#endif
493}
494
495static __init int setup_disablecpuid(char *arg)
496{
497 int bit;
498 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
499 setup_clear_cpu_cap(bit);
500 else
501 return 0;
502 return 1;
503}
504__setup("clearcpuid=", setup_disablecpuid);
505
506cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
507
508struct x8664_pda **_cpu_pda __read_mostly;
509EXPORT_SYMBOL(_cpu_pda);
510
511struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
512
513char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
514
515unsigned long __supported_pte_mask __read_mostly = ~0UL;
516EXPORT_SYMBOL_GPL(__supported_pte_mask);
517
518static int do_not_nx __cpuinitdata;
519
520/* noexec=on|off
521Control non executable mappings for 64bit processes.
522
523on Enable(default)
524off Disable
525*/
526static int __init nonx_setup(char *str)
527{
528 if (!str)
529 return -EINVAL;
530 if (!strncmp(str, "on", 2)) {
531 __supported_pte_mask |= _PAGE_NX;
532 do_not_nx = 0;
533 } else if (!strncmp(str, "off", 3)) {
534 do_not_nx = 1;
535 __supported_pte_mask &= ~_PAGE_NX;
536 }
537 return 0;
538}
539early_param("noexec", nonx_setup);
540
541int force_personality32;
542
543/* noexec32=on|off
544Control non executable heap for 32bit processes.
545To control the stack too use noexec=off
546
547on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
548off PROT_READ implies PROT_EXEC
549*/
550static int __init nonx32_setup(char *str)
551{
552 if (!strcmp(str, "on"))
553 force_personality32 &= ~READ_IMPLIES_EXEC;
554 else if (!strcmp(str, "off"))
555 force_personality32 |= READ_IMPLIES_EXEC;
556 return 1;
557}
558__setup("noexec32=", nonx32_setup);
559
560void pda_init(int cpu)
561{
562 struct x8664_pda *pda = cpu_pda(cpu);
563
564 /* Setup up data that may be needed in __get_free_pages early */
565 loadsegment(fs, 0);
566 loadsegment(gs, 0);
567 /* Memory clobbers used to order PDA accessed */
568 mb();
569 wrmsrl(MSR_GS_BASE, pda);
570 mb();
571
572 pda->cpunumber = cpu;
573 pda->irqcount = -1;
574 pda->kernelstack = (unsigned long)stack_thread_info() -
575 PDA_STACKOFFSET + THREAD_SIZE;
576 pda->active_mm = &init_mm;
577 pda->mmu_state = 0;
578
579 if (cpu == 0) {
580 /* others are initialized in smpboot.c */
581 pda->pcurrent = &init_task;
582 pda->irqstackptr = boot_cpu_stack;
583 pda->irqstackptr += IRQSTACKSIZE - 64;
584 } else {
585 if (!pda->irqstackptr) {
586 pda->irqstackptr = (char *)
587 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
588 if (!pda->irqstackptr)
589 panic("cannot allocate irqstack for cpu %d",
590 cpu);
591 pda->irqstackptr += IRQSTACKSIZE - 64;
592 }
593
594 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
595 pda->nodenumber = cpu_to_node(cpu);
596 }
597}
598
599char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
600 DEBUG_STKSZ] __page_aligned_bss;
601
602extern asmlinkage void ignore_sysret(void);
603
604/* May not be marked __init: used by software suspend */
605void syscall_init(void)
606{
607 /*
608 * LSTAR and STAR live in a bit strange symbiosis.
609 * They both write to the same internal register. STAR allows to
610 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
611 */
612 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
613 wrmsrl(MSR_LSTAR, system_call);
614 wrmsrl(MSR_CSTAR, ignore_sysret);
615
616#ifdef CONFIG_IA32_EMULATION
617 syscall32_cpu_init();
618#endif
619
620 /* Flags to clear on syscall */
621 wrmsrl(MSR_SYSCALL_MASK,
622 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
623}
624
625void __cpuinit check_efer(void)
626{
627 unsigned long efer;
628
629 rdmsrl(MSR_EFER, efer);
630 if (!(efer & EFER_NX) || do_not_nx)
631 __supported_pte_mask &= ~_PAGE_NX;
632}
633
634unsigned long kernel_eflags;
635
636/*
637 * Copies of the original ist values from the tss are only accessed during
638 * debugging, no special alignment required.
639 */
640DEFINE_PER_CPU(struct orig_ist, orig_ist);
641
642/*
643 * cpu_init() initializes state that is per-CPU. Some data is already
644 * initialized (naturally) in the bootstrap process, such as the GDT
645 * and IDT. We reload them nevertheless, this function acts as a
646 * 'CPU state barrier', nothing should get across.
647 * A lot of state is already set up in PDA init.
648 */
649void __cpuinit cpu_init(void)
650{
651 int cpu = stack_smp_processor_id();
652 struct tss_struct *t = &per_cpu(init_tss, cpu);
653 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
654 unsigned long v;
655 char *estacks = NULL;
656 struct task_struct *me;
657 int i;
658
659 /* CPU 0 is initialised in head64.c */
660 if (cpu != 0)
661 pda_init(cpu);
662 else
663 estacks = boot_exception_stacks;
664
665 me = current;
666
667 if (cpu_test_and_set(cpu, cpu_initialized))
668 panic("CPU#%d already initialized!\n", cpu);
669
670 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
671
672 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
673
674 /*
675 * Initialize the per-CPU GDT with the boot GDT,
676 * and set up the GDT descriptor:
677 */
678
679 switch_to_new_gdt();
680 load_idt((const struct desc_ptr *)&idt_descr);
681
682 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
683 syscall_init();
684
685 wrmsrl(MSR_FS_BASE, 0);
686 wrmsrl(MSR_KERNEL_GS_BASE, 0);
687 barrier();
688
689 check_efer();
690
691 /*
692 * set up and load the per-CPU TSS
693 */
694 if (!orig_ist->ist[0]) {
695 static const unsigned int order[N_EXCEPTION_STACKS] = {
696 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
697 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
698 };
699 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
700 if (cpu) {
701 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
702 if (!estacks)
703 panic("Cannot allocate exception "
704 "stack %ld %d\n", v, cpu);
705 }
706 estacks += PAGE_SIZE << order[v];
707 orig_ist->ist[v] = t->x86_tss.ist[v] =
708 (unsigned long)estacks;
709 }
710 }
711
712 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
713 /*
714 * <= is required because the CPU will access up to
715 * 8 bits beyond the end of the IO permission bitmap.
716 */
717 for (i = 0; i <= IO_BITMAP_LONGS; i++)
718 t->io_bitmap[i] = ~0UL;
719
720 atomic_inc(&init_mm.mm_count);
721 me->active_mm = &init_mm;
722 if (me->mm)
723 BUG();
724 enter_lazy_tlb(&init_mm, me);
725
726 load_sp0(t, &current->thread);
727 set_tss_desc(cpu, t);
728 load_TR_desc();
729 load_LDT(&init_mm.context);
730
731#ifdef CONFIG_KGDB
732 /*
733 * If the kgdb is connected no debug regs should be altered. This
734 * is only applicable when KGDB and a KGDB I/O module are built
735 * into the kernel and you are using early debugging with
736 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
737 */
738 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
739 arch_kgdb_ops.correct_hw_break();
740 else {
741#endif
742 /*
743 * Clear all 6 debug registers:
744 */
745
746 set_debugreg(0UL, 0);
747 set_debugreg(0UL, 1);
748 set_debugreg(0UL, 2);
749 set_debugreg(0UL, 3);
750 set_debugreg(0UL, 6);
751 set_debugreg(0UL, 7);
752#ifdef CONFIG_KGDB
753 /* If the kgdb is connected no debug regs should be altered. */
754 }
755#endif
756
757 fpu_init();
758
759 raw_local_save_flags(kernel_eflags);
760
761 if (is_uv_system())
762 uv_cpu_init();
763}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 4d894e8565fe..de4094a39210 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -21,23 +21,16 @@ struct cpu_dev {
21 void (*c_init)(struct cpuinfo_x86 * c); 21 void (*c_init)(struct cpuinfo_x86 * c);
22 void (*c_identify)(struct cpuinfo_x86 * c); 22 void (*c_identify)(struct cpuinfo_x86 * c);
23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); 23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
24 int c_x86_vendor;
24}; 25};
25 26
26extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; 27#define cpu_dev_register(cpu_devX) \
28 static struct cpu_dev *__cpu_dev_##cpu_devX __used \
29 __attribute__((__section__(".x86_cpu_dev.init"))) = \
30 &cpu_devX;
27 31
28struct cpu_vendor_dev { 32extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[];
29 int vendor;
30 struct cpu_dev *cpu_dev;
31};
32
33#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \
34 static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \
35 __attribute__((__section__(".x86cpuvendor.init"))) = \
36 { cpu_vendor_id, cpu_dev }
37
38extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
39 33
40extern int get_model_name(struct cpuinfo_x86 *c);
41extern void display_cacheinfo(struct cpuinfo_x86 *c); 34extern void display_cacheinfo(struct cpuinfo_x86 *c);
42 35
43#endif 36#endif
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 898a5a2002ed..ffd0f5ed071a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -121,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void)
121 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 121 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
122 122
123 /* Load/Store Serialize to mem access disable (=reorder it) */ 123 /* Load/Store Serialize to mem access disable (=reorder it) */
124 setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); 124 setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
125 /* set load/store serialize from 1GB to 4GB */ 125 /* set load/store serialize from 1GB to 4GB */
126 ccr3 |= 0xe0; 126 ccr3 |= 0xe0;
127 setCx86(CX86_CCR3, ccr3); 127 setCx86(CX86_CCR3, ccr3);
@@ -132,11 +132,11 @@ static void __cpuinit set_cx86_memwb(void)
132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); 132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
133 133
134 /* CCR2 bit 2: unlock NW bit */ 134 /* CCR2 bit 2: unlock NW bit */
135 setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); 135 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
136 /* set 'Not Write-through' */ 136 /* set 'Not Write-through' */
137 write_cr0(read_cr0() | X86_CR0_NW); 137 write_cr0(read_cr0() | X86_CR0_NW);
138 /* CCR2 bit 2: lock NW bit and set WT1 */ 138 /* CCR2 bit 2: lock NW bit and set WT1 */
139 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); 139 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
140} 140}
141 141
142/* 142/*
@@ -150,14 +150,14 @@ static void __cpuinit geode_configure(void)
150 local_irq_save(flags); 150 local_irq_save(flags);
151 151
152 /* Suspend on halt power saving and enable #SUSP pin */ 152 /* Suspend on halt power saving and enable #SUSP pin */
153 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); 153 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
154 154
155 ccr3 = getCx86(CX86_CCR3); 155 ccr3 = getCx86(CX86_CCR3);
156 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 156 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
157 157
158 158
159 /* FPU fast, DTE cache, Mem bypass */ 159 /* FPU fast, DTE cache, Mem bypass */
160 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); 160 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
161 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 161 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
162 162
163 set_cx86_memwb(); 163 set_cx86_memwb();
@@ -291,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
291 /* GXm supports extended cpuid levels 'ala' AMD */ 291 /* GXm supports extended cpuid levels 'ala' AMD */
292 if (c->cpuid_level == 2) { 292 if (c->cpuid_level == 2) {
293 /* Enable cxMMX extensions (GX1 Datasheet 54) */ 293 /* Enable cxMMX extensions (GX1 Datasheet 54) */
294 setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); 294 setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
295 295
296 /* 296 /*
297 * GXm : 0x30 ... 0x5f GXm datasheet 51 297 * GXm : 0x30 ... 0x5f GXm datasheet 51
@@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
301 */ 301 */
302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) 302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
303 geode_configure(); 303 geode_configure();
304 get_model_name(c); /* get CPU marketing name */
305 return; 304 return;
306 } else { /* MediaGX */ 305 } else { /* MediaGX */
307 Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; 306 Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
@@ -314,7 +313,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
314 if (dir1 > 7) { 313 if (dir1 > 7) {
315 dir0_msn++; /* M II */ 314 dir0_msn++; /* M II */
316 /* Enable MMX extensions (App note 108) */ 315 /* Enable MMX extensions (App note 108) */
317 setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); 316 setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
318 } else { 317 } else {
319 c->coma_bug = 1; /* 6x86MX, it has the bug. */ 318 c->coma_bug = 1; /* 6x86MX, it has the bug. */
320 } 319 }
@@ -429,7 +428,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
429 local_irq_save(flags); 428 local_irq_save(flags);
430 ccr3 = getCx86(CX86_CCR3); 429 ccr3 = getCx86(CX86_CCR3);
431 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 430 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
432 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ 431 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */
433 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 432 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
434 local_irq_restore(flags); 433 local_irq_restore(flags);
435 } 434 }
@@ -442,14 +441,16 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
442 .c_early_init = early_init_cyrix, 441 .c_early_init = early_init_cyrix,
443 .c_init = init_cyrix, 442 .c_init = init_cyrix,
444 .c_identify = cyrix_identify, 443 .c_identify = cyrix_identify,
444 .c_x86_vendor = X86_VENDOR_CYRIX,
445}; 445};
446 446
447cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); 447cpu_dev_register(cyrix_cpu_dev);
448 448
449static struct cpu_dev nsc_cpu_dev __cpuinitdata = { 449static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
450 .c_vendor = "NSC", 450 .c_vendor = "NSC",
451 .c_ident = { "Geode by NSC" }, 451 .c_ident = { "Geode by NSC" },
452 .c_init = init_nsc, 452 .c_init = init_nsc,
453 .c_x86_vendor = X86_VENDOR_NSC,
453}; 454};
454 455
455cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); 456cpu_dev_register(nsc_cpu_dev);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
deleted file mode 100644
index c9017799497c..000000000000
--- a/arch/x86/kernel/cpu/feature_names.c
+++ /dev/null
@@ -1,84 +0,0 @@
1/*
2 * Strings for the various x86 capability flags.
3 *
4 * This file must not contain any executable code.
5 */
6
7#include <asm/cpufeature.h>
8
9/*
10 * These flag bits must match the definitions in <asm/cpufeature.h>.
11 * NULL means this bit is undefined or reserved; either way it doesn't
12 * have meaning as far as Linux is concerned. Note that it's important
13 * to realize there is a difference between this table and CPUID -- if
14 * applications want to get the raw CPUID data, they should access
15 * /dev/cpu/<cpu_nr>/cpuid instead.
16 */
17const char * const x86_cap_flags[NCAPINTS*32] = {
18 /* Intel-defined */
19 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
20 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
21 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
22 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
23
24 /* AMD-defined */
25 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
26 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
27 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
28 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
29 "3dnowext", "3dnow",
30
31 /* Transmeta-defined */
32 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
33 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
34 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
35 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
36
37 /* Other (Linux-defined) */
38 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
39 NULL, NULL, NULL, NULL,
40 "constant_tsc", "up", NULL, "arch_perfmon",
41 "pebs", "bts", NULL, NULL,
42 "rep_good", NULL, NULL, NULL,
43 "nopl", NULL, NULL, NULL,
44 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
45
46 /* Intel-defined (#2) */
47 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
48 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
49 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
51
52 /* VIA/Cyrix/Centaur-defined */
53 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
54 "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
55 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
56 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
57
58 /* AMD-defined (#2) */
59 "lahf_lm", "cmp_legacy", "svm", "extapic",
60 "cr8_legacy", "abm", "sse4a", "misalignsse",
61 "3dnowprefetch", "osvw", "ibs", "sse5",
62 "skinit", "wdt", NULL, NULL,
63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
64 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
65
66 /* Auxiliary (Linux-defined) */
67 "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
68 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
69 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
70 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
71};
72
73const char *const x86_power_flags[32] = {
74 "ts", /* temperature sensor */
75 "fid", /* frequency id control */
76 "vid", /* voltage id control */
77 "ttp", /* thermal trip */
78 "tm",
79 "stc",
80 "100mhzsteps",
81 "hwpstate",
82 "", /* tsc invariant mapped to constant_tsc */
83 /* nothing */
84};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f113ef4595f6..99468dbd08da 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -15,6 +15,11 @@
15#include <asm/ds.h> 15#include <asm/ds.h>
16#include <asm/bugs.h> 16#include <asm/bugs.h>
17 17
18#ifdef CONFIG_X86_64
19#include <asm/topology.h>
20#include <asm/numa_64.h>
21#endif
22
18#include "cpu.h" 23#include "cpu.h"
19 24
20#ifdef CONFIG_X86_LOCAL_APIC 25#ifdef CONFIG_X86_LOCAL_APIC
@@ -23,23 +28,22 @@
23#include <mach_apic.h> 28#include <mach_apic.h>
24#endif 29#endif
25 30
26#ifdef CONFIG_X86_INTEL_USERCOPY
27/*
28 * Alignment at which movsl is preferred for bulk memory copies.
29 */
30struct movsl_mask movsl_mask __read_mostly;
31#endif
32
33static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 31static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
34{ 32{
35 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
36 if (c->x86 == 15 && c->x86_cache_alignment == 64)
37 c->x86_cache_alignment = 128;
38 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 33 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
39 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 34 (c->x86 == 0x6 && c->x86_model >= 0x0e))
40 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 35 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
36
37#ifdef CONFIG_X86_64
38 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
39#else
40 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
41 if (c->x86 == 15 && c->x86_cache_alignment == 64)
42 c->x86_cache_alignment = 128;
43#endif
41} 44}
42 45
46#ifdef CONFIG_X86_32
43/* 47/*
44 * Early probe support logic for ppro memory erratum #50 48 * Early probe support logic for ppro memory erratum #50
45 * 49 *
@@ -59,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void)
59 return 0; 63 return 0;
60} 64}
61 65
66#ifdef CONFIG_X86_F00F_BUG
67static void __cpuinit trap_init_f00f_bug(void)
68{
69 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
62 70
63/* 71 /*
64 * P4 Xeon errata 037 workaround. 72 * Update the IDT descriptor and reload the IDT so that
65 * Hardware prefetcher may cause stale data to be loaded into the cache. 73 * it uses the read-only mapped virtual address.
66 */ 74 */
67static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) 75 idt_descr.address = fix_to_virt(FIX_F00F_IDT);
76 load_idt(&idt_descr);
77}
78#endif
79
80static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
68{ 81{
69 unsigned long lo, hi; 82 unsigned long lo, hi;
70 83
84#ifdef CONFIG_X86_F00F_BUG
85 /*
86 * All current models of Pentium and Pentium with MMX technology CPUs
87 * have the F0 0F bug, which lets nonprivileged users lock up the system.
88 * Note that the workaround only should be initialized once...
89 */
90 c->f00f_bug = 0;
91 if (!paravirt_enabled() && c->x86 == 5) {
92 static int f00f_workaround_enabled;
93
94 c->f00f_bug = 1;
95 if (!f00f_workaround_enabled) {
96 trap_init_f00f_bug();
97 printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
98 f00f_workaround_enabled = 1;
99 }
100 }
101#endif
102
103 /*
104 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
105 * model 3 mask 3
106 */
107 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
108 clear_cpu_cap(c, X86_FEATURE_SEP);
109
110 /*
111 * P4 Xeon errata 037 workaround.
112 * Hardware prefetcher may cause stale data to be loaded into the cache.
113 */
71 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 114 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
72 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); 115 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
73 if ((lo & (1<<9)) == 0) { 116 if ((lo & (1<<9)) == 0) {
@@ -77,13 +120,68 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
77 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 120 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
78 } 121 }
79 } 122 }
123
124 /*
125 * See if we have a good local APIC by checking for buggy Pentia,
126 * i.e. all B steppings and the C2 stepping of P54C when using their
127 * integrated APIC (see 11AP erratum in "Pentium Processor
128 * Specification Update").
129 */
130 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
131 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
132 set_cpu_cap(c, X86_FEATURE_11AP);
133
134
135#ifdef CONFIG_X86_INTEL_USERCOPY
136 /*
137 * Set up the preferred alignment for movsl bulk memory moves
138 */
139 switch (c->x86) {
140 case 4: /* 486: untested */
141 break;
142 case 5: /* Old Pentia: untested */
143 break;
144 case 6: /* PII/PIII only like movsl with 8-byte alignment */
145 movsl_mask.mask = 7;
146 break;
147 case 15: /* P4 is OK down to 8-byte alignment */
148 movsl_mask.mask = 7;
149 break;
150 }
151#endif
152
153#ifdef CONFIG_X86_NUMAQ
154 numaq_tsc_disable();
155#endif
80} 156}
157#else
158static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
159{
160}
161#endif
81 162
163static void __cpuinit srat_detect_node(void)
164{
165#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
166 unsigned node;
167 int cpu = smp_processor_id();
168 int apicid = hard_smp_processor_id();
169
170 /* Don't do the funky fallback heuristics the AMD version employs
171 for now. */
172 node = apicid_to_node[apicid];
173 if (node == NUMA_NO_NODE || !node_online(node))
174 node = first_node(node_online_map);
175 numa_set_node(cpu, node);
176
177 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
178#endif
179}
82 180
83/* 181/*
84 * find out the number of processor cores on the die 182 * find out the number of processor cores on the die
85 */ 183 */
86static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) 184static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
87{ 185{
88 unsigned int eax, ebx, ecx, edx; 186 unsigned int eax, ebx, ecx, edx;
89 187
@@ -98,45 +196,51 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
98 return 1; 196 return 1;
99} 197}
100 198
101#ifdef CONFIG_X86_F00F_BUG 199static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
102static void __cpuinit trap_init_f00f_bug(void)
103{ 200{
104 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); 201 /* Intel VMX MSR indicated features */
105 202#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
106 /* 203#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
107 * Update the IDT descriptor and reload the IDT so that 204#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
108 * it uses the read-only mapped virtual address. 205#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
109 */ 206#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
110 idt_descr.address = fix_to_virt(FIX_F00F_IDT); 207#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
111 load_idt(&idt_descr); 208
209 u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
210
211 clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
212 clear_cpu_cap(c, X86_FEATURE_VNMI);
213 clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
214 clear_cpu_cap(c, X86_FEATURE_EPT);
215 clear_cpu_cap(c, X86_FEATURE_VPID);
216
217 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
218 msr_ctl = vmx_msr_high | vmx_msr_low;
219 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
220 set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
221 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
222 set_cpu_cap(c, X86_FEATURE_VNMI);
223 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
224 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
225 vmx_msr_low, vmx_msr_high);
226 msr_ctl2 = vmx_msr_high | vmx_msr_low;
227 if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
228 (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
229 set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
230 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
231 set_cpu_cap(c, X86_FEATURE_EPT);
232 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
233 set_cpu_cap(c, X86_FEATURE_VPID);
234 }
112} 235}
113#endif
114 236
115static void __cpuinit init_intel(struct cpuinfo_x86 *c) 237static void __cpuinit init_intel(struct cpuinfo_x86 *c)
116{ 238{
117 unsigned int l2 = 0; 239 unsigned int l2 = 0;
118 char *p = NULL;
119 240
120 early_init_intel(c); 241 early_init_intel(c);
121 242
122#ifdef CONFIG_X86_F00F_BUG 243 intel_workarounds(c);
123 /*
124 * All current models of Pentium and Pentium with MMX technology CPUs
125 * have the F0 0F bug, which lets nonprivileged users lock up the system.
126 * Note that the workaround only should be initialized once...
127 */
128 c->f00f_bug = 0;
129 if (!paravirt_enabled() && c->x86 == 5) {
130 static int f00f_workaround_enabled;
131
132 c->f00f_bug = 1;
133 if (!f00f_workaround_enabled) {
134 trap_init_f00f_bug();
135 printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
136 f00f_workaround_enabled = 1;
137 }
138 }
139#endif
140 244
141 l2 = init_intel_cacheinfo(c); 245 l2 = init_intel_cacheinfo(c);
142 if (c->cpuid_level > 9) { 246 if (c->cpuid_level > 9) {
@@ -146,16 +250,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
146 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); 250 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
147 } 251 }
148 252
149 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ 253 if (cpu_has_xmm2)
150 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) 254 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
151 clear_cpu_cap(c, X86_FEATURE_SEP); 255 if (cpu_has_ds) {
256 unsigned int l1;
257 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
258 if (!(l1 & (1<<11)))
259 set_cpu_cap(c, X86_FEATURE_BTS);
260 if (!(l1 & (1<<12)))
261 set_cpu_cap(c, X86_FEATURE_PEBS);
262 ds_init_intel(c);
263 }
152 264
265#ifdef CONFIG_X86_64
266 if (c->x86 == 15)
267 c->x86_cache_alignment = c->x86_clflush_size * 2;
268 if (c->x86 == 6)
269 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
270#else
153 /* 271 /*
154 * Names for the Pentium II/Celeron processors 272 * Names for the Pentium II/Celeron processors
155 * detectable only by also checking the cache size. 273 * detectable only by also checking the cache size.
156 * Dixon is NOT a Celeron. 274 * Dixon is NOT a Celeron.
157 */ 275 */
158 if (c->x86 == 6) { 276 if (c->x86 == 6) {
277 char *p = NULL;
278
159 switch (c->x86_model) { 279 switch (c->x86_model) {
160 case 5: 280 case 5:
161 if (c->x86_mask == 0) { 281 if (c->x86_mask == 0) {
@@ -178,71 +298,41 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
178 p = "Celeron (Coppermine)"; 298 p = "Celeron (Coppermine)";
179 break; 299 break;
180 } 300 }
181 }
182
183 if (p)
184 strcpy(c->x86_model_id, p);
185
186 c->x86_max_cores = num_cpu_cores(c);
187
188 detect_ht(c);
189 301
190 /* Work around errata */ 302 if (p)
191 Intel_errata_workarounds(c); 303 strcpy(c->x86_model_id, p);
192
193#ifdef CONFIG_X86_INTEL_USERCOPY
194 /*
195 * Set up the preferred alignment for movsl bulk memory moves
196 */
197 switch (c->x86) {
198 case 4: /* 486: untested */
199 break;
200 case 5: /* Old Pentia: untested */
201 break;
202 case 6: /* PII/PIII only like movsl with 8-byte alignment */
203 movsl_mask.mask = 7;
204 break;
205 case 15: /* P4 is OK down to 8-byte alignment */
206 movsl_mask.mask = 7;
207 break;
208 } 304 }
209#endif
210 305
211 if (cpu_has_xmm2) 306 if (c->x86 == 15)
212 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
213 if (c->x86 == 15) {
214 set_cpu_cap(c, X86_FEATURE_P4); 307 set_cpu_cap(c, X86_FEATURE_P4);
215 }
216 if (c->x86 == 6) 308 if (c->x86 == 6)
217 set_cpu_cap(c, X86_FEATURE_P3); 309 set_cpu_cap(c, X86_FEATURE_P3);
218 if (cpu_has_ds) {
219 unsigned int l1;
220 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
221 if (!(l1 & (1<<11)))
222 set_cpu_cap(c, X86_FEATURE_BTS);
223 if (!(l1 & (1<<12)))
224 set_cpu_cap(c, X86_FEATURE_PEBS);
225 ds_init_intel(c);
226 }
227 310
228 if (cpu_has_bts) 311 if (cpu_has_bts)
229 ptrace_bts_init_intel(c); 312 ptrace_bts_init_intel(c);
230 313
231 /* 314#endif
232 * See if we have a good local APIC by checking for buggy Pentia,
233 * i.e. all B steppings and the C2 stepping of P54C when using their
234 * integrated APIC (see 11AP erratum in "Pentium Processor
235 * Specification Update").
236 */
237 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
238 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
239 set_cpu_cap(c, X86_FEATURE_11AP);
240 315
241#ifdef CONFIG_X86_NUMAQ 316 detect_extended_topology(c);
242 numaq_tsc_disable(); 317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /*
319 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
320 * detection.
321 */
322 c->x86_max_cores = intel_num_cpu_cores(c);
323#ifdef CONFIG_X86_32
324 detect_ht(c);
243#endif 325#endif
326 }
327
328 /* Work around errata */
329 srat_detect_node();
330
331 if (cpu_has(c, X86_FEATURE_VMX))
332 detect_vmx_virtcap(c);
244} 333}
245 334
335#ifdef CONFIG_X86_32
246static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 336static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
247{ 337{
248 /* 338 /*
@@ -255,10 +345,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
255 size = 256; 345 size = 256;
256 return size; 346 return size;
257} 347}
348#endif
258 349
259static struct cpu_dev intel_cpu_dev __cpuinitdata = { 350static struct cpu_dev intel_cpu_dev __cpuinitdata = {
260 .c_vendor = "Intel", 351 .c_vendor = "Intel",
261 .c_ident = { "GenuineIntel" }, 352 .c_ident = { "GenuineIntel" },
353#ifdef CONFIG_X86_32
262 .c_models = { 354 .c_models = {
263 { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = 355 { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
264 { 356 {
@@ -308,76 +400,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
308 } 400 }
309 }, 401 },
310 }, 402 },
403 .c_size_cache = intel_size_cache,
404#endif
311 .c_early_init = early_init_intel, 405 .c_early_init = early_init_intel,
312 .c_init = init_intel, 406 .c_init = init_intel,
313 .c_size_cache = intel_size_cache, 407 .c_x86_vendor = X86_VENDOR_INTEL,
314}; 408};
315 409
316cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); 410cpu_dev_register(intel_cpu_dev);
317
318#ifndef CONFIG_X86_CMPXCHG
319unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
320{
321 u8 prev;
322 unsigned long flags;
323
324 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
325 local_irq_save(flags);
326 prev = *(u8 *)ptr;
327 if (prev == old)
328 *(u8 *)ptr = new;
329 local_irq_restore(flags);
330 return prev;
331}
332EXPORT_SYMBOL(cmpxchg_386_u8);
333
334unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
335{
336 u16 prev;
337 unsigned long flags;
338
339 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
340 local_irq_save(flags);
341 prev = *(u16 *)ptr;
342 if (prev == old)
343 *(u16 *)ptr = new;
344 local_irq_restore(flags);
345 return prev;
346}
347EXPORT_SYMBOL(cmpxchg_386_u16);
348
349unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
350{
351 u32 prev;
352 unsigned long flags;
353
354 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
355 local_irq_save(flags);
356 prev = *(u32 *)ptr;
357 if (prev == old)
358 *(u32 *)ptr = new;
359 local_irq_restore(flags);
360 return prev;
361}
362EXPORT_SYMBOL(cmpxchg_386_u32);
363#endif
364
365#ifndef CONFIG_X86_CMPXCHG64
366unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
367{
368 u64 prev;
369 unsigned long flags;
370
371 /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
372 local_irq_save(flags);
373 prev = *(u64 *)ptr;
374 if (prev == old)
375 *(u64 *)ptr = new;
376 local_irq_restore(flags);
377 return prev;
378}
379EXPORT_SYMBOL(cmpxchg_486_u64);
380#endif
381
382/* arch_initcall(intel_cpu_init); */
383 411
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
deleted file mode 100644
index 1019c58d39f0..000000000000
--- a/arch/x86/kernel/cpu/intel_64.c
+++ /dev/null
@@ -1,95 +0,0 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3#include <asm/processor.h>
4#include <asm/ptrace.h>
5#include <asm/topology.h>
6#include <asm/numa_64.h>
7
8#include "cpu.h"
9
10static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
11{
12 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
13 (c->x86 == 0x6 && c->x86_model >= 0x0e))
14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15
16 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
17}
18
19/*
20 * find out the number of processor cores on the die
21 */
22static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
23{
24 unsigned int eax, t;
25
26 if (c->cpuid_level < 4)
27 return 1;
28
29 cpuid_count(4, 0, &eax, &t, &t, &t);
30
31 if (eax & 0x1f)
32 return ((eax >> 26) + 1);
33 else
34 return 1;
35}
36
37static void __cpuinit srat_detect_node(void)
38{
39#ifdef CONFIG_NUMA
40 unsigned node;
41 int cpu = smp_processor_id();
42 int apicid = hard_smp_processor_id();
43
44 /* Don't do the funky fallback heuristics the AMD version employs
45 for now. */
46 node = apicid_to_node[apicid];
47 if (node == NUMA_NO_NODE || !node_online(node))
48 node = first_node(node_online_map);
49 numa_set_node(cpu, node);
50
51 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
52#endif
53}
54
55static void __cpuinit init_intel(struct cpuinfo_x86 *c)
56{
57 init_intel_cacheinfo(c);
58 if (c->cpuid_level > 9) {
59 unsigned eax = cpuid_eax(10);
60 /* Check for version and the number of counters */
61 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
62 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
63 }
64
65 if (cpu_has_ds) {
66 unsigned int l1, l2;
67 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
68 if (!(l1 & (1<<11)))
69 set_cpu_cap(c, X86_FEATURE_BTS);
70 if (!(l1 & (1<<12)))
71 set_cpu_cap(c, X86_FEATURE_PEBS);
72 }
73
74
75 if (cpu_has_bts)
76 ds_init_intel(c);
77
78 if (c->x86 == 15)
79 c->x86_cache_alignment = c->x86_clflush_size * 2;
80 if (c->x86 == 6)
81 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
82 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
83 c->x86_max_cores = intel_num_cpu_cores(c);
84
85 srat_detect_node();
86}
87
88static struct cpu_dev intel_cpu_dev __cpuinitdata = {
89 .c_vendor = "Intel",
90 .c_ident = { "GenuineIntel" },
91 .c_early_init = early_init_intel,
92 .c_init = init_intel,
93};
94cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
95
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b0a10b002f1..3f46afbb1cf1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * Routines to indentify caches on Intel CPU. 2 * Routines to indentify caches on Intel CPU.
3 * 3 *
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */ 8 */
@@ -13,6 +13,7 @@
13#include <linux/compiler.h> 13#include <linux/compiler.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/pci.h>
16 17
17#include <asm/processor.h> 18#include <asm/processor.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
@@ -130,9 +131,18 @@ struct _cpuid4_info {
130 union _cpuid4_leaf_ebx ebx; 131 union _cpuid4_leaf_ebx ebx;
131 union _cpuid4_leaf_ecx ecx; 132 union _cpuid4_leaf_ecx ecx;
132 unsigned long size; 133 unsigned long size;
134 unsigned long can_disable;
133 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 135 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
134}; 136};
135 137
138#ifdef CONFIG_PCI
139static struct pci_device_id k8_nb_id[] = {
140 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
141 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
142 {}
143};
144#endif
145
136unsigned short num_cache_leaves; 146unsigned short num_cache_leaves;
137 147
138/* AMD doesn't have CPUID4. Emulate it here to report the same 148/* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -182,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = {
182static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; 192static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
183static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; 193static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
184 194
185static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 195static void __cpuinit
186 union _cpuid4_leaf_ebx *ebx, 196amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
187 union _cpuid4_leaf_ecx *ecx) 197 union _cpuid4_leaf_ebx *ebx,
198 union _cpuid4_leaf_ecx *ecx)
188{ 199{
189 unsigned dummy; 200 unsigned dummy;
190 unsigned line_size, lines_per_tag, assoc, size_in_kb; 201 unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -251,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
251 (ebx->split.ways_of_associativity + 1) - 1; 262 (ebx->split.ways_of_associativity + 1) - 1;
252} 263}
253 264
254static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 265static void __cpuinit
266amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
267{
268 if (index < 3)
269 return;
270 this_leaf->can_disable = 1;
271}
272
273static int
274__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
255{ 275{
256 union _cpuid4_leaf_eax eax; 276 union _cpuid4_leaf_eax eax;
257 union _cpuid4_leaf_ebx ebx; 277 union _cpuid4_leaf_ebx ebx;
258 union _cpuid4_leaf_ecx ecx; 278 union _cpuid4_leaf_ecx ecx;
259 unsigned edx; 279 unsigned edx;
260 280
261 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 281 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
262 amd_cpuid4(index, &eax, &ebx, &ecx); 282 amd_cpuid4(index, &eax, &ebx, &ecx);
263 else 283 if (boot_cpu_data.x86 >= 0x10)
264 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 284 amd_check_l3_disable(index, this_leaf);
285 } else {
286 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
287 }
288
265 if (eax.split.type == CACHE_TYPE_NULL) 289 if (eax.split.type == CACHE_TYPE_NULL)
266 return -EIO; /* better error ? */ 290 return -EIO; /* better error ? */
267 291
268 this_leaf->eax = eax; 292 this_leaf->eax = eax;
269 this_leaf->ebx = ebx; 293 this_leaf->ebx = ebx;
270 this_leaf->ecx = ecx; 294 this_leaf->ecx = ecx;
271 this_leaf->size = (ecx.split.number_of_sets + 1) * 295 this_leaf->size = (ecx.split.number_of_sets + 1) *
272 (ebx.split.coherency_line_size + 1) * 296 (ebx.split.coherency_line_size + 1) *
273 (ebx.split.physical_line_partition + 1) * 297 (ebx.split.physical_line_partition + 1) *
274 (ebx.split.ways_of_associativity + 1); 298 (ebx.split.ways_of_associativity + 1);
275 return 0; 299 return 0;
276} 300}
277 301
@@ -453,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
453 477
454/* pointer to _cpuid4_info array (for each cache leaf) */ 478/* pointer to _cpuid4_info array (for each cache leaf) */
455static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); 479static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
456#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) 480#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
457 481
458#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
459static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 483static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -490,7 +514,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
490 514
491 this_leaf = CPUID4_INFO_IDX(cpu, index); 515 this_leaf = CPUID4_INFO_IDX(cpu, index);
492 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 516 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
493 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 517 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
494 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 518 cpu_clear(cpu, sibling_leaf->shared_cpu_map);
495 } 519 }
496} 520}
@@ -572,7 +596,7 @@ struct _index_kobject {
572 596
573/* pointer to array of kobjects for cpuX/cache/indexY */ 597/* pointer to array of kobjects for cpuX/cache/indexY */
574static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); 598static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
575#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) 599#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
576 600
577#define show_one_plus(file_name, object, val) \ 601#define show_one_plus(file_name, object, val) \
578static ssize_t show_##file_name \ 602static ssize_t show_##file_name \
@@ -637,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
637 } 661 }
638} 662}
639 663
664#define to_object(k) container_of(k, struct _index_kobject, kobj)
665#define to_attr(a) container_of(a, struct _cache_attr, attr)
666
667#ifdef CONFIG_PCI
668static struct pci_dev *get_k8_northbridge(int node)
669{
670 struct pci_dev *dev = NULL;
671 int i;
672
673 for (i = 0; i <= node; i++) {
674 do {
675 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
676 if (!dev)
677 break;
678 } while (!pci_match_id(&k8_nb_id[0], dev));
679 if (!dev)
680 break;
681 }
682 return dev;
683}
684#else
685static struct pci_dev *get_k8_northbridge(int node)
686{
687 return NULL;
688}
689#endif
690
691static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
692{
693 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
694 struct pci_dev *dev = NULL;
695 ssize_t ret = 0;
696 int i;
697
698 if (!this_leaf->can_disable)
699 return sprintf(buf, "Feature not enabled\n");
700
701 dev = get_k8_northbridge(node);
702 if (!dev) {
703 printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
704 return -EINVAL;
705 }
706
707 for (i = 0; i < 2; i++) {
708 unsigned int reg;
709
710 pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
711
712 ret += sprintf(buf, "%sEntry: %d\n", buf, i);
713 ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
714 buf,
715 reg & 0x80000000 ? "Disabled" : "Allowed",
716 reg & 0x40000000 ? "Disabled" : "Allowed");
717 ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
718 buf, (reg & 0x30000) >> 16, reg & 0xfff);
719 }
720 return ret;
721}
722
723static ssize_t
724store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
725 size_t count)
726{
727 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
728 struct pci_dev *dev = NULL;
729 unsigned int ret, index, val;
730
731 if (!this_leaf->can_disable)
732 return 0;
733
734 if (strlen(buf) > 15)
735 return -EINVAL;
736
737 ret = sscanf(buf, "%x %x", &index, &val);
738 if (ret != 2)
739 return -EINVAL;
740 if (index > 1)
741 return -EINVAL;
742
743 val |= 0xc0000000;
744 dev = get_k8_northbridge(node);
745 if (!dev) {
746 printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
747 return -EINVAL;
748 }
749
750 pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
751 wbinvd();
752 pci_write_config_dword(dev, 0x1BC + index * 4, val);
753
754 return 1;
755}
756
640struct _cache_attr { 757struct _cache_attr {
641 struct attribute attr; 758 struct attribute attr;
642 ssize_t (*show)(struct _cpuid4_info *, char *); 759 ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -657,6 +774,8 @@ define_one_ro(size);
657define_one_ro(shared_cpu_map); 774define_one_ro(shared_cpu_map);
658define_one_ro(shared_cpu_list); 775define_one_ro(shared_cpu_list);
659 776
777static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
778
660static struct attribute * default_attrs[] = { 779static struct attribute * default_attrs[] = {
661 &type.attr, 780 &type.attr,
662 &level.attr, 781 &level.attr,
@@ -667,12 +786,10 @@ static struct attribute * default_attrs[] = {
667 &size.attr, 786 &size.attr,
668 &shared_cpu_map.attr, 787 &shared_cpu_map.attr,
669 &shared_cpu_list.attr, 788 &shared_cpu_list.attr,
789 &cache_disable.attr,
670 NULL 790 NULL
671}; 791};
672 792
673#define to_object(k) container_of(k, struct _index_kobject, kobj)
674#define to_attr(a) container_of(a, struct _cache_attr, attr)
675
676static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) 793static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
677{ 794{
678 struct _cache_attr *fattr = to_attr(attr); 795 struct _cache_attr *fattr = to_attr(attr);
@@ -682,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
682 ret = fattr->show ? 799 ret = fattr->show ?
683 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 800 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
684 buf) : 801 buf) :
685 0; 802 0;
686 return ret; 803 return ret;
687} 804}
688 805
689static ssize_t store(struct kobject * kobj, struct attribute * attr, 806static ssize_t store(struct kobject * kobj, struct attribute * attr,
690 const char * buf, size_t count) 807 const char * buf, size_t count)
691{ 808{
692 return 0; 809 struct _cache_attr *fattr = to_attr(attr);
810 struct _index_kobject *this_leaf = to_object(kobj);
811 ssize_t ret;
812
813 ret = fattr->store ?
814 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
815 buf, count) :
816 0;
817 return ret;
693} 818}
694 819
695static struct sysfs_ops sysfs_ops = { 820static struct sysfs_ops sysfs_ops = {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 726a5fcdf341..4b031a4ac856 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -860,7 +860,7 @@ error:
860 return err; 860 return err;
861} 861}
862 862
863static void mce_remove_device(unsigned int cpu) 863static __cpuinit void mce_remove_device(unsigned int cpu)
864{ 864{
865 int i; 865 int i;
866 866
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
new file mode 100644
index 000000000000..dfea390e1608
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -0,0 +1,32 @@
1#!/usr/bin/perl
2#
3# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
4#
5
6($in, $out) = @ARGV;
7
8open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
9open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
10
11print OUT "#include <asm/cpufeature.h>\n\n";
12print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
13
14while (defined($line = <IN>)) {
15 if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
16 $macro = $1;
17 $feature = $2;
18 $tail = $3;
19 if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
20 $feature = $1;
21 }
22
23 if ($feature ne '') {
24 printf OUT "\t%-32s = \"%s\",\n",
25 "[$macro]", "\L$feature";
26 }
27 }
28}
29print OUT "};\n";
30
31close(IN);
32close(OUT);
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
new file mode 100644
index 000000000000..5abbea297e0c
--- /dev/null
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -0,0 +1,20 @@
1/*
2 * Strings for the various x86 power flags
3 *
4 * This file must not contain any executable code.
5 */
6
7#include <asm/cpufeature.h>
8
9const char *const x86_power_flags[32] = {
10 "ts", /* temperature sensor */
11 "fid", /* frequency id control */
12 "vid", /* voltage id control */
13 "ttp", /* thermal trip */
14 "tm",
15 "stc",
16 "100mhzsteps",
17 "hwpstate",
18 "", /* tsc invariant mapped to constant_tsc */
19 /* nothing */
20};
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index b911a2c61b8f..52b3fefbd5af 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -5,6 +5,18 @@
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c)
9{
10 u32 xlvl;
11
12 /* Transmeta-defined flags: level 0x80860001 */
13 xlvl = cpuid_eax(0x80860000);
14 if ((xlvl & 0xffff0000) == 0x80860000) {
15 if (xlvl >= 0x80860001)
16 c->x86_capability[2] = cpuid_edx(0x80860001);
17 }
18}
19
8static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) 20static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
9{ 21{
10 unsigned int cap_mask, uk, max, dummy; 22 unsigned int cap_mask, uk, max, dummy;
@@ -12,7 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
12 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; 24 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
13 char cpu_info[65]; 25 char cpu_info[65];
14 26
15 get_model_name(c); /* Same as AMD/Cyrix */ 27 early_init_transmeta(c);
28
16 display_cacheinfo(c); 29 display_cacheinfo(c);
17 30
18 /* Print CMS and CPU revision */ 31 /* Print CMS and CPU revision */
@@ -85,23 +98,12 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
85#endif 98#endif
86} 99}
87 100
88static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c)
89{
90 u32 xlvl;
91
92 /* Transmeta-defined flags: level 0x80860001 */
93 xlvl = cpuid_eax(0x80860000);
94 if ((xlvl & 0xffff0000) == 0x80860000) {
95 if (xlvl >= 0x80860001)
96 c->x86_capability[2] = cpuid_edx(0x80860001);
97 }
98}
99
100static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { 101static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
101 .c_vendor = "Transmeta", 102 .c_vendor = "Transmeta",
102 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 103 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_early_init = early_init_transmeta,
103 .c_init = init_transmeta, 105 .c_init = init_transmeta,
104 .c_identify = transmeta_identify, 106 .c_x86_vendor = X86_VENDOR_TRANSMETA,
105}; 107};
106 108
107cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); 109cpu_dev_register(transmeta_cpu_dev);
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index b1fc90989d75..e777f79e0960 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = {
19 } 19 }
20 }, 20 },
21 }, 21 },
22 .c_x86_vendor = X86_VENDOR_UMC,
22}; 23};
23 24
24cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); 25cpu_dev_register(umc_cpu_dev);
25 26
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 66e48aa2dd1b..78e642feac30 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -148,6 +148,9 @@ void __init e820_print_map(char *who)
148 case E820_NVS: 148 case E820_NVS:
149 printk(KERN_CONT "(ACPI NVS)\n"); 149 printk(KERN_CONT "(ACPI NVS)\n");
150 break; 150 break;
151 case E820_UNUSABLE:
152 printk("(unusable)\n");
153 break;
151 default: 154 default:
152 printk(KERN_CONT "type %u\n", e820.map[i].type); 155 printk(KERN_CONT "type %u\n", e820.map[i].type);
153 break; 156 break;
@@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type)
1260 case E820_RAM: return "System RAM"; 1263 case E820_RAM: return "System RAM";
1261 case E820_ACPI: return "ACPI Tables"; 1264 case E820_ACPI: return "ACPI Tables";
1262 case E820_NVS: return "ACPI Non-volatile Storage"; 1265 case E820_NVS: return "ACPI Non-volatile Storage";
1266 case E820_UNUSABLE: return "Unusable memory";
1263 default: return "reserved"; 1267 default: return "reserved";
1264 } 1268 }
1265} 1269}
@@ -1267,6 +1271,7 @@ static inline const char *e820_type_to_string(int e820_type)
1267/* 1271/*
1268 * Mark e820 reserved areas as busy for the resource manager. 1272 * Mark e820 reserved areas as busy for the resource manager.
1269 */ 1273 */
1274static struct resource __initdata *e820_res;
1270void __init e820_reserve_resources(void) 1275void __init e820_reserve_resources(void)
1271{ 1276{
1272 int i; 1277 int i;
@@ -1274,6 +1279,7 @@ void __init e820_reserve_resources(void)
1274 u64 end; 1279 u64 end;
1275 1280
1276 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); 1281 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1282 e820_res = res;
1277 for (i = 0; i < e820.nr_map; i++) { 1283 for (i = 0; i < e820.nr_map; i++) {
1278 end = e820.map[i].addr + e820.map[i].size - 1; 1284 end = e820.map[i].addr + e820.map[i].size - 1;
1279#ifndef CONFIG_RESOURCES_64BIT 1285#ifndef CONFIG_RESOURCES_64BIT
@@ -1287,7 +1293,14 @@ void __init e820_reserve_resources(void)
1287 res->end = end; 1293 res->end = end;
1288 1294
1289 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 1295 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1290 insert_resource(&iomem_resource, res); 1296
1297 /*
1298 * don't register the region that could be conflicted with
1299 * pci device BAR resource and insert them later in
1300 * pcibios_resource_survey()
1301 */
1302 if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20))
1303 insert_resource(&iomem_resource, res);
1291 res++; 1304 res++;
1292 } 1305 }
1293 1306
@@ -1299,6 +1312,19 @@ void __init e820_reserve_resources(void)
1299 } 1312 }
1300} 1313}
1301 1314
1315void __init e820_reserve_resources_late(void)
1316{
1317 int i;
1318 struct resource *res;
1319
1320 res = e820_res;
1321 for (i = 0; i < e820.nr_map; i++) {
1322 if (!res->parent && res->end)
1323 reserve_region_with_split(&iomem_resource, res->start, res->end, res->name);
1324 res++;
1325 }
1326}
1327
1302char *__init default_machine_specific_memory_setup(void) 1328char *__init default_machine_specific_memory_setup(void)
1303{ 1329{
1304 char *who = "BIOS-e820"; 1330 char *who = "BIOS-e820";
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
new file mode 100644
index 000000000000..849e5cd485b8
--- /dev/null
+++ b/arch/x86/kernel/es7000_32.c
@@ -0,0 +1,345 @@
1/*
2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation
4 * This file contains the code to configure and interface
5 * with Unisys ES7000 series hardware system manager.
6 *
7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it would be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write the Free Software Foundation, Inc., 59
19 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
20 *
21 * Contact information: Unisys Corporation, Township Line & Union Meeting
22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
23 *
24 * http://www.unisys.com
25 */
26
27#include <linux/module.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/smp.h>
31#include <linux/string.h>
32#include <linux/spinlock.h>
33#include <linux/errno.h>
34#include <linux/notifier.h>
35#include <linux/reboot.h>
36#include <linux/init.h>
37#include <linux/acpi.h>
38#include <asm/io.h>
39#include <asm/nmi.h>
40#include <asm/smp.h>
41#include <asm/apicdef.h>
42#include <mach_mpparse.h>
43
44/*
45 * ES7000 chipsets
46 */
47
48#define NON_UNISYS 0
49#define ES7000_CLASSIC 1
50#define ES7000_ZORRO 2
51
52
53#define MIP_REG 1
54#define MIP_PSAI_REG 4
55
56#define MIP_BUSY 1
57#define MIP_SPIN 0xf0000
58#define MIP_VALID 0x0100000000000000ULL
59#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
60
61#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
62
63struct mip_reg_info {
64 unsigned long long mip_info;
65 unsigned long long delivery_info;
66 unsigned long long host_reg;
67 unsigned long long mip_reg;
68};
69
70struct part_info {
71 unsigned char type;
72 unsigned char length;
73 unsigned char part_id;
74 unsigned char apic_mode;
75 unsigned long snum;
76 char ptype[16];
77 char sname[64];
78 char pname[64];
79};
80
81struct psai {
82 unsigned long long entry_type;
83 unsigned long long addr;
84 unsigned long long bep_addr;
85};
86
87struct es7000_mem_info {
88 unsigned char type;
89 unsigned char length;
90 unsigned char resv[6];
91 unsigned long long start;
92 unsigned long long size;
93};
94
95struct es7000_oem_table {
96 unsigned long long hdr;
97 struct mip_reg_info mip;
98 struct part_info pif;
99 struct es7000_mem_info shm;
100 struct psai psai;
101};
102
103#ifdef CONFIG_ACPI
104
105struct oem_table {
106 struct acpi_table_header Header;
107 u32 OEMTableAddr;
108 u32 OEMTableSize;
109};
110
111extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
112#endif
113
114struct mip_reg {
115 unsigned long long off_0;
116 unsigned long long off_8;
117 unsigned long long off_10;
118 unsigned long long off_18;
119 unsigned long long off_20;
120 unsigned long long off_28;
121 unsigned long long off_30;
122 unsigned long long off_38;
123};
124
125#define MIP_SW_APIC 0x1020b
126#define MIP_FUNC(VALUE) (VALUE & 0xff)
127
128/*
129 * ES7000 Globals
130 */
131
132static volatile unsigned long *psai = NULL;
133static struct mip_reg *mip_reg;
134static struct mip_reg *host_reg;
135static int mip_port;
136static unsigned long mip_addr, host_addr;
137
138int es7000_plat;
139
140/*
141 * GSI override for ES7000 platforms.
142 */
143
144static unsigned int base;
145
146static int
147es7000_rename_gsi(int ioapic, int gsi)
148{
149 if (es7000_plat == ES7000_ZORRO)
150 return gsi;
151
152 if (!base) {
153 int i;
154 for (i = 0; i < nr_ioapics; i++)
155 base += nr_ioapic_registers[i];
156 }
157
158 if (!ioapic && (gsi < 16))
159 gsi += base;
160 return gsi;
161}
162
163void __init
164setup_unisys(void)
165{
166 /*
167 * Determine the generation of the ES7000 currently running.
168 *
169 * es7000_plat = 1 if the machine is a 5xx ES7000 box
170 * es7000_plat = 2 if the machine is a x86_64 ES7000 box
171 *
172 */
173 if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
174 es7000_plat = ES7000_ZORRO;
175 else
176 es7000_plat = ES7000_CLASSIC;
177 ioapic_renumber_irq = es7000_rename_gsi;
178}
179
180/*
181 * Parse the OEM Table
182 */
183
184int __init
185parse_unisys_oem (char *oemptr)
186{
187 int i;
188 int success = 0;
189 unsigned char type, size;
190 unsigned long val;
191 char *tp = NULL;
192 struct psai *psaip = NULL;
193 struct mip_reg_info *mi;
194 struct mip_reg *host, *mip;
195
196 tp = oemptr;
197
198 tp += 8;
199
200 for (i=0; i <= 6; i++) {
201 type = *tp++;
202 size = *tp++;
203 tp -= 2;
204 switch (type) {
205 case MIP_REG:
206 mi = (struct mip_reg_info *)tp;
207 val = MIP_RD_LO(mi->host_reg);
208 host_addr = val;
209 host = (struct mip_reg *)val;
210 host_reg = __va(host);
211 val = MIP_RD_LO(mi->mip_reg);
212 mip_port = MIP_PORT(mi->mip_info);
213 mip_addr = val;
214 mip = (struct mip_reg *)val;
215 mip_reg = __va(mip);
216 pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
217 (unsigned long)host_reg);
218 pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
219 (unsigned long)mip_reg);
220 success++;
221 break;
222 case MIP_PSAI_REG:
223 psaip = (struct psai *)tp;
224 if (tp != NULL) {
225 if (psaip->addr)
226 psai = __va(psaip->addr);
227 else
228 psai = NULL;
229 success++;
230 }
231 break;
232 default:
233 break;
234 }
235 tp += size;
236 }
237
238 if (success < 2) {
239 es7000_plat = NON_UNISYS;
240 } else
241 setup_unisys();
242 return es7000_plat;
243}
244
245#ifdef CONFIG_ACPI
246int __init
247find_unisys_acpi_oem_table(unsigned long *oem_addr)
248{
249 struct acpi_table_header *header = NULL;
250 int i = 0;
251 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
252 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
253 struct oem_table *t = (struct oem_table *)header;
254 *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr,
255 t->OEMTableSize);
256 return 0;
257 }
258 }
259 return -1;
260}
261#endif
262
263static void
264es7000_spin(int n)
265{
266 int i = 0;
267
268 while (i++ < n)
269 rep_nop();
270}
271
272static int __init
273es7000_mip_write(struct mip_reg *mip_reg)
274{
275 int status = 0;
276 int spin;
277
278 spin = MIP_SPIN;
279 while (((unsigned long long)host_reg->off_38 &
280 (unsigned long long)MIP_VALID) != 0) {
281 if (--spin <= 0) {
282 printk("es7000_mip_write: Timeout waiting for Host Valid Flag");
283 return -1;
284 }
285 es7000_spin(MIP_SPIN);
286 }
287
288 memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
289 outb(1, mip_port);
290
291 spin = MIP_SPIN;
292
293 while (((unsigned long long)mip_reg->off_38 &
294 (unsigned long long)MIP_VALID) == 0) {
295 if (--spin <= 0) {
296 printk("es7000_mip_write: Timeout waiting for MIP Valid Flag");
297 return -1;
298 }
299 es7000_spin(MIP_SPIN);
300 }
301
302 status = ((unsigned long long)mip_reg->off_0 &
303 (unsigned long long)0xffff0000000000ULL) >> 48;
304 mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 &
305 (unsigned long long)~MIP_VALID);
306 return status;
307}
308
309int
310es7000_start_cpu(int cpu, unsigned long eip)
311{
312 unsigned long vect = 0, psaival = 0;
313
314 if (psai == NULL)
315 return -1;
316
317 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
318 psaival = (0x1000000 | vect | cpu);
319
320 while (*psai & 0x1000000)
321 ;
322
323 *psai = psaival;
324
325 return 0;
326
327}
328
329void __init
330es7000_sw_apic(void)
331{
332 if (es7000_plat) {
333 int mip_status;
334 struct mip_reg es7000_mip_reg;
335
336 printk("ES7000: Enabling APIC mode.\n");
337 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
338 es7000_mip_reg.off_0 = MIP_SW_APIC;
339 es7000_mip_reg.off_38 = (MIP_VALID);
340 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
341 printk("es7000_sw_apic: command failed, status = %x\n",
342 mip_status);
343 return;
344 }
345}
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb1444..6c9bfc9e1e95 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h> 18#include <linux/hardirq.h>
19#include <linux/dmar.h>
19 20
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
23 24
24#ifdef CONFIG_ACPI 25extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 26extern struct genapic apic_physflat;
26#endif 27extern struct genapic apic_x2xpic_uv_x;
27 28extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 29extern struct genapic apic_x2apic_cluster;
29 30
30struct genapic __read_mostly *genapic = &apic_flat; 31struct genapic __read_mostly *genapic = &apic_flat;
31 32
32static enum uv_system_type uv_system_type; 33static struct genapic *apic_probe[] __initdata = {
34 &apic_x2apic_uv_x,
35 &apic_x2apic_phys,
36 &apic_x2apic_cluster,
37 &apic_physflat,
38 NULL,
39};
33 40
34/* 41/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 42 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 43 */
37void __init setup_apic_routing(void) 44void __init setup_apic_routing(void)
38{ 45{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 46 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
40 genapic = &apic_x2apic_uv_x; 47 if (!intr_remapping_enabled)
41 else 48 genapic = &apic_flat;
42#ifdef CONFIG_ACPI 49 }
43 /*
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58 50
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 51 if (genapic == &apic_flat) {
52 if (max_physical_apicid >= 8)
53 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 }
60} 56}
61 57
62/* Same for both flat and physical. */ 58/* Same for both flat and physical. */
63 59
64void send_IPI_self(int vector) 60void apic_send_IPI_self(int vector)
65{ 61{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 62 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 63}
68 64
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 65int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 66{
71 if (!strcmp(oem_id, "SGI")) { 67 int i;
72 if (!strcmp(oem_table_id, "UVL")) 68
73 uv_system_type = UV_LEGACY_APIC; 69 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 70 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 71 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 72 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 73 genapic->name);
74 return 1;
75 }
78 } 76 }
79 return 0; 77 return 0;
80} 78}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d38..9eca5ba7a6b1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
109static unsigned int get_apic_id(unsigned long x)
110{
111 unsigned int id;
112
113 id = (((x)>>24) & 0xFFu);
114 return id;
115}
116
117static unsigned long set_apic_id(unsigned int id)
118{
119 unsigned long x;
120
121 x = ((id & 0xFFu)<<24);
122 return x;
123}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..e4bf2cc0d743
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
41 unsigned int dest)
42{
43 unsigned long cfg;
44
45 cfg = __prepare_ICR(0, vector, dest);
46
47 /*
48 * send the IPI.
49 */
50 x2apic_icr_write(cfg, apicid);
51}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
87static int x2apic_apic_id_registered(void)
88{
89 return 1;
90}
91
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
107static unsigned int get_apic_id(unsigned long x)
108{
109 unsigned int id;
110
111 id = x;
112 return id;
113}
114
115static unsigned long set_apic_id(unsigned int id)
116{
117 unsigned long x;
118
119 x = id;
120 return x;
121}
122
123static unsigned int phys_pkg_id(int index_msb)
124{
125 return current_cpu_data.initial_apicid >> index_msb;
126}
127
128static void x2apic_send_IPI_self(int vector)
129{
130 apic_write(APIC_SELF_IPI, vector);
131}
132
133static void init_x2apic_ldr(void)
134{
135 int cpu = smp_processor_id();
136
137 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
138 return;
139}
140
141struct genapic apic_x2apic_cluster = {
142 .name = "cluster x2apic",
143 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
144 .int_delivery_mode = dest_LowestPrio,
145 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
146 .target_cpus = x2apic_target_cpus,
147 .vector_allocation_domain = x2apic_vector_allocation_domain,
148 .apic_id_registered = x2apic_apic_id_registered,
149 .init_apic_ldr = init_x2apic_ldr,
150 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask,
153 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
155 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id,
158 .apic_id_mask = (0xFFFFFFFFu),
159};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..8f1343df2627
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,154 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13static int x2apic_phys;
14
15static int set_x2apic_phys_mode(char *arg)
16{
17 x2apic_phys = 1;
18 return 0;
19}
20early_param("x2apic_phys", set_x2apic_phys_mode);
21
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31
32static cpumask_t x2apic_target_cpus(void)
33{
34 return cpumask_of_cpu(0);
35}
36
37static cpumask_t x2apic_vector_allocation_domain(int cpu)
38{
39 cpumask_t domain = CPU_MASK_NONE;
40 cpu_set(cpu, domain);
41 return domain;
42}
43
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
45 unsigned int dest)
46{
47 unsigned long cfg;
48
49 cfg = __prepare_ICR(0, vector, dest);
50
51 /*
52 * send the IPI.
53 */
54 x2apic_icr_write(cfg, apicid);
55}
56
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
58{
59 unsigned long flags;
60 unsigned long query_cpu;
61
62 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL);
66 }
67 local_irq_restore(flags);
68}
69
70static void x2apic_send_IPI_allbutself(int vector)
71{
72 cpumask_t mask = cpu_online_map;
73
74 cpu_clear(smp_processor_id(), mask);
75
76 if (!cpus_empty(mask))
77 x2apic_send_IPI_mask(mask, vector);
78}
79
80static void x2apic_send_IPI_all(int vector)
81{
82 x2apic_send_IPI_mask(cpu_online_map, vector);
83}
84
85static int x2apic_apic_id_registered(void)
86{
87 return 1;
88}
89
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
105static unsigned int get_apic_id(unsigned long x)
106{
107 unsigned int id;
108
109 id = x;
110 return id;
111}
112
113static unsigned long set_apic_id(unsigned int id)
114{
115 unsigned long x;
116
117 x = id;
118 return x;
119}
120
121static unsigned int phys_pkg_id(int index_msb)
122{
123 return current_cpu_data.initial_apicid >> index_msb;
124}
125
126void x2apic_send_IPI_self(int vector)
127{
128 apic_write(APIC_SELF_IPI, vector);
129}
130
131void init_x2apic_ldr(void)
132{
133 return;
134}
135
136struct genapic apic_x2apic_phys = {
137 .name = "physical x2apic",
138 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
139 .int_delivery_mode = dest_Fixed,
140 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
141 .target_cpus = x2apic_target_cpus,
142 .vector_allocation_domain = x2apic_vector_allocation_domain,
143 .apic_id_registered = x2apic_apic_id_registered,
144 .init_apic_ldr = init_x2apic_ldr,
145 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask,
148 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
150 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id,
153 .apic_id_mask = (0xFFFFFFFFu),
154};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837cb16be..ae2ffc8a400c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,36 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57EXPORT_SYMBOL_GPL(is_uv_system);
58
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 59DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 60EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 61
@@ -123,6 +153,10 @@ static int uv_apic_id_registered(void)
123 return 1; 153 return 1;
124} 154}
125 155
156static void uv_init_apic_ldr(void)
157{
158}
159
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 160static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 161{
128 int cpu; 162 int cpu;
@@ -138,9 +172,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 172 return BAD_APICID;
139} 173}
140 174
175static unsigned int get_apic_id(unsigned long x)
176{
177 unsigned int id;
178
179 WARN_ON(preemptible() && num_online_cpus() > 1);
180 id = x | __get_cpu_var(x2apic_extra_bits);
181
182 return id;
183}
184
185static unsigned long set_apic_id(unsigned int id)
186{
187 unsigned long x;
188
189 /* maskout x2apic_extra_bits ? */
190 x = id;
191 return x;
192}
193
194static unsigned int uv_read_apic_id(void)
195{
196
197 return get_apic_id(apic_read(APIC_ID));
198}
199
141static unsigned int phys_pkg_id(int index_msb) 200static unsigned int phys_pkg_id(int index_msb)
142{ 201{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 202 return uv_read_apic_id() >> index_msb;
144} 203}
145 204
146#ifdef ZZZ /* Needs x2apic patch */ 205#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +211,22 @@ static void uv_send_IPI_self(int vector)
152 211
153struct genapic apic_x2apic_uv_x = { 212struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 213 .name = "UV large system",
214 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 215 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 216 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 217 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 218 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 219 .apic_id_registered = uv_apic_id_registered,
220 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 221 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 222 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 223 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 224 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 225 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 226 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
227 .get_apic_id = get_apic_id,
228 .set_apic_id = set_apic_id,
229 .apic_id_mask = (0xFFFFFFFFu),
166}; 230};
167 231
168static __cpuinit void set_x2apic_extra_bits(int pnode) 232static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +465,5 @@ void __cpuinit uv_cpu_init(void)
401 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 465 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
402 set_x2apic_extra_bits(uv_hub_info->pnode); 466 set_x2apic_extra_bits(uv_hub_info->pnode);
403} 467}
468
469
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb82..45723f1fe198 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -21,9 +21,12 @@
21# include <asm/sigcontext32.h> 21# include <asm/sigcontext32.h>
22# include <asm/user32.h> 22# include <asm/user32.h>
23#else 23#else
24# define save_i387_ia32 save_i387 24# define save_i387_xstate_ia32 save_i387_xstate
25# define restore_i387_ia32 restore_i387 25# define restore_i387_xstate_ia32 restore_i387_xstate
26# define _fpstate_ia32 _fpstate 26# define _fpstate_ia32 _fpstate
27# define _xstate_ia32 _xstate
28# define sig_xstate_ia32_size sig_xstate_size
29# define fx_sw_reserved_ia32 fx_sw_reserved
27# define user_i387_ia32_struct user_i387_struct 30# define user_i387_ia32_struct user_i387_struct
28# define user32_fxsr_struct user_fxsr_struct 31# define user32_fxsr_struct user_fxsr_struct
29#endif 32#endif
@@ -36,6 +39,7 @@
36 39
37static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; 40static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
38unsigned int xstate_size; 41unsigned int xstate_size;
42unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
39static struct i387_fxsave_struct fx_scratch __cpuinitdata; 43static struct i387_fxsave_struct fx_scratch __cpuinitdata;
40 44
41void __cpuinit mxcsr_feature_mask_init(void) 45void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
61 return; 65 return;
62 } 66 }
63 67
68 if (cpu_has_xsave) {
69 xsave_cntxt_init();
70 return;
71 }
72
64 if (cpu_has_fxsr) 73 if (cpu_has_fxsr)
65 xstate_size = sizeof(struct i387_fxsave_struct); 74 xstate_size = sizeof(struct i387_fxsave_struct);
66#ifdef CONFIG_X86_32 75#ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
83 92
84 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 93 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
85 94
95 /*
96 * Boot processor to setup the FP and extended state context info.
97 */
98 if (!smp_processor_id())
99 init_thread_xstate();
100 xsave_init();
101
86 mxcsr_feature_mask_init(); 102 mxcsr_feature_mask_init();
87 /* clean state in init */ 103 /* clean state in init */
88 current_thread_info()->status = 0; 104 if (cpu_has_xsave)
105 current_thread_info()->status = TS_XSAVE;
106 else
107 current_thread_info()->status = 0;
89 clear_used_math(); 108 clear_used_math();
90} 109}
91#endif /* CONFIG_X86_64 */ 110#endif /* CONFIG_X86_64 */
@@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
195 */ 214 */
196 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 215 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
197 216
217 /*
218 * update the header bits in the xsave header, indicating the
219 * presence of FP and SSE state.
220 */
221 if (cpu_has_xsave)
222 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
223
198 return ret; 224 return ret;
199} 225}
200 226
@@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
395 if (!ret) 421 if (!ret)
396 convert_to_fxsr(target, &env); 422 convert_to_fxsr(target, &env);
397 423
424 /*
425 * update the header bit in the xsave header, indicating the
426 * presence of FP.
427 */
428 if (cpu_has_xsave)
429 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
398 return ret; 430 return ret;
399} 431}
400 432
@@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
407 struct task_struct *tsk = current; 439 struct task_struct *tsk = current;
408 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; 440 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
409 441
410 unlazy_fpu(tsk);
411 fp->status = fp->swd; 442 fp->status = fp->swd;
412 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) 443 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
413 return -1; 444 return -1;
@@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
421 struct user_i387_ia32_struct env; 452 struct user_i387_ia32_struct env;
422 int err = 0; 453 int err = 0;
423 454
424 unlazy_fpu(tsk);
425
426 convert_from_fxsr(&env, tsk); 455 convert_from_fxsr(&env, tsk);
427 if (__copy_to_user(buf, &env, sizeof(env))) 456 if (__copy_to_user(buf, &env, sizeof(env)))
428 return -1; 457 return -1;
@@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
432 if (err) 461 if (err)
433 return -1; 462 return -1;
434 463
435 if (__copy_to_user(&buf->_fxsr_env[0], fx, 464 if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
436 sizeof(struct i387_fxsave_struct)))
437 return -1; 465 return -1;
438 return 1; 466 return 1;
439} 467}
440 468
441int save_i387_ia32(struct _fpstate_ia32 __user *buf) 469static int save_i387_xsave(void __user *buf)
470{
471 struct _fpstate_ia32 __user *fx = buf;
472 int err = 0;
473
474 if (save_i387_fxsave(fx) < 0)
475 return -1;
476
477 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
478 sizeof(struct _fpx_sw_bytes));
479 err |= __put_user(FP_XSTATE_MAGIC2,
480 (__u32 __user *) (buf + sig_xstate_ia32_size
481 - FP_XSTATE_MAGIC2_SIZE));
482 if (err)
483 return -1;
484
485 return 1;
486}
487
488int save_i387_xstate_ia32(void __user *buf)
442{ 489{
490 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
491 struct task_struct *tsk = current;
492
443 if (!used_math()) 493 if (!used_math())
444 return 0; 494 return 0;
495
496 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
497 return -EACCES;
445 /* 498 /*
446 * This will cause a "finit" to be triggered by the next 499 * This will cause a "finit" to be triggered by the next
447 * attempted FPU operation by the 'current' process. 500 * attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
451 if (!HAVE_HWFP) { 504 if (!HAVE_HWFP) {
452 return fpregs_soft_get(current, NULL, 505 return fpregs_soft_get(current, NULL,
453 0, sizeof(struct user_i387_ia32_struct), 506 0, sizeof(struct user_i387_ia32_struct),
454 NULL, buf) ? -1 : 1; 507 NULL, fp) ? -1 : 1;
455 } 508 }
456 509
510 unlazy_fpu(tsk);
511
512 if (cpu_has_xsave)
513 return save_i387_xsave(fp);
457 if (cpu_has_fxsr) 514 if (cpu_has_fxsr)
458 return save_i387_fxsave(buf); 515 return save_i387_fxsave(fp);
459 else 516 else
460 return save_i387_fsave(buf); 517 return save_i387_fsave(fp);
461} 518}
462 519
463static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) 520static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
468 sizeof(struct i387_fsave_struct)); 525 sizeof(struct i387_fsave_struct));
469} 526}
470 527
471static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) 528static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
529 unsigned int size)
472{ 530{
473 struct task_struct *tsk = current; 531 struct task_struct *tsk = current;
474 struct user_i387_ia32_struct env; 532 struct user_i387_ia32_struct env;
475 int err; 533 int err;
476 534
477 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], 535 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
478 sizeof(struct i387_fxsave_struct)); 536 size);
479 /* mxcsr reserved bits must be masked to zero for security reasons */ 537 /* mxcsr reserved bits must be masked to zero for security reasons */
480 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 538 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
481 if (err || __copy_from_user(&env, buf, sizeof(env))) 539 if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
485 return 0; 543 return 0;
486} 544}
487 545
488int restore_i387_ia32(struct _fpstate_ia32 __user *buf) 546static int restore_i387_xsave(void __user *buf)
547{
548 struct _fpx_sw_bytes fx_sw_user;
549 struct _fpstate_ia32 __user *fx_user =
550 ((struct _fpstate_ia32 __user *) buf);
551 struct i387_fxsave_struct __user *fx =
552 (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
553 struct xsave_hdr_struct *xsave_hdr =
554 &current->thread.xstate->xsave.xsave_hdr;
555 u64 mask;
556 int err;
557
558 if (check_for_xstate(fx, buf, &fx_sw_user))
559 goto fx_only;
560
561 mask = fx_sw_user.xstate_bv;
562
563 err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
564
565 xsave_hdr->xstate_bv &= pcntxt_mask;
566 /*
567 * These bits must be zero.
568 */
569 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
570
571 /*
572 * Init the state that is not present in the memory layout
573 * and enabled by the OS.
574 */
575 mask = ~(pcntxt_mask & ~mask);
576 xsave_hdr->xstate_bv &= mask;
577
578 return err;
579fx_only:
580 /*
581 * Couldn't find the extended state information in the memory
582 * layout. Restore the FP/SSE and init the other extended state
583 * enabled by the OS.
584 */
585 xsave_hdr->xstate_bv = XSTATE_FPSSE;
586 return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
587}
588
589int restore_i387_xstate_ia32(void __user *buf)
489{ 590{
490 int err; 591 int err;
491 struct task_struct *tsk = current; 592 struct task_struct *tsk = current;
593 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
492 594
493 if (HAVE_HWFP) 595 if (HAVE_HWFP)
494 clear_fpu(tsk); 596 clear_fpu(tsk);
495 597
598 if (!buf) {
599 if (used_math()) {
600 clear_fpu(tsk);
601 clear_used_math();
602 }
603
604 return 0;
605 } else
606 if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
607 return -EACCES;
608
496 if (!used_math()) { 609 if (!used_math()) {
497 err = init_fpu(tsk); 610 err = init_fpu(tsk);
498 if (err) 611 if (err)
@@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
500 } 613 }
501 614
502 if (HAVE_HWFP) { 615 if (HAVE_HWFP) {
503 if (cpu_has_fxsr) 616 if (cpu_has_xsave)
504 err = restore_i387_fxsave(buf); 617 err = restore_i387_xsave(buf);
618 else if (cpu_has_fxsr)
619 err = restore_i387_fxsave(fp, sizeof(struct
620 i387_fxsave_struct));
505 else 621 else
506 err = restore_i387_fsave(buf); 622 err = restore_i387_fsave(fp);
507 } else { 623 } else {
508 err = fpregs_soft_set(current, NULL, 624 err = fpregs_soft_set(current, NULL,
509 0, sizeof(struct user_i387_ia32_struct), 625 0, sizeof(struct user_i387_ia32_struct),
510 NULL, buf) != 0; 626 NULL, fp) != 0;
511 } 627 }
512 set_used_math(); 628 set_used_math();
513 629
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec4..e710289f673e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,10 +46,13 @@
46#include <asm/nmi.h> 46#include <asm/nmi.h>
47#include <asm/msidef.h> 47#include <asm/msidef.h>
48#include <asm/hypertransport.h> 48#include <asm/hypertransport.h>
49#include <asm/setup.h>
49 50
50#include <mach_apic.h> 51#include <mach_apic.h>
51#include <mach_apicdef.h> 52#include <mach_apicdef.h>
52 53
54#define __apicdebuginit(type) static type __init
55
53int (*ioapic_renumber_irq)(int ioapic, int irq); 56int (*ioapic_renumber_irq)(int ioapic, int irq);
54atomic_t irq_mis_count; 57atomic_t irq_mis_count;
55 58
@@ -1341,7 +1344,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1341 ioapic_write_entry(apic, pin, entry); 1344 ioapic_write_entry(apic, pin, entry);
1342} 1345}
1343 1346
1344void __init print_IO_APIC(void) 1347
1348__apicdebuginit(void) print_IO_APIC(void)
1345{ 1349{
1346 int apic, i; 1350 int apic, i;
1347 union IO_APIC_reg_00 reg_00; 1351 union IO_APIC_reg_00 reg_00;
@@ -1456,9 +1460,7 @@ void __init print_IO_APIC(void)
1456 return; 1460 return;
1457} 1461}
1458 1462
1459#if 0 1463__apicdebuginit(void) print_APIC_bitfield(int base)
1460
1461static void print_APIC_bitfield(int base)
1462{ 1464{
1463 unsigned int v; 1465 unsigned int v;
1464 int i, j; 1466 int i, j;
@@ -1479,9 +1481,10 @@ static void print_APIC_bitfield(int base)
1479 } 1481 }
1480} 1482}
1481 1483
1482void /*__init*/ print_local_APIC(void *dummy) 1484__apicdebuginit(void) print_local_APIC(void *dummy)
1483{ 1485{
1484 unsigned int v, ver, maxlvt; 1486 unsigned int v, ver, maxlvt;
1487 u64 icr;
1485 1488
1486 if (apic_verbosity == APIC_QUIET) 1489 if (apic_verbosity == APIC_QUIET)
1487 return; 1490 return;
@@ -1490,7 +1493,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1490 smp_processor_id(), hard_smp_processor_id()); 1493 smp_processor_id(), hard_smp_processor_id());
1491 v = apic_read(APIC_ID); 1494 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1495 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1496 GET_APIC_ID(v));
1494 v = apic_read(APIC_LVR); 1497 v = apic_read(APIC_LVR);
1495 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1498 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1496 ver = GET_APIC_VERSION(v); 1499 ver = GET_APIC_VERSION(v);
@@ -1532,10 +1535,9 @@ void /*__init*/ print_local_APIC(void *dummy)
1532 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1535 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1533 } 1536 }
1534 1537
1535 v = apic_read(APIC_ICR); 1538 icr = apic_icr_read();
1536 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1539 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1537 v = apic_read(APIC_ICR2); 1540 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1538 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1539 1541
1540 v = apic_read(APIC_LVTT); 1542 v = apic_read(APIC_LVTT);
1541 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1543 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1563,12 +1565,12 @@ void /*__init*/ print_local_APIC(void *dummy)
1563 printk("\n"); 1565 printk("\n");
1564} 1566}
1565 1567
1566void print_all_local_APICs(void) 1568__apicdebuginit(void) print_all_local_APICs(void)
1567{ 1569{
1568 on_each_cpu(print_local_APIC, NULL, 1); 1570 on_each_cpu(print_local_APIC, NULL, 1);
1569} 1571}
1570 1572
1571void /*__init*/ print_PIC(void) 1573__apicdebuginit(void) print_PIC(void)
1572{ 1574{
1573 unsigned int v; 1575 unsigned int v;
1574 unsigned long flags; 1576 unsigned long flags;
@@ -1600,7 +1602,17 @@ void /*__init*/ print_PIC(void)
1600 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1602 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1601} 1603}
1602 1604
1603#endif /* 0 */ 1605__apicdebuginit(int) print_all_ICs(void)
1606{
1607 print_PIC();
1608 print_all_local_APICs();
1609 print_IO_APIC();
1610
1611 return 0;
1612}
1613
1614fs_initcall(print_all_ICs);
1615
1604 1616
1605static void __init enable_IO_APIC(void) 1617static void __init enable_IO_APIC(void)
1606{ 1618{
@@ -1698,8 +1710,7 @@ void disable_IO_APIC(void)
1698 entry.dest_mode = 0; /* Physical */ 1710 entry.dest_mode = 0; /* Physical */
1699 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1711 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1700 entry.vector = 0; 1712 entry.vector = 0;
1701 entry.dest.physical.physical_dest = 1713 entry.dest.physical.physical_dest = read_apic_id();
1702 GET_APIC_ID(read_apic_id());
1703 1714
1704 /* 1715 /*
1705 * Add it to the IO-APIC irq-routing table: 1716 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1736,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
1725 unsigned char old_id; 1736 unsigned char old_id;
1726 unsigned long flags; 1737 unsigned long flags;
1727 1738
1728#ifdef CONFIG_X86_NUMAQ 1739 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1729 if (found_numaq)
1730 return; 1740 return;
1731#endif
1732 1741
1733 /* 1742 /*
1734 * Don't check I/O APIC IDs for xAPIC systems. They have 1743 * Don't check I/O APIC IDs for xAPIC systems. They have
@@ -2329,8 +2338,6 @@ void __init setup_IO_APIC(void)
2329 setup_IO_APIC_irqs(); 2338 setup_IO_APIC_irqs();
2330 init_IO_APIC_traps(); 2339 init_IO_APIC_traps();
2331 check_timer(); 2340 check_timer();
2332 if (!acpi_ioapic)
2333 print_IO_APIC();
2334} 2341}
2335 2342
2336/* 2343/*
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18f..a1bec2969c6a 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,10 +50,13 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
55 57
58#define __apicdebuginit(type) static type __init
59
56struct irq_cfg { 60struct irq_cfg {
57 cpumask_t domain; 61 cpumask_t domain;
58 cpumask_t old_domain; 62 cpumask_t old_domain;
@@ -87,8 +91,6 @@ int first_system_vector = 0xfe;
87 91
88char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; 92char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
89 93
90#define __apicdebuginit __init
91
92int sis_apic_bug; /* not actually supported, dummy for compile */ 94int sis_apic_bug; /* not actually supported, dummy for compile */
93 95
94static int no_timer_check; 96static int no_timer_check;
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
839} 912}
840 913
841static struct irq_chip ioapic_chip; 914static struct irq_chip ioapic_chip;
915#ifdef CONFIG_INTR_REMAP
916static struct irq_chip ir_ioapic_chip;
917#endif
842 918
843static void ioapic_register_intr(int irq, unsigned long trigger) 919static void ioapic_register_intr(int irq, unsigned long trigger)
844{ 920{
845 if (trigger) { 921 if (trigger)
846 irq_desc[irq].status |= IRQ_LEVEL; 922 irq_desc[irq].status |= IRQ_LEVEL;
847 set_irq_chip_and_handler_name(irq, &ioapic_chip, 923 else
848 handle_fasteoi_irq, "fasteoi");
849 } else {
850 irq_desc[irq].status &= ~IRQ_LEVEL; 924 irq_desc[irq].status &= ~IRQ_LEVEL;
925
926#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
929 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq,
932 "fasteoi");
933 else
934 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
935 handle_edge_irq, "edge");
936 return;
937 }
938#endif
939 if (trigger)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq,
942 "fasteoi");
943 else
851 set_irq_chip_and_handler_name(irq, &ioapic_chip, 944 set_irq_chip_and_handler_name(irq, &ioapic_chip,
852 handle_edge_irq, "edge"); 945 handle_edge_irq, "edge");
946}
947
948static int setup_ioapic_entry(int apic, int irq,
949 struct IO_APIC_route_entry *entry,
950 unsigned int destination, int trigger,
951 int polarity, int vector)
952{
953 /*
954 * add it to the IO-APIC irq-routing table:
955 */
956 memset(entry,0,sizeof(*entry));
957
958#ifdef CONFIG_INTR_REMAP
959 if (intr_remapping_enabled) {
960 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
961 struct irte irte;
962 struct IR_IO_APIC_route_entry *ir_entry =
963 (struct IR_IO_APIC_route_entry *) entry;
964 int index;
965
966 if (!iommu)
967 panic("No mapping iommu for ioapic %d\n", apic);
968
969 index = alloc_irte(iommu, irq, 1);
970 if (index < 0)
971 panic("Failed to allocate IRTE for ioapic %d\n", apic);
972
973 memset(&irte, 0, sizeof(irte));
974
975 irte.present = 1;
976 irte.dst_mode = INT_DEST_MODE;
977 irte.trigger_mode = trigger;
978 irte.dlvry_mode = INT_DELIVERY_MODE;
979 irte.vector = vector;
980 irte.dest_id = IRTE_DEST(destination);
981
982 modify_irte(irq, &irte);
983
984 ir_entry->index2 = (index >> 15) & 0x1;
985 ir_entry->zero = 0;
986 ir_entry->format = 1;
987 ir_entry->index = (index & 0x7fff);
988 } else
989#endif
990 {
991 entry->delivery_mode = INT_DELIVERY_MODE;
992 entry->dest_mode = INT_DEST_MODE;
993 entry->dest = destination;
853 } 994 }
995
996 entry->mask = 0; /* enable IRQ */
997 entry->trigger = trigger;
998 entry->polarity = polarity;
999 entry->vector = vector;
1000
1001 /* Mask level triggered irqs.
1002 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1003 */
1004 if (trigger)
1005 entry->mask = 1;
1006 return 0;
854} 1007}
855 1008
856static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
875 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1028 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
876 irq, trigger, polarity); 1029 irq, trigger, polarity);
877 1030
878 /*
879 * add it to the IO-APIC irq-routing table:
880 */
881 memset(&entry,0,sizeof(entry));
882
883 entry.delivery_mode = INT_DELIVERY_MODE;
884 entry.dest_mode = INT_DEST_MODE;
885 entry.dest = cpu_mask_to_apicid(mask);
886 entry.mask = 0; /* enable IRQ */
887 entry.trigger = trigger;
888 entry.polarity = polarity;
889 entry.vector = cfg->vector;
890 1031
891 /* Mask level triggered irqs. 1032 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
892 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1033 cpu_mask_to_apicid(mask), trigger, polarity,
893 */ 1034 cfg->vector)) {
894 if (trigger) 1035 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
895 entry.mask = 1; 1036 mp_ioapics[apic].mp_apicid, pin);
1037 __clear_irq_vector(irq);
1038 return;
1039 }
896 1040
897 ioapic_register_intr(irq, trigger); 1041 ioapic_register_intr(irq, trigger);
898 if (irq < 16) 1042 if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
944{ 1088{
945 struct IO_APIC_route_entry entry; 1089 struct IO_APIC_route_entry entry;
946 1090
1091 if (intr_remapping_enabled)
1092 return;
1093
947 memset(&entry, 0, sizeof(entry)); 1094 memset(&entry, 0, sizeof(entry));
948 1095
949 /* 1096 /*
@@ -970,7 +1117,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
970 ioapic_write_entry(apic, pin, entry); 1117 ioapic_write_entry(apic, pin, entry);
971} 1118}
972 1119
973void __apicdebuginit print_IO_APIC(void) 1120
1121__apicdebuginit(void) print_IO_APIC(void)
974{ 1122{
975 int apic, i; 1123 int apic, i;
976 union IO_APIC_reg_00 reg_00; 1124 union IO_APIC_reg_00 reg_00;
@@ -1064,9 +1212,7 @@ void __apicdebuginit print_IO_APIC(void)
1064 return; 1212 return;
1065} 1213}
1066 1214
1067#if 0 1215__apicdebuginit(void) print_APIC_bitfield(int base)
1068
1069static __apicdebuginit void print_APIC_bitfield (int base)
1070{ 1216{
1071 unsigned int v; 1217 unsigned int v;
1072 int i, j; 1218 int i, j;
@@ -1087,9 +1233,10 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1087 } 1233 }
1088} 1234}
1089 1235
1090void __apicdebuginit print_local_APIC(void * dummy) 1236__apicdebuginit(void) print_local_APIC(void *dummy)
1091{ 1237{
1092 unsigned int v, ver, maxlvt; 1238 unsigned int v, ver, maxlvt;
1239 unsigned long icr;
1093 1240
1094 if (apic_verbosity == APIC_QUIET) 1241 if (apic_verbosity == APIC_QUIET)
1095 return; 1242 return;
@@ -1097,7 +1244,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1097 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1244 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1098 smp_processor_id(), hard_smp_processor_id()); 1245 smp_processor_id(), hard_smp_processor_id());
1099 v = apic_read(APIC_ID); 1246 v = apic_read(APIC_ID);
1100 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1247 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1101 v = apic_read(APIC_LVR); 1248 v = apic_read(APIC_LVR);
1102 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1249 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1103 ver = GET_APIC_VERSION(v); 1250 ver = GET_APIC_VERSION(v);
@@ -1133,10 +1280,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1133 v = apic_read(APIC_ESR); 1280 v = apic_read(APIC_ESR);
1134 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1281 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1135 1282
1136 v = apic_read(APIC_ICR); 1283 icr = apic_icr_read();
1137 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1284 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1138 v = apic_read(APIC_ICR2); 1285 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1139 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1140 1286
1141 v = apic_read(APIC_LVTT); 1287 v = apic_read(APIC_LVTT);
1142 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1288 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1164,12 +1310,12 @@ void __apicdebuginit print_local_APIC(void * dummy)
1164 printk("\n"); 1310 printk("\n");
1165} 1311}
1166 1312
1167void print_all_local_APICs (void) 1313__apicdebuginit(void) print_all_local_APICs(void)
1168{ 1314{
1169 on_each_cpu(print_local_APIC, NULL, 1); 1315 on_each_cpu(print_local_APIC, NULL, 1);
1170} 1316}
1171 1317
1172void __apicdebuginit print_PIC(void) 1318__apicdebuginit(void) print_PIC(void)
1173{ 1319{
1174 unsigned int v; 1320 unsigned int v;
1175 unsigned long flags; 1321 unsigned long flags;
@@ -1201,7 +1347,17 @@ void __apicdebuginit print_PIC(void)
1201 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1347 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1202} 1348}
1203 1349
1204#endif /* 0 */ 1350__apicdebuginit(int) print_all_ICs(void)
1351{
1352 print_PIC();
1353 print_all_local_APICs();
1354 print_IO_APIC();
1355
1356 return 0;
1357}
1358
1359fs_initcall(print_all_ICs);
1360
1205 1361
1206void __init enable_IO_APIC(void) 1362void __init enable_IO_APIC(void)
1207{ 1363{
@@ -1291,7 +1447,7 @@ void disable_IO_APIC(void)
1291 entry.dest_mode = 0; /* Physical */ 1447 entry.dest_mode = 0; /* Physical */
1292 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1448 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1293 entry.vector = 0; 1449 entry.vector = 0;
1294 entry.dest = GET_APIC_ID(read_apic_id()); 1450 entry.dest = read_apic_id();
1295 1451
1296 /* 1452 /*
1297 * Add it to the IO-APIC irq-routing table: 1453 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1553,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1397 */ 1553 */
1398 1554
1399#ifdef CONFIG_SMP 1555#ifdef CONFIG_SMP
1556
1557#ifdef CONFIG_INTR_REMAP
1558static void ir_irq_migration(struct work_struct *work);
1559
1560static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1561
1562/*
1563 * Migrate the IO-APIC irq in the presence of intr-remapping.
1564 *
1565 * For edge triggered, irq migration is a simple atomic update(of vector
1566 * and cpu destination) of IRTE and flush the hardware cache.
1567 *
1568 * For level triggered, we need to modify the io-apic RTE aswell with the update
1569 * vector information, along with modifying IRTE with vector and destination.
1570 * So irq migration for level triggered is little bit more complex compared to
1571 * edge triggered migration. But the good news is, we use the same algorithm
1572 * for level triggered migration as we have today, only difference being,
1573 * we now initiate the irq migration from process context instead of the
1574 * interrupt context.
1575 *
1576 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1577 * suppression) to the IO-APIC, level triggered irq migration will also be
1578 * as simple as edge triggered migration and we can do the irq migration
1579 * with a simple atomic update to IO-APIC RTE.
1580 */
1581static void migrate_ioapic_irq(int irq, cpumask_t mask)
1582{
1583 struct irq_cfg *cfg = irq_cfg + irq;
1584 struct irq_desc *desc = irq_desc + irq;
1585 cpumask_t tmp, cleanup_mask;
1586 struct irte irte;
1587 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1588 unsigned int dest;
1589 unsigned long flags;
1590
1591 cpus_and(tmp, mask, cpu_online_map);
1592 if (cpus_empty(tmp))
1593 return;
1594
1595 if (get_irte(irq, &irte))
1596 return;
1597
1598 if (assign_irq_vector(irq, mask))
1599 return;
1600
1601 cpus_and(tmp, cfg->domain, mask);
1602 dest = cpu_mask_to_apicid(tmp);
1603
1604 if (modify_ioapic_rte) {
1605 spin_lock_irqsave(&ioapic_lock, flags);
1606 __target_IO_APIC_irq(irq, dest, cfg->vector);
1607 spin_unlock_irqrestore(&ioapic_lock, flags);
1608 }
1609
1610 irte.vector = cfg->vector;
1611 irte.dest_id = IRTE_DEST(dest);
1612
1613 /*
1614 * Modified the IRTE and flushes the Interrupt entry cache.
1615 */
1616 modify_irte(irq, &irte);
1617
1618 if (cfg->move_in_progress) {
1619 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1620 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1621 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1622 cfg->move_in_progress = 0;
1623 }
1624
1625 irq_desc[irq].affinity = mask;
1626}
1627
1628static int migrate_irq_remapped_level(int irq)
1629{
1630 int ret = -1;
1631
1632 mask_IO_APIC_irq(irq);
1633
1634 if (io_apic_level_ack_pending(irq)) {
1635 /*
1636 * Interrupt in progress. Migrating irq now will change the
1637 * vector information in the IO-APIC RTE and that will confuse
1638 * the EOI broadcast performed by cpu.
1639 * So, delay the irq migration to the next instance.
1640 */
1641 schedule_delayed_work(&ir_migration_work, 1);
1642 goto unmask;
1643 }
1644
1645 /* everthing is clear. we have right of way */
1646 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1647
1648 ret = 0;
1649 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1650 cpus_clear(irq_desc[irq].pending_mask);
1651
1652unmask:
1653 unmask_IO_APIC_irq(irq);
1654 return ret;
1655}
1656
1657static void ir_irq_migration(struct work_struct *work)
1658{
1659 int irq;
1660
1661 for (irq = 0; irq < NR_IRQS; irq++) {
1662 struct irq_desc *desc = irq_desc + irq;
1663 if (desc->status & IRQ_MOVE_PENDING) {
1664 unsigned long flags;
1665
1666 spin_lock_irqsave(&desc->lock, flags);
1667 if (!desc->chip->set_affinity ||
1668 !(desc->status & IRQ_MOVE_PENDING)) {
1669 desc->status &= ~IRQ_MOVE_PENDING;
1670 spin_unlock_irqrestore(&desc->lock, flags);
1671 continue;
1672 }
1673
1674 desc->chip->set_affinity(irq,
1675 irq_desc[irq].pending_mask);
1676 spin_unlock_irqrestore(&desc->lock, flags);
1677 }
1678 }
1679}
1680
1681/*
1682 * Migrates the IRQ destination in the process context.
1683 */
1684static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1685{
1686 if (irq_desc[irq].status & IRQ_LEVEL) {
1687 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1688 irq_desc[irq].pending_mask = mask;
1689 migrate_irq_remapped_level(irq);
1690 return;
1691 }
1692
1693 migrate_ioapic_irq(irq, mask);
1694}
1695#endif
1696
1400asmlinkage void smp_irq_move_cleanup_interrupt(void) 1697asmlinkage void smp_irq_move_cleanup_interrupt(void)
1401{ 1698{
1402 unsigned vector, me; 1699 unsigned vector, me;
@@ -1453,6 +1750,17 @@ static void irq_complete_move(unsigned int irq)
1453#else 1750#else
1454static inline void irq_complete_move(unsigned int irq) {} 1751static inline void irq_complete_move(unsigned int irq) {}
1455#endif 1752#endif
1753#ifdef CONFIG_INTR_REMAP
1754static void ack_x2apic_level(unsigned int irq)
1755{
1756 ack_x2APIC_irq();
1757}
1758
1759static void ack_x2apic_edge(unsigned int irq)
1760{
1761 ack_x2APIC_irq();
1762}
1763#endif
1456 1764
1457static void ack_apic_edge(unsigned int irq) 1765static void ack_apic_edge(unsigned int irq)
1458{ 1766{
@@ -1527,6 +1835,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1527 .retrigger = ioapic_retrigger_irq, 1835 .retrigger = ioapic_retrigger_irq,
1528}; 1836};
1529 1837
1838#ifdef CONFIG_INTR_REMAP
1839static struct irq_chip ir_ioapic_chip __read_mostly = {
1840 .name = "IR-IO-APIC",
1841 .startup = startup_ioapic_irq,
1842 .mask = mask_IO_APIC_irq,
1843 .unmask = unmask_IO_APIC_irq,
1844 .ack = ack_x2apic_edge,
1845 .eoi = ack_x2apic_level,
1846#ifdef CONFIG_SMP
1847 .set_affinity = set_ir_ioapic_affinity_irq,
1848#endif
1849 .retrigger = ioapic_retrigger_irq,
1850};
1851#endif
1852
1530static inline void init_IO_APIC_traps(void) 1853static inline void init_IO_APIC_traps(void)
1531{ 1854{
1532 int irq; 1855 int irq;
@@ -1712,6 +2035,8 @@ static inline void __init check_timer(void)
1712 * 8259A. 2035 * 8259A.
1713 */ 2036 */
1714 if (pin1 == -1) { 2037 if (pin1 == -1) {
2038 if (intr_remapping_enabled)
2039 panic("BIOS bug: timer not connected to IO-APIC");
1715 pin1 = pin2; 2040 pin1 = pin2;
1716 apic1 = apic2; 2041 apic1 = apic2;
1717 no_pin1 = 1; 2042 no_pin1 = 1;
@@ -1738,6 +2063,8 @@ static inline void __init check_timer(void)
1738 clear_IO_APIC_pin(0, pin1); 2063 clear_IO_APIC_pin(0, pin1);
1739 goto out; 2064 goto out;
1740 } 2065 }
2066 if (intr_remapping_enabled)
2067 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1741 clear_IO_APIC_pin(apic1, pin1); 2068 clear_IO_APIC_pin(apic1, pin1);
1742 if (!no_pin1) 2069 if (!no_pin1)
1743 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2070 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1854,8 +2181,6 @@ void __init setup_IO_APIC(void)
1854 setup_IO_APIC_irqs(); 2181 setup_IO_APIC_irqs();
1855 init_IO_APIC_traps(); 2182 init_IO_APIC_traps();
1856 check_timer(); 2183 check_timer();
1857 if (!acpi_ioapic)
1858 print_IO_APIC();
1859} 2184}
1860 2185
1861struct sysfs_ioapic_data { 2186struct sysfs_ioapic_data {
@@ -1977,6 +2302,9 @@ void destroy_irq(unsigned int irq)
1977 2302
1978 dynamic_irq_cleanup(irq); 2303 dynamic_irq_cleanup(irq);
1979 2304
2305#ifdef CONFIG_INTR_REMAP
2306 free_irte(irq);
2307#endif
1980 spin_lock_irqsave(&vector_lock, flags); 2308 spin_lock_irqsave(&vector_lock, flags);
1981 __clear_irq_vector(irq); 2309 __clear_irq_vector(irq);
1982 spin_unlock_irqrestore(&vector_lock, flags); 2310 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2323,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1995 2323
1996 tmp = TARGET_CPUS; 2324 tmp = TARGET_CPUS;
1997 err = assign_irq_vector(irq, tmp); 2325 err = assign_irq_vector(irq, tmp);
1998 if (!err) { 2326 if (err)
1999 cpus_and(tmp, cfg->domain, tmp); 2327 return err;
2000 dest = cpu_mask_to_apicid(tmp); 2328
2329 cpus_and(tmp, cfg->domain, tmp);
2330 dest = cpu_mask_to_apicid(tmp);
2331
2332#ifdef CONFIG_INTR_REMAP
2333 if (irq_remapped(irq)) {
2334 struct irte irte;
2335 int ir_index;
2336 u16 sub_handle;
2337
2338 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2339 BUG_ON(ir_index == -1);
2340
2341 memset (&irte, 0, sizeof(irte));
2342
2343 irte.present = 1;
2344 irte.dst_mode = INT_DEST_MODE;
2345 irte.trigger_mode = 0; /* edge */
2346 irte.dlvry_mode = INT_DELIVERY_MODE;
2347 irte.vector = cfg->vector;
2348 irte.dest_id = IRTE_DEST(dest);
2349
2350 modify_irte(irq, &irte);
2001 2351
2002 msg->address_hi = MSI_ADDR_BASE_HI; 2352 msg->address_hi = MSI_ADDR_BASE_HI;
2353 msg->data = sub_handle;
2354 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2355 MSI_ADDR_IR_SHV |
2356 MSI_ADDR_IR_INDEX1(ir_index) |
2357 MSI_ADDR_IR_INDEX2(ir_index);
2358 } else
2359#endif
2360 {
2361 msg->address_hi = MSI_ADDR_BASE_HI;
2003 msg->address_lo = 2362 msg->address_lo =
2004 MSI_ADDR_BASE_LO | 2363 MSI_ADDR_BASE_LO |
2005 ((INT_DEST_MODE == 0) ? 2364 ((INT_DEST_MODE == 0) ?
@@ -2049,6 +2408,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2049 write_msi_msg(irq, &msg); 2408 write_msi_msg(irq, &msg);
2050 irq_desc[irq].affinity = mask; 2409 irq_desc[irq].affinity = mask;
2051} 2410}
2411
2412#ifdef CONFIG_INTR_REMAP
2413/*
2414 * Migrate the MSI irq to another cpumask. This migration is
2415 * done in the process context using interrupt-remapping hardware.
2416 */
2417static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2418{
2419 struct irq_cfg *cfg = irq_cfg + irq;
2420 unsigned int dest;
2421 cpumask_t tmp, cleanup_mask;
2422 struct irte irte;
2423
2424 cpus_and(tmp, mask, cpu_online_map);
2425 if (cpus_empty(tmp))
2426 return;
2427
2428 if (get_irte(irq, &irte))
2429 return;
2430
2431 if (assign_irq_vector(irq, mask))
2432 return;
2433
2434 cpus_and(tmp, cfg->domain, mask);
2435 dest = cpu_mask_to_apicid(tmp);
2436
2437 irte.vector = cfg->vector;
2438 irte.dest_id = IRTE_DEST(dest);
2439
2440 /*
2441 * atomically update the IRTE with the new destination and vector.
2442 */
2443 modify_irte(irq, &irte);
2444
2445 /*
2446 * After this point, all the interrupts will start arriving
2447 * at the new destination. So, time to cleanup the previous
2448 * vector allocation.
2449 */
2450 if (cfg->move_in_progress) {
2451 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2452 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2453 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2454 cfg->move_in_progress = 0;
2455 }
2456
2457 irq_desc[irq].affinity = mask;
2458}
2459#endif
2052#endif /* CONFIG_SMP */ 2460#endif /* CONFIG_SMP */
2053 2461
2054/* 2462/*
@@ -2066,26 +2474,157 @@ static struct irq_chip msi_chip = {
2066 .retrigger = ioapic_retrigger_irq, 2474 .retrigger = ioapic_retrigger_irq,
2067}; 2475};
2068 2476
2069int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2477#ifdef CONFIG_INTR_REMAP
2478static struct irq_chip msi_ir_chip = {
2479 .name = "IR-PCI-MSI",
2480 .unmask = unmask_msi_irq,
2481 .mask = mask_msi_irq,
2482 .ack = ack_x2apic_edge,
2483#ifdef CONFIG_SMP
2484 .set_affinity = ir_set_msi_irq_affinity,
2485#endif
2486 .retrigger = ioapic_retrigger_irq,
2487};
2488
2489/*
2490 * Map the PCI dev to the corresponding remapping hardware unit
2491 * and allocate 'nvec' consecutive interrupt-remapping table entries
2492 * in it.
2493 */
2494static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2070{ 2495{
2496 struct intel_iommu *iommu;
2497 int index;
2498
2499 iommu = map_dev_to_ir(dev);
2500 if (!iommu) {
2501 printk(KERN_ERR
2502 "Unable to map PCI %s to iommu\n", pci_name(dev));
2503 return -ENOENT;
2504 }
2505
2506 index = alloc_irte(iommu, irq, nvec);
2507 if (index < 0) {
2508 printk(KERN_ERR
2509 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2510 pci_name(dev));
2511 return -ENOSPC;
2512 }
2513 return index;
2514}
2515#endif
2516
2517static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2518{
2519 int ret;
2071 struct msi_msg msg; 2520 struct msi_msg msg;
2521
2522 ret = msi_compose_msg(dev, irq, &msg);
2523 if (ret < 0)
2524 return ret;
2525
2526 set_irq_msi(irq, desc);
2527 write_msi_msg(irq, &msg);
2528
2529#ifdef CONFIG_INTR_REMAP
2530 if (irq_remapped(irq)) {
2531 struct irq_desc *desc = irq_desc + irq;
2532 /*
2533 * irq migration in process context
2534 */
2535 desc->status |= IRQ_MOVE_PCNTXT;
2536 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2537 } else
2538#endif
2539 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2540
2541 return 0;
2542}
2543
2544int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2545{
2072 int irq, ret; 2546 int irq, ret;
2547
2073 irq = create_irq(); 2548 irq = create_irq();
2074 if (irq < 0) 2549 if (irq < 0)
2075 return irq; 2550 return irq;
2076 2551
2077 ret = msi_compose_msg(dev, irq, &msg); 2552#ifdef CONFIG_INTR_REMAP
2553 if (!intr_remapping_enabled)
2554 goto no_ir;
2555
2556 ret = msi_alloc_irte(dev, irq, 1);
2557 if (ret < 0)
2558 goto error;
2559no_ir:
2560#endif
2561 ret = setup_msi_irq(dev, desc, irq);
2078 if (ret < 0) { 2562 if (ret < 0) {
2079 destroy_irq(irq); 2563 destroy_irq(irq);
2080 return ret; 2564 return ret;
2081 } 2565 }
2566 return 0;
2082 2567
2083 set_irq_msi(irq, desc); 2568#ifdef CONFIG_INTR_REMAP
2084 write_msi_msg(irq, &msg); 2569error:
2570 destroy_irq(irq);
2571 return ret;
2572#endif
2573}
2085 2574
2086 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2575int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2576{
2577 int irq, ret, sub_handle;
2578 struct msi_desc *desc;
2579#ifdef CONFIG_INTR_REMAP
2580 struct intel_iommu *iommu = 0;
2581 int index = 0;
2582#endif
2583
2584 sub_handle = 0;
2585 list_for_each_entry(desc, &dev->msi_list, list) {
2586 irq = create_irq();
2587 if (irq < 0)
2588 return irq;
2589#ifdef CONFIG_INTR_REMAP
2590 if (!intr_remapping_enabled)
2591 goto no_ir;
2087 2592
2593 if (!sub_handle) {
2594 /*
2595 * allocate the consecutive block of IRTE's
2596 * for 'nvec'
2597 */
2598 index = msi_alloc_irte(dev, irq, nvec);
2599 if (index < 0) {
2600 ret = index;
2601 goto error;
2602 }
2603 } else {
2604 iommu = map_dev_to_ir(dev);
2605 if (!iommu) {
2606 ret = -ENOENT;
2607 goto error;
2608 }
2609 /*
2610 * setup the mapping between the irq and the IRTE
2611 * base index, the sub_handle pointing to the
2612 * appropriate interrupt remap table entry.
2613 */
2614 set_irte_irq(irq, iommu, index, sub_handle);
2615 }
2616no_ir:
2617#endif
2618 ret = setup_msi_irq(dev, desc, irq);
2619 if (ret < 0)
2620 goto error;
2621 sub_handle++;
2622 }
2088 return 0; 2623 return 0;
2624
2625error:
2626 destroy_irq(irq);
2627 return ret;
2089} 2628}
2090 2629
2091void arch_teardown_msi_irq(unsigned int irq) 2630void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2872,10 @@ void __init setup_ioapic_dest(void)
2333 setup_IO_APIC_irq(ioapic, pin, irq, 2872 setup_IO_APIC_irq(ioapic, pin, irq,
2334 irq_trigger(irq_entry), 2873 irq_trigger(irq_entry),
2335 irq_polarity(irq_entry)); 2874 irq_polarity(irq_entry));
2875#ifdef CONFIG_INTR_REMAP
2876 else if (intr_remapping_enabled)
2877 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2878#endif
2336 else 2879 else
2337 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2880 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2338 } 2881 }
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index d66914287ee1..9200a1e2752d 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -74,6 +74,15 @@ void __init init_ISA_irqs (void)
74 } 74 }
75} 75}
76 76
77/*
78 * IRQ2 is cascade interrupt to second interrupt controller
79 */
80static struct irqaction irq2 = {
81 .handler = no_action,
82 .mask = CPU_MASK_NONE,
83 .name = "cascade",
84};
85
77/* Overridden in paravirt.c */ 86/* Overridden in paravirt.c */
78void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
79 88
@@ -98,6 +107,46 @@ void __init native_init_IRQ(void)
98 set_intr_gate(vector, interrupt[i]); 107 set_intr_gate(vector, interrupt[i]);
99 } 108 }
100 109
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116
117 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup.
120 */
121 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
122
123 /* IPI for invalidation */
124 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
125
126 /* IPI for generic function call */
127 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
128
129 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
131#endif
132
133#ifdef CONFIG_X86_LOCAL_APIC
134 /* self generated IPI for local APIC timer */
135 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
136
137 /* IPI vectors for APIC spurious and error interrupts */
138 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
139 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
140#endif
141
142#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
143 /* thermal monitor LVT interrupt */
144 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
145#endif
146
147 if (!acpi_ioapic)
148 setup_irq(2, &irq2);
149
101 /* setup after call gates are initialised (usually add in 150 /* setup after call gates are initialised (usually add in
102 * the architecture specific gates) 151 * the architecture specific gates)
103 */ 152 */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430725cb..f98f4e1dba09 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c18f010..4caff39078e0 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
229 } 229 }
230} 230}
231 231
232static int __init numaq_setup_ioapic_ids(void)
233{
234 /* so can skip it */
235 return 1;
236}
237
232static struct x86_quirks numaq_x86_quirks __initdata = { 238static struct x86_quirks numaq_x86_quirks __initdata = {
233 .arch_pre_time_init = numaq_pre_time_init, 239 .arch_pre_time_init = numaq_pre_time_init,
234 .arch_time_init = NULL, 240 .arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
243 .mpc_oem_bus_info = mpc_oem_bus_info, 249 .mpc_oem_bus_info = mpc_oem_bus_info,
244 .mpc_oem_pci_bus = mpc_oem_pci_bus, 250 .mpc_oem_pci_bus = mpc_oem_pci_bus,
245 .smp_read_mpc_oem = smp_read_mpc_oem, 251 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids,
246}; 253};
247 254
248void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e2f43768723a..6b0bb73998dd 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -374,8 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = {
374 374
375struct pv_apic_ops pv_apic_ops = { 375struct pv_apic_ops pv_apic_ops = {
376#ifdef CONFIG_X86_LOCAL_APIC 376#ifdef CONFIG_X86_LOCAL_APIC
377 .apic_write = native_apic_write,
378 .apic_read = native_apic_read,
379 .setup_boot_clock = setup_boot_APIC_clock, 377 .setup_boot_clock = setup_boot_APIC_clock,
380 .setup_secondary_clock = setup_secondary_APIC_clock, 378 .setup_secondary_clock = setup_secondary_APIC_clock,
381 .startup_ipi_hook = paravirt_nop, 379 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ec7a2ba9bce8..c622772744d8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -15,7 +15,6 @@ unsigned long idle_nomwait;
15EXPORT_SYMBOL(idle_nomwait); 15EXPORT_SYMBOL(idle_nomwait);
16 16
17struct kmem_cache *task_xstate_cachep; 17struct kmem_cache *task_xstate_cachep;
18static int force_mwait __cpuinitdata;
19 18
20int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 19int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
21{ 20{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 141efab52400..46c98efbbf8d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -758,6 +758,8 @@ void __init setup_arch(char **cmdline_p)
758#else 758#else
759 num_physpages = max_pfn; 759 num_physpages = max_pfn;
760 760
761 if (cpu_has_x2apic)
762 check_x2apic();
761 763
762 /* How many end-of-memory variables you have, grandma! */ 764 /* How many end-of-memory variables you have, grandma! */
763 /* need this before calling reserve_initrd */ 765 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 8b4956e800ac..cc673aa55ce4 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -3,9 +3,18 @@ struct sigframe {
3 char __user *pretcode; 3 char __user *pretcode;
4 int sig; 4 int sig;
5 struct sigcontext sc; 5 struct sigcontext sc;
6 struct _fpstate fpstate; 6 /*
7 * fpstate is unused. fpstate is moved/allocated after
8 * retcode[] below. This movement allows to have the FP state and the
9 * future state extensions (xsave) stay together.
10 * And at the same time retaining the unused fpstate, prevents changing
11 * the offset of extramask[] in the sigframe and thus prevent any
12 * legacy application accessing/modifying it.
13 */
14 struct _fpstate fpstate_unused;
7 unsigned long extramask[_NSIG_WORDS-1]; 15 unsigned long extramask[_NSIG_WORDS-1];
8 char retcode[8]; 16 char retcode[8];
17 /* fp state follows here */
9}; 18};
10 19
11struct rt_sigframe { 20struct rt_sigframe {
@@ -15,14 +24,15 @@ struct rt_sigframe {
15 void __user *puc; 24 void __user *puc;
16 struct siginfo info; 25 struct siginfo info;
17 struct ucontext uc; 26 struct ucontext uc;
18 struct _fpstate fpstate;
19 char retcode[8]; 27 char retcode[8];
28 /* fp state follows here */
20}; 29};
21#else 30#else
22struct rt_sigframe { 31struct rt_sigframe {
23 char __user *pretcode; 32 char __user *pretcode;
24 struct ucontext uc; 33 struct ucontext uc;
25 struct siginfo info; 34 struct siginfo info;
35 /* fp state follows here */
26}; 36};
27 37
28int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 38int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 2a2435d3037d..b21070ea33a4 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -161,28 +161,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
161 } 161 }
162 162
163 { 163 {
164 struct _fpstate __user *buf; 164 void __user *buf;
165 165
166 err |= __get_user(buf, &sc->fpstate); 166 err |= __get_user(buf, &sc->fpstate);
167 if (buf) { 167 err |= restore_i387_xstate(buf);
168 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
169 goto badframe;
170 err |= restore_i387(buf);
171 } else {
172 struct task_struct *me = current;
173
174 if (used_math()) {
175 clear_fpu(me);
176 clear_used_math();
177 }
178 }
179 } 168 }
180 169
181 err |= __get_user(*pax, &sc->ax); 170 err |= __get_user(*pax, &sc->ax);
182 return err; 171 return err;
183
184badframe:
185 return 1;
186} 172}
187 173
188asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 174asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -264,7 +250,7 @@ badframe:
264 * Set up a signal frame. 250 * Set up a signal frame.
265 */ 251 */
266static int 252static int
267setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, 253setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
268 struct pt_regs *regs, unsigned long mask) 254 struct pt_regs *regs, unsigned long mask)
269{ 255{
270 int tmp, err = 0; 256 int tmp, err = 0;
@@ -291,7 +277,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
291 err |= __put_user(regs->sp, &sc->sp_at_signal); 277 err |= __put_user(regs->sp, &sc->sp_at_signal);
292 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 278 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
293 279
294 tmp = save_i387(fpstate); 280 tmp = save_i387_xstate(fpstate);
295 if (tmp < 0) 281 if (tmp < 0)
296 err = 1; 282 err = 1;
297 else 283 else
@@ -308,7 +294,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
308 * Determine which stack to use.. 294 * Determine which stack to use..
309 */ 295 */
310static inline void __user * 296static inline void __user *
311get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) 297get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
298 void **fpstate)
312{ 299{
313 unsigned long sp; 300 unsigned long sp;
314 301
@@ -334,6 +321,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
334 sp = (unsigned long) ka->sa.sa_restorer; 321 sp = (unsigned long) ka->sa.sa_restorer;
335 } 322 }
336 323
324 if (used_math()) {
325 sp = sp - sig_xstate_size;
326 *fpstate = (struct _fpstate *) sp;
327 }
328
337 sp -= frame_size; 329 sp -= frame_size;
338 /* 330 /*
339 * Align the stack pointer according to the i386 ABI, 331 * Align the stack pointer according to the i386 ABI,
@@ -352,8 +344,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
352 void __user *restorer; 344 void __user *restorer;
353 int err = 0; 345 int err = 0;
354 int usig; 346 int usig;
347 void __user *fpstate = NULL;
355 348
356 frame = get_sigframe(ka, regs, sizeof(*frame)); 349 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
357 350
358 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 351 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
359 goto give_sigsegv; 352 goto give_sigsegv;
@@ -368,7 +361,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
368 if (err) 361 if (err)
369 goto give_sigsegv; 362 goto give_sigsegv;
370 363
371 err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); 364 err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
372 if (err) 365 if (err)
373 goto give_sigsegv; 366 goto give_sigsegv;
374 367
@@ -429,8 +422,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
429 void __user *restorer; 422 void __user *restorer;
430 int err = 0; 423 int err = 0;
431 int usig; 424 int usig;
425 void __user *fpstate = NULL;
432 426
433 frame = get_sigframe(ka, regs, sizeof(*frame)); 427 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
434 428
435 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 429 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
436 goto give_sigsegv; 430 goto give_sigsegv;
@@ -449,13 +443,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
449 goto give_sigsegv; 443 goto give_sigsegv;
450 444
451 /* Create the ucontext. */ 445 /* Create the ucontext. */
452 err |= __put_user(0, &frame->uc.uc_flags); 446 if (cpu_has_xsave)
447 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
448 else
449 err |= __put_user(0, &frame->uc.uc_flags);
453 err |= __put_user(0, &frame->uc.uc_link); 450 err |= __put_user(0, &frame->uc.uc_link);
454 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 451 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
455 err |= __put_user(sas_ss_flags(regs->sp), 452 err |= __put_user(sas_ss_flags(regs->sp),
456 &frame->uc.uc_stack.ss_flags); 453 &frame->uc.uc_stack.ss_flags);
457 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 454 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
458 err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, 455 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
459 regs, set->sig[0]); 456 regs, set->sig[0]);
460 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 457 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
461 if (err) 458 if (err)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 694aa888bb19..823a55bf8c39 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -53,69 +53,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
53} 53}
54 54
55/* 55/*
56 * Signal frame handlers.
57 */
58
59static inline int save_i387(struct _fpstate __user *buf)
60{
61 struct task_struct *tsk = current;
62 int err = 0;
63
64 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
65 sizeof(tsk->thread.xstate->fxsave));
66
67 if ((unsigned long)buf % 16)
68 printk("save_i387: bad fpstate %p\n", buf);
69
70 if (!used_math())
71 return 0;
72 clear_used_math(); /* trigger finit */
73 if (task_thread_info(tsk)->status & TS_USEDFPU) {
74 err = save_i387_checking((struct i387_fxsave_struct __user *)
75 buf);
76 if (err)
77 return err;
78 task_thread_info(tsk)->status &= ~TS_USEDFPU;
79 stts();
80 } else {
81 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
82 sizeof(struct i387_fxsave_struct)))
83 return -1;
84 }
85 return 1;
86}
87
88/*
89 * This restores directly out of user space. Exceptions are handled.
90 */
91static inline int restore_i387(struct _fpstate __user *buf)
92{
93 struct task_struct *tsk = current;
94 int err;
95
96 if (!used_math()) {
97 err = init_fpu(tsk);
98 if (err)
99 return err;
100 }
101
102 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
103 clts();
104 task_thread_info(current)->status |= TS_USEDFPU;
105 }
106 err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
107 if (unlikely(err)) {
108 /*
109 * Encountered an error while doing the restore from the
110 * user buffer, clear the fpu state.
111 */
112 clear_fpu(tsk);
113 clear_used_math();
114 }
115 return err;
116}
117
118/*
119 * Do a signal return; undo the signal stack. 56 * Do a signal return; undo the signal stack.
120 */ 57 */
121static int 58static int
@@ -159,25 +96,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
159 { 96 {
160 struct _fpstate __user *buf; 97 struct _fpstate __user *buf;
161 err |= __get_user(buf, &sc->fpstate); 98 err |= __get_user(buf, &sc->fpstate);
162 99 err |= restore_i387_xstate(buf);
163 if (buf) {
164 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
165 goto badframe;
166 err |= restore_i387(buf);
167 } else {
168 struct task_struct *me = current;
169 if (used_math()) {
170 clear_fpu(me);
171 clear_used_math();
172 }
173 }
174 } 100 }
175 101
176 err |= __get_user(*pax, &sc->ax); 102 err |= __get_user(*pax, &sc->ax);
177 return err; 103 return err;
178
179badframe:
180 return 1;
181} 104}
182 105
183asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) 106asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
@@ -269,26 +192,23 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
269 sp = current->sas_ss_sp + current->sas_ss_size; 192 sp = current->sas_ss_sp + current->sas_ss_size;
270 } 193 }
271 194
272 return (void __user *)round_down(sp - size, 16); 195 return (void __user *)round_down(sp - size, 64);
273} 196}
274 197
275static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 198static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
276 sigset_t *set, struct pt_regs *regs) 199 sigset_t *set, struct pt_regs *regs)
277{ 200{
278 struct rt_sigframe __user *frame; 201 struct rt_sigframe __user *frame;
279 struct _fpstate __user *fp = NULL; 202 void __user *fp = NULL;
280 int err = 0; 203 int err = 0;
281 struct task_struct *me = current; 204 struct task_struct *me = current;
282 205
283 if (used_math()) { 206 if (used_math()) {
284 fp = get_stack(ka, regs, sizeof(struct _fpstate)); 207 fp = get_stack(ka, regs, sig_xstate_size);
285 frame = (void __user *)round_down( 208 frame = (void __user *)round_down(
286 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; 209 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
287 210
288 if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) 211 if (save_i387_xstate(fp) < 0)
289 goto give_sigsegv;
290
291 if (save_i387(fp) < 0)
292 err |= -1; 212 err |= -1;
293 } else 213 } else
294 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; 214 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
@@ -303,7 +223,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
303 } 223 }
304 224
305 /* Create the ucontext. */ 225 /* Create the ucontext. */
306 err |= __put_user(0, &frame->uc.uc_flags); 226 if (cpu_has_xsave)
227 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
228 else
229 err |= __put_user(0, &frame->uc.uc_flags);
307 err |= __put_user(0, &frame->uc.uc_link); 230 err |= __put_user(0, &frame->uc.uc_link);
308 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 231 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
309 err |= __put_user(sas_ss_flags(regs->sp), 232 err |= __put_user(sas_ss_flags(regs->sp),
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4e7ccb0e2a9b..9056f7e272c0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -551,8 +552,7 @@ static inline void __inquire_remote_apic(int apicid)
551 printk(KERN_CONT 552 printk(KERN_CONT
552 "a previous APIC delivery may have failed\n"); 553 "a previous APIC delivery may have failed\n");
553 554
554 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 555 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
555 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
556 556
557 timeout = 0; 557 timeout = 0;
558 do { 558 do {
@@ -584,11 +584,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
584 int maxlvt; 584 int maxlvt;
585 585
586 /* Target chip */ 586 /* Target chip */
587 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
588
589 /* Boot on the stack */ 587 /* Boot on the stack */
590 /* Kick the second */ 588 /* Kick the second */
591 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 589 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
592 590
593 pr_debug("Waiting for send to finish...\n"); 591 pr_debug("Waiting for send to finish...\n");
594 send_status = safe_apic_wait_icr_idle(); 592 send_status = safe_apic_wait_icr_idle();
@@ -641,13 +639,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
641 /* 639 /*
642 * Turn INIT on target chip 640 * Turn INIT on target chip
643 */ 641 */
644 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
645
646 /* 642 /*
647 * Send IPI 643 * Send IPI
648 */ 644 */
649 apic_write(APIC_ICR, 645 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
650 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 646 phys_apicid);
651 647
652 pr_debug("Waiting for send to finish...\n"); 648 pr_debug("Waiting for send to finish...\n");
653 send_status = safe_apic_wait_icr_idle(); 649 send_status = safe_apic_wait_icr_idle();
@@ -657,10 +653,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
657 pr_debug("Deasserting INIT.\n"); 653 pr_debug("Deasserting INIT.\n");
658 654
659 /* Target chip */ 655 /* Target chip */
660 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
661
662 /* Send IPI */ 656 /* Send IPI */
663 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 657 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
664 658
665 pr_debug("Waiting for send to finish...\n"); 659 pr_debug("Waiting for send to finish...\n");
666 send_status = safe_apic_wait_icr_idle(); 660 send_status = safe_apic_wait_icr_idle();
@@ -703,11 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
703 */ 697 */
704 698
705 /* Target chip */ 699 /* Target chip */
706 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
707
708 /* Boot on the stack */ 700 /* Boot on the stack */
709 /* Kick the second */ 701 /* Kick the second */
710 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 702 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
703 phys_apicid);
711 704
712 /* 705 /*
713 * Give the other CPU some time to accept the IPI. 706 * Give the other CPU some time to accept the IPI.
@@ -1176,10 +1169,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1176 * Setup boot CPU information 1169 * Setup boot CPU information
1177 */ 1170 */
1178 smp_store_cpu_info(0); /* Final full version of the data */ 1171 smp_store_cpu_info(0); /* Final full version of the data */
1172#ifdef CONFIG_X86_32
1179 boot_cpu_logical_apicid = logical_smp_processor_id(); 1173 boot_cpu_logical_apicid = logical_smp_processor_id();
1174#endif
1180 current_thread_info()->cpu = 0; /* needed? */ 1175 current_thread_info()->cpu = 0; /* needed? */
1181 set_cpu_sibling_map(0); 1176 set_cpu_sibling_map(0);
1182 1177
1178#ifdef CONFIG_X86_64
1179 enable_IR_x2apic();
1180 setup_apic_routing();
1181#endif
1182
1183 if (smp_sanity_check(max_cpus) < 0) { 1183 if (smp_sanity_check(max_cpus) < 0) {
1184 printk(KERN_INFO "SMP disabled\n"); 1184 printk(KERN_INFO "SMP disabled\n");
1185 disable_smp(); 1185 disable_smp();
@@ -1187,9 +1187,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1187 } 1187 }
1188 1188
1189 preempt_disable(); 1189 preempt_disable();
1190 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1190 if (read_apic_id() != boot_cpu_physical_apicid) {
1191 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1191 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1192 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1192 read_apic_id(), boot_cpu_physical_apicid);
1193 /* Or can we switch back to PIC here? */ 1193 /* Or can we switch back to PIC here? */
1194 } 1194 }
1195 preempt_enable(); 1195 preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044ba..7b987852e876 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/mach-summit/mach_mpparse.h> 33#include <asm/summit/mpparse.h>
34 34
35static struct rio_table_hdr *rio_table_hdr __initdata; 35static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e45e5a1..da5a5964fccb 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@ void __init trap_init(void)
1228 1228
1229 set_bit(SYSCALL_VECTOR, used_vectors); 1229 set_bit(SYSCALL_VECTOR, used_vectors);
1230 1230
1231 init_thread_xstate();
1232 /* 1231 /*
1233 * Should be a barrier for any external CPU state: 1232 * Should be a barrier for any external CPU state:
1234 */ 1233 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 7a31f104bef9..2887a789e38f 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1138,7 +1138,7 @@ asmlinkage void math_state_restore(void)
1138 /* 1138 /*
1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1140 */ 1140 */
1141 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { 1141 if (unlikely(restore_fpu_checking(me))) {
1142 stts(); 1142 stts();
1143 force_sig(SIGSEGV, me); 1143 force_sig(SIGSEGV, me);
1144 return; 1144 return;
@@ -1179,10 +1179,6 @@ void __init trap_init(void)
1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1180#endif 1180#endif
1181 /* 1181 /*
1182 * initialize the per thread extended state:
1183 */
1184 init_thread_xstate();
1185 /*
1186 * Should be a barrier for any external CPU state: 1182 * Should be a barrier for any external CPU state:
1187 */ 1183 */
1188 cpu_init(); 1184 cpu_init();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8c9ad02af5a2..8b6c393ab9fd 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
905#endif 905#endif
906 906
907#ifdef CONFIG_X86_LOCAL_APIC 907#ifdef CONFIG_X86_LOCAL_APIC
908 para_fill(pv_apic_ops.apic_read, APICRead); 908 para_fill(apic_ops->read, APICRead);
909 para_fill(pv_apic_ops.apic_write, APICWrite); 909 para_fill(apic_ops->write, APICWrite);
910#endif 910#endif
911 911
912 /* 912 /*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index af5bdad84604..a9b8560adbc2 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -140,10 +140,10 @@ SECTIONS
140 *(.con_initcall.init) 140 *(.con_initcall.init)
141 __con_initcall_end = .; 141 __con_initcall_end = .;
142 } 142 }
143 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 143 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
144 __x86cpuvendor_start = .; 144 __x86_cpu_dev_start = .;
145 *(.x86cpuvendor.init) 145 *(.x86_cpu_dev.init)
146 __x86cpuvendor_end = .; 146 __x86_cpu_dev_end = .;
147 } 147 }
148 SECURITY_INIT 148 SECURITY_INIT
149 . = ALIGN(4); 149 . = ALIGN(4);
@@ -180,6 +180,7 @@ SECTIONS
180 . = ALIGN(PAGE_SIZE); 180 . = ALIGN(PAGE_SIZE);
181 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 181 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
182 __per_cpu_start = .; 182 __per_cpu_start = .;
183 *(.data.percpu.page_aligned)
183 *(.data.percpu) 184 *(.data.percpu)
184 *(.data.percpu.shared_aligned) 185 *(.data.percpu.shared_aligned)
185 __per_cpu_end = .; 186 __per_cpu_end = .;
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 63e5c1a22e88..201e81a91a95 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -168,13 +168,12 @@ SECTIONS
168 *(.con_initcall.init) 168 *(.con_initcall.init)
169 } 169 }
170 __con_initcall_end = .; 170 __con_initcall_end = .;
171 . = ALIGN(16); 171 __x86_cpu_dev_start = .;
172 __x86cpuvendor_start = .; 172 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
173 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 173 *(.x86_cpu_dev.init)
174 *(.x86cpuvendor.init)
175 } 174 }
176 __x86cpuvendor_end = .;
177 SECURITY_INIT 175 SECURITY_INIT
176 __x86_cpu_dev_end = .;
178 177
179 . = ALIGN(8); 178 . = ALIGN(8);
180 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 179 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 000000000000..07713d64debe
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,316 @@
1/*
2 * xsave/xrstor support.
3 *
4 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
5 */
6#include <linux/bootmem.h>
7#include <linux/compat.h>
8#include <asm/i387.h>
9#ifdef CONFIG_IA32_EMULATION
10#include <asm/sigcontext32.h>
11#endif
12#include <asm/xcr.h>
13
14/*
15 * Supported feature mask by the CPU and the kernel.
16 */
17u64 pcntxt_mask;
18
19struct _fpx_sw_bytes fx_sw_reserved;
20#ifdef CONFIG_IA32_EMULATION
21struct _fpx_sw_bytes fx_sw_reserved_ia32;
22#endif
23
24/*
25 * Check for the presence of extended state information in the
26 * user fpstate pointer in the sigcontext.
27 */
28int check_for_xstate(struct i387_fxsave_struct __user *buf,
29 void __user *fpstate,
30 struct _fpx_sw_bytes *fx_sw_user)
31{
32 int min_xstate_size = sizeof(struct i387_fxsave_struct) +
33 sizeof(struct xsave_hdr_struct);
34 unsigned int magic2;
35 int err;
36
37 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
38 sizeof(struct _fpx_sw_bytes));
39
40 if (err)
41 return err;
42
43 /*
44 * First Magic check failed.
45 */
46 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
47 return -1;
48
49 /*
50 * Check for error scenarios.
51 */
52 if (fx_sw_user->xstate_size < min_xstate_size ||
53 fx_sw_user->xstate_size > xstate_size ||
54 fx_sw_user->xstate_size > fx_sw_user->extended_size)
55 return -1;
56
57 err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
58 fx_sw_user->extended_size -
59 FP_XSTATE_MAGIC2_SIZE));
60 /*
61 * Check for the presence of second magic word at the end of memory
62 * layout. This detects the case where the user just copied the legacy
63 * fpstate layout with out copying the extended state information
64 * in the memory layout.
65 */
66 if (err || magic2 != FP_XSTATE_MAGIC2)
67 return -1;
68
69 return 0;
70}
71
72#ifdef CONFIG_X86_64
73/*
74 * Signal frame handlers.
75 */
76
77int save_i387_xstate(void __user *buf)
78{
79 struct task_struct *tsk = current;
80 int err = 0;
81
82 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
83 return -EACCES;
84
85 BUG_ON(sig_xstate_size < xstate_size);
86
87 if ((unsigned long)buf % 64)
88 printk("save_i387_xstate: bad fpstate %p\n", buf);
89
90 if (!used_math())
91 return 0;
92 clear_used_math(); /* trigger finit */
93 if (task_thread_info(tsk)->status & TS_USEDFPU) {
94 /*
95 * Start with clearing the user buffer. This will present a
96 * clean context for the bytes not touched by the fxsave/xsave.
97 */
98 __clear_user(buf, sig_xstate_size);
99
100 if (task_thread_info(tsk)->status & TS_XSAVE)
101 err = xsave_user(buf);
102 else
103 err = fxsave_user(buf);
104
105 if (err)
106 return err;
107 task_thread_info(tsk)->status &= ~TS_USEDFPU;
108 stts();
109 } else {
110 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
111 xstate_size))
112 return -1;
113 }
114
115 if (task_thread_info(tsk)->status & TS_XSAVE) {
116 struct _fpstate __user *fx = buf;
117
118 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
119 sizeof(struct _fpx_sw_bytes));
120
121 err |= __put_user(FP_XSTATE_MAGIC2,
122 (__u32 __user *) (buf + sig_xstate_size
123 - FP_XSTATE_MAGIC2_SIZE));
124 }
125
126 return 1;
127}
128
129/*
130 * Restore the extended state if present. Otherwise, restore the FP/SSE
131 * state.
132 */
133int restore_user_xstate(void __user *buf)
134{
135 struct _fpx_sw_bytes fx_sw_user;
136 u64 mask;
137 int err;
138
139 if (((unsigned long)buf % 64) ||
140 check_for_xstate(buf, buf, &fx_sw_user))
141 goto fx_only;
142
143 mask = fx_sw_user.xstate_bv;
144
145 /*
146 * restore the state passed by the user.
147 */
148 err = xrestore_user(buf, mask);
149 if (err)
150 return err;
151
152 /*
153 * init the state skipped by the user.
154 */
155 mask = pcntxt_mask & ~mask;
156
157 xrstor_state(init_xstate_buf, mask);
158
159 return 0;
160
161fx_only:
162 /*
163 * couldn't find the extended state information in the
164 * memory layout. Restore just the FP/SSE and init all
165 * the other extended state.
166 */
167 xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
168 return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
169}
170
171/*
172 * This restores directly out of user space. Exceptions are handled.
173 */
174int restore_i387_xstate(void __user *buf)
175{
176 struct task_struct *tsk = current;
177 int err = 0;
178
179 if (!buf) {
180 if (used_math())
181 goto clear;
182 return 0;
183 } else
184 if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
185 return -EACCES;
186
187 if (!used_math()) {
188 err = init_fpu(tsk);
189 if (err)
190 return err;
191 }
192
193 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
194 clts();
195 task_thread_info(current)->status |= TS_USEDFPU;
196 }
197 if (task_thread_info(tsk)->status & TS_XSAVE)
198 err = restore_user_xstate(buf);
199 else
200 err = fxrstor_checking((__force struct i387_fxsave_struct *)
201 buf);
202 if (unlikely(err)) {
203 /*
204 * Encountered an error while doing the restore from the
205 * user buffer, clear the fpu state.
206 */
207clear:
208 clear_fpu(tsk);
209 clear_used_math();
210 }
211 return err;
212}
213#endif
214
215/*
216 * Prepare the SW reserved portion of the fxsave memory layout, indicating
217 * the presence of the extended state information in the memory layout
218 * pointed by the fpstate pointer in the sigcontext.
219 * This will be saved when ever the FP and extended state context is
220 * saved on the user stack during the signal handler delivery to the user.
221 */
222void prepare_fx_sw_frame(void)
223{
224 int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
225 FP_XSTATE_MAGIC2_SIZE;
226
227 sig_xstate_size = sizeof(struct _fpstate) + size_extended;
228
229#ifdef CONFIG_IA32_EMULATION
230 sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
231#endif
232
233 memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
234
235 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
236 fx_sw_reserved.extended_size = sig_xstate_size;
237 fx_sw_reserved.xstate_bv = pcntxt_mask;
238 fx_sw_reserved.xstate_size = xstate_size;
239#ifdef CONFIG_IA32_EMULATION
240 memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
241 sizeof(struct _fpx_sw_bytes));
242 fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
243#endif
244}
245
246/*
247 * Represents init state for the supported extended state.
248 */
249struct xsave_struct *init_xstate_buf;
250
251#ifdef CONFIG_X86_64
252unsigned int sig_xstate_size = sizeof(struct _fpstate);
253#endif
254
255/*
256 * Enable the extended processor state save/restore feature
257 */
258void __cpuinit xsave_init(void)
259{
260 if (!cpu_has_xsave)
261 return;
262
263 set_in_cr4(X86_CR4_OSXSAVE);
264
265 /*
266 * Enable all the features that the HW is capable of
267 * and the Linux kernel is aware of.
268 */
269 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
270}
271
272/*
273 * setup the xstate image representing the init state
274 */
275void setup_xstate_init(void)
276{
277 init_xstate_buf = alloc_bootmem(xstate_size);
278 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
279}
280
281/*
282 * Enable and initialize the xsave feature.
283 */
284void __init xsave_cntxt_init(void)
285{
286 unsigned int eax, ebx, ecx, edx;
287
288 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
289 pcntxt_mask = eax + ((u64)edx << 32);
290
291 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
292 printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
293 pcntxt_mask);
294 BUG();
295 }
296
297 /*
298 * for now OS knows only about FP/SSE
299 */
300 pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
301 xsave_init();
302
303 /*
304 * Recompute the context size for enabled features
305 */
306 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
307 xstate_size = ebx;
308
309 prepare_fx_sw_frame();
310
311 setup_xstate_init();
312
313 printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
314 "cntxt size 0x%x\n",
315 pcntxt_mask, xstate_size);
316}