aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S38
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.h5
-rw-r--r--arch/x86/kernel/acpi/sleep.c16
-rw-r--r--arch/x86/kernel/aperture_64.c307
-rw-r--r--arch/x86/kernel/apic_32.c12
-rw-r--r--arch/x86/kernel/apic_64.c36
-rw-r--r--arch/x86/kernel/apm_32.c25
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c17
-rw-r--r--arch/x86/kernel/cpu/amd.c42
-rw-r--r--arch/x86/kernel/cpu/amd_64.c211
-rw-r--r--arch/x86/kernel/cpu/bugs.c27
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c (renamed from arch/x86/kernel/bugs_64.c)0
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c43
-rw-r--r--arch/x86/kernel/cpu/cpu.h5
-rw-r--r--arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c44
-rw-r--r--arch/x86/kernel/cpu/intel_64.c103
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c36
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c24
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c90
-rw-r--r--arch/x86/kernel/early-quirks.c13
-rw-r--r--arch/x86/kernel/efi_32.c8
-rw-r--r--arch/x86/kernel/entry_64.S5
-rw-r--r--arch/x86/kernel/geode_32.c5
-rw-r--r--arch/x86/kernel/head_64.S29
-rw-r--r--arch/x86/kernel/hpet.c43
-rw-r--r--arch/x86/kernel/i387.c4
-rw-r--r--arch/x86/kernel/i8259.c (renamed from arch/x86/kernel/i8259_32.c)132
-rw-r--r--arch/x86/kernel/i8259_64.c512
-rw-r--r--arch/x86/kernel/io_apic_32.c474
-rw-r--r--arch/x86/kernel/io_apic_64.c132
-rw-r--r--arch/x86/kernel/ipi.c1
-rw-r--r--arch/x86/kernel/irq_32.c38
-rw-r--r--arch/x86/kernel/irq_64.c28
-rw-r--r--arch/x86/kernel/irqinit_32.c114
-rw-r--r--arch/x86/kernel/irqinit_64.c203
-rw-r--r--arch/x86/kernel/kvmclock.c89
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/microcode.c29
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c1
-rw-r--r--arch/x86/kernel/nmi_32.c13
-rw-r--r--arch/x86/kernel/nmi_64.c11
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/pci-dma.c13
-rw-r--r--arch/x86/kernel/pci-gart_64.c83
-rw-r--r--arch/x86/kernel/process.c190
-rw-r--r--arch/x86/kernel/process_32.c62
-rw-r--r--arch/x86/kernel/process_64.c36
-rw-r--r--arch/x86/kernel/pvclock.c141
-rw-r--r--arch/x86/kernel/quirks.c58
-rw-r--r--arch/x86/kernel/reboot.c18
-rw-r--r--arch/x86/kernel/reboot_fixups_32.c4
-rw-r--r--arch/x86/kernel/setup_32.c10
-rw-r--r--arch/x86/kernel/setup_64.c452
-rw-r--r--arch/x86/kernel/smpboot.c8
-rw-r--r--arch/x86/kernel/sys_i386_32.c64
-rw-r--r--arch/x86/kernel/time_32.c3
-rw-r--r--arch/x86/kernel/traps_64.c3
-rw-r--r--arch/x86/kernel/tsc_32.c24
-rw-r--r--arch/x86/kernel/tsc_64.c2
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S7
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S8
-rw-r--r--arch/x86/kernel/vsmp_64.c3
66 files changed, 2211 insertions, 1966 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..8a42b797cd6b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,14 +18,13 @@ CFLAGS_tsc_64.o := $(nostackp)
18obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 18obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
19obj-y += traps_$(BITS).o irq_$(BITS).o 19obj-y += traps_$(BITS).o irq_$(BITS).o
20obj-y += time_$(BITS).o ioport.o ldt.o 20obj-y += time_$(BITS).o ioport.o ldt.o
21obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o 21obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o
22obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 22obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
23obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 23obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
24obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o 24obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
25obj-y += bootflag.o e820_$(BITS).o 25obj-y += bootflag.o e820_$(BITS).o
26obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 26obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
27obj-y += alternative.o i8253.o pci-nommu.o 27obj-y += alternative.o i8253.o pci-nommu.o
28obj-$(CONFIG_X86_64) += bugs_64.o
29obj-y += tsc_$(BITS).o io_delay.o rtc.o 28obj-y += tsc_$(BITS).o io_delay.o rtc.o
30 29
31obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 30obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
@@ -82,6 +81,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
82obj-$(CONFIG_KVM_GUEST) += kvm.o 81obj-$(CONFIG_KVM_GUEST) += kvm.o
83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 82obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 83obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
84obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
85 85
86obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 86obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
87 87
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index f9b77fb37e5b..3355973b12ac 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -5,6 +5,7 @@
5#include <asm/msr-index.h> 5#include <asm/msr-index.h>
6#include <asm/page.h> 6#include <asm/page.h>
7#include <asm/pgtable.h> 7#include <asm/pgtable.h>
8#include <asm/processor-flags.h>
8 9
9 .code16 10 .code16
10 .section ".header", "a" 11 .section ".header", "a"
@@ -24,6 +25,11 @@ pmode_gdt: .quad 0
24realmode_flags: .long 0 25realmode_flags: .long 0
25real_magic: .long 0 26real_magic: .long 0
26trampoline_segment: .word 0 27trampoline_segment: .word 0
28_pad1: .byte 0
29wakeup_jmp: .byte 0xea /* ljmpw */
30wakeup_jmp_off: .word 3f
31wakeup_jmp_seg: .word 0
32wakeup_gdt: .quad 0, 0, 0
27signature: .long 0x51ee1111 33signature: .long 0x51ee1111
28 34
29 .text 35 .text
@@ -34,11 +40,34 @@ _start:
34 cli 40 cli
35 cld 41 cld
36 42
43 /* Apparently some dimwit BIOS programmers don't know how to
44 program a PM to RM transition, and we might end up here with
45 junk in the data segment descriptor registers. The only way
46 to repair that is to go into PM and fix it ourselves... */
47 movw $16, %cx
48 lgdtl %cs:wakeup_gdt
49 movl %cr0, %eax
50 orb $X86_CR0_PE, %al
51 movl %eax, %cr0
52 jmp 1f
531: ljmpw $8, $2f
542:
55 movw %cx, %ds
56 movw %cx, %es
57 movw %cx, %ss
58 movw %cx, %fs
59 movw %cx, %gs
60
61 andb $~X86_CR0_PE, %al
62 movl %eax, %cr0
63 jmp wakeup_jmp
643:
37 /* Set up segments */ 65 /* Set up segments */
38 movw %cs, %ax 66 movw %cs, %ax
39 movw %ax, %ds 67 movw %ax, %ds
40 movw %ax, %es 68 movw %ax, %es
41 movw %ax, %ss 69 movw %ax, %ss
70 lidtl wakeup_idt
42 71
43 movl $wakeup_stack_end, %esp 72 movl $wakeup_stack_end, %esp
44 73
@@ -98,7 +127,14 @@ bogus_real_magic:
98 jmp 1b 127 jmp 1b
99 128
100 .data 129 .data
101 .balign 4 130 .balign 8
131
132 /* This is the standard real-mode IDT */
133wakeup_idt:
134 .word 0xffff /* limit */
135 .long 0 /* address */
136 .word 0
137
102 .globl HEAP, heap_end 138 .globl HEAP, heap_end
103HEAP: 139HEAP:
104 .long wakeup_heap 140 .long wakeup_heap
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
index ef8166fe8020..69d38d0b2b64 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -24,6 +24,11 @@ struct wakeup_header {
24 u32 realmode_flags; 24 u32 realmode_flags;
25 u32 real_magic; 25 u32 real_magic;
26 u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ 26 u16 trampoline_segment; /* segment with trampoline code, 64-bit only */
27 u8 _pad1;
28 u8 wakeup_jmp;
29 u16 wakeup_jmp_off;
30 u16 wakeup_jmp_seg;
31 u64 wakeup_gdt[3];
27 u32 signature; /* To check we have correct structure */ 32 u32 signature; /* To check we have correct structure */
28} __attribute__((__packed__)); 33} __attribute__((__packed__));
29 34
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index afc25ee9964b..36af01f029ed 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -50,6 +50,20 @@ int acpi_save_state_mem(void)
50 50
51 header->video_mode = saved_video_mode; 51 header->video_mode = saved_video_mode;
52 52
53 header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
54 /* GDT[0]: GDT self-pointer */
55 header->wakeup_gdt[0] =
56 (u64)(sizeof(header->wakeup_gdt) - 1) +
57 ((u64)(acpi_wakeup_address +
58 ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
59 << 16);
60 /* GDT[1]: real-mode-like code segment */
61 header->wakeup_gdt[1] = (0x009bULL << 40) +
62 ((u64)acpi_wakeup_address << 16) + 0xffff;
63 /* GDT[2]: real-mode-like data segment */
64 header->wakeup_gdt[2] = (0x0093ULL << 40) +
65 ((u64)acpi_wakeup_address << 16) + 0xffff;
66
53#ifndef CONFIG_64BIT 67#ifndef CONFIG_64BIT
54 store_gdt((struct desc_ptr *)&header->pmode_gdt); 68 store_gdt((struct desc_ptr *)&header->pmode_gdt);
55 69
@@ -111,7 +125,7 @@ void __init acpi_reserve_bootmem(void)
111 return; 125 return;
112 } 126 }
113 127
114 acpi_wakeup_address = acpi_realmode; 128 acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
115} 129}
116 130
117 131
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 479926d9e004..e819362c7068 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -35,6 +35,18 @@ int fallback_aper_force __initdata;
35 35
36int fix_aperture __initdata = 1; 36int fix_aperture __initdata = 1;
37 37
38struct bus_dev_range {
39 int bus;
40 int dev_base;
41 int dev_limit;
42};
43
44static struct bus_dev_range bus_dev_ranges[] __initdata = {
45 { 0x00, 0x18, 0x20},
46 { 0xff, 0x00, 0x20},
47 { 0xfe, 0x00, 0x20}
48};
49
38static struct resource gart_resource = { 50static struct resource gart_resource = {
39 .name = "GART", 51 .name = "GART",
40 .flags = IORESOURCE_MEM, 52 .flags = IORESOURCE_MEM,
@@ -55,8 +67,9 @@ static u32 __init allocate_aperture(void)
55 u32 aper_size; 67 u32 aper_size;
56 void *p; 68 void *p;
57 69
58 if (fallback_aper_order > 7) 70 /* aper_size should <= 1G */
59 fallback_aper_order = 7; 71 if (fallback_aper_order > 5)
72 fallback_aper_order = 5;
60 aper_size = (32 * 1024 * 1024) << fallback_aper_order; 73 aper_size = (32 * 1024 * 1024) << fallback_aper_order;
61 74
62 /* 75 /*
@@ -65,7 +78,20 @@ static u32 __init allocate_aperture(void)
65 * memory. Unfortunately we cannot move it up because that would 78 * memory. Unfortunately we cannot move it up because that would
66 * make the IOMMU useless. 79 * make the IOMMU useless.
67 */ 80 */
68 p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); 81 /*
82 * using 512M as goal, in case kexec will load kernel_big
83 * that will do the on position decompress, and could overlap with
84 * that positon with gart that is used.
85 * sequende:
86 * kernel_small
87 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
88 * ==> kernel_small(gart area become e820_reserved)
89 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
90 * ==> kerne_big (uncompressed size will be big than 64M or 128M)
91 * so don't use 512M below as gart iommu, leave the space for kernel
92 * code for safe
93 */
94 p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
69 if (!p || __pa(p)+aper_size > 0xffffffff) { 95 if (!p || __pa(p)+aper_size > 0xffffffff) {
70 printk(KERN_ERR 96 printk(KERN_ERR
71 "Cannot allocate aperture memory hole (%p,%uK)\n", 97 "Cannot allocate aperture memory hole (%p,%uK)\n",
@@ -83,69 +109,53 @@ static u32 __init allocate_aperture(void)
83 return (u32)__pa(p); 109 return (u32)__pa(p);
84} 110}
85 111
86static int __init aperture_valid(u64 aper_base, u32 aper_size)
87{
88 if (!aper_base)
89 return 0;
90
91 if (aper_base + aper_size > 0x100000000UL) {
92 printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
93 return 0;
94 }
95 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
96 printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
97 return 0;
98 }
99 if (aper_size < 64*1024*1024) {
100 printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
101 return 0;
102 }
103
104 return 1;
105}
106 112
107/* Find a PCI capability */ 113/* Find a PCI capability */
108static __u32 __init find_cap(int num, int slot, int func, int cap) 114static u32 __init find_cap(int bus, int slot, int func, int cap)
109{ 115{
110 int bytes; 116 int bytes;
111 u8 pos; 117 u8 pos;
112 118
113 if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & 119 if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) &
114 PCI_STATUS_CAP_LIST)) 120 PCI_STATUS_CAP_LIST))
115 return 0; 121 return 0;
116 122
117 pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); 123 pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST);
118 for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { 124 for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
119 u8 id; 125 u8 id;
120 126
121 pos &= ~3; 127 pos &= ~3;
122 id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); 128 id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID);
123 if (id == 0xff) 129 if (id == 0xff)
124 break; 130 break;
125 if (id == cap) 131 if (id == cap)
126 return pos; 132 return pos;
127 pos = read_pci_config_byte(num, slot, func, 133 pos = read_pci_config_byte(bus, slot, func,
128 pos+PCI_CAP_LIST_NEXT); 134 pos+PCI_CAP_LIST_NEXT);
129 } 135 }
130 return 0; 136 return 0;
131} 137}
132 138
133/* Read a standard AGPv3 bridge header */ 139/* Read a standard AGPv3 bridge header */
134static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) 140static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
135{ 141{
136 u32 apsize; 142 u32 apsize;
137 u32 apsizereg; 143 u32 apsizereg;
138 int nbits; 144 int nbits;
139 u32 aper_low, aper_hi; 145 u32 aper_low, aper_hi;
140 u64 aper; 146 u64 aper;
147 u32 old_order;
141 148
142 printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); 149 printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
143 apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); 150 apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
144 if (apsizereg == 0xffffffff) { 151 if (apsizereg == 0xffffffff) {
145 printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); 152 printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
146 return 0; 153 return 0;
147 } 154 }
148 155
156 /* old_order could be the value from NB gart setting */
157 old_order = *order;
158
149 apsize = apsizereg & 0xfff; 159 apsize = apsizereg & 0xfff;
150 /* Some BIOS use weird encodings not in the AGPv3 table. */ 160 /* Some BIOS use weird encodings not in the AGPv3 table. */
151 if (apsize & 0xff) 161 if (apsize & 0xff)
@@ -155,14 +165,26 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
155 if ((int)*order < 0) /* < 32MB */ 165 if ((int)*order < 0) /* < 32MB */
156 *order = 0; 166 *order = 0;
157 167
158 aper_low = read_pci_config(num, slot, func, 0x10); 168 aper_low = read_pci_config(bus, slot, func, 0x10);
159 aper_hi = read_pci_config(num, slot, func, 0x14); 169 aper_hi = read_pci_config(bus, slot, func, 0x14);
160 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); 170 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
161 171
172 /*
173 * On some sick chips, APSIZE is 0. It means it wants 4G
174 * so let double check that order, and lets trust AMD NB settings:
175 */
176 printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
177 aper, 32 << old_order);
178 if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
179 printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
180 32 << *order, apsizereg);
181 *order = old_order;
182 }
183
162 printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 184 printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
163 aper, 32 << *order, apsizereg); 185 aper, 32 << *order, apsizereg);
164 186
165 if (!aperture_valid(aper, (32*1024*1024) << *order)) 187 if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
166 return 0; 188 return 0;
167 return (u32)aper; 189 return (u32)aper;
168} 190}
@@ -180,17 +202,17 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
180 * the AGP bridges should be always an own bus on the HT hierarchy, 202 * the AGP bridges should be always an own bus on the HT hierarchy,
181 * but do it here for future safety. 203 * but do it here for future safety.
182 */ 204 */
183static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) 205static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
184{ 206{
185 int num, slot, func; 207 int bus, slot, func;
186 208
187 /* Poor man's PCI discovery */ 209 /* Poor man's PCI discovery */
188 for (num = 0; num < 256; num++) { 210 for (bus = 0; bus < 256; bus++) {
189 for (slot = 0; slot < 32; slot++) { 211 for (slot = 0; slot < 32; slot++) {
190 for (func = 0; func < 8; func++) { 212 for (func = 0; func < 8; func++) {
191 u32 class, cap; 213 u32 class, cap;
192 u8 type; 214 u8 type;
193 class = read_pci_config(num, slot, func, 215 class = read_pci_config(bus, slot, func,
194 PCI_CLASS_REVISION); 216 PCI_CLASS_REVISION);
195 if (class == 0xffffffff) 217 if (class == 0xffffffff)
196 break; 218 break;
@@ -199,17 +221,17 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
199 case PCI_CLASS_BRIDGE_HOST: 221 case PCI_CLASS_BRIDGE_HOST:
200 case PCI_CLASS_BRIDGE_OTHER: /* needed? */ 222 case PCI_CLASS_BRIDGE_OTHER: /* needed? */
201 /* AGP bridge? */ 223 /* AGP bridge? */
202 cap = find_cap(num, slot, func, 224 cap = find_cap(bus, slot, func,
203 PCI_CAP_ID_AGP); 225 PCI_CAP_ID_AGP);
204 if (!cap) 226 if (!cap)
205 break; 227 break;
206 *valid_agp = 1; 228 *valid_agp = 1;
207 return read_agp(num, slot, func, cap, 229 return read_agp(bus, slot, func, cap,
208 order); 230 order);
209 } 231 }
210 232
211 /* No multi-function device? */ 233 /* No multi-function device? */
212 type = read_pci_config_byte(num, slot, func, 234 type = read_pci_config_byte(bus, slot, func,
213 PCI_HEADER_TYPE); 235 PCI_HEADER_TYPE);
214 if (!(type & 0x80)) 236 if (!(type & 0x80))
215 break; 237 break;
@@ -249,36 +271,50 @@ void __init early_gart_iommu_check(void)
249 * or BIOS forget to put that in reserved. 271 * or BIOS forget to put that in reserved.
250 * try to update e820 to make that region as reserved. 272 * try to update e820 to make that region as reserved.
251 */ 273 */
252 int fix, num; 274 int i, fix, slot;
253 u32 ctl; 275 u32 ctl;
254 u32 aper_size = 0, aper_order = 0, last_aper_order = 0; 276 u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
255 u64 aper_base = 0, last_aper_base = 0; 277 u64 aper_base = 0, last_aper_base = 0;
256 int aper_enabled = 0, last_aper_enabled = 0; 278 int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0;
257 279
258 if (!early_pci_allowed()) 280 if (!early_pci_allowed())
259 return; 281 return;
260 282
283 /* This is mostly duplicate of iommu_hole_init */
261 fix = 0; 284 fix = 0;
262 for (num = 24; num < 32; num++) { 285 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
263 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 286 int bus;
264 continue; 287 int dev_base, dev_limit;
265 288
266 ctl = read_pci_config(0, num, 3, 0x90); 289 bus = bus_dev_ranges[i].bus;
267 aper_enabled = ctl & 1; 290 dev_base = bus_dev_ranges[i].dev_base;
268 aper_order = (ctl >> 1) & 7; 291 dev_limit = bus_dev_ranges[i].dev_limit;
269 aper_size = (32 * 1024 * 1024) << aper_order; 292
270 aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; 293 for (slot = dev_base; slot < dev_limit; slot++) {
271 aper_base <<= 25; 294 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
272 295 continue;
273 if ((last_aper_order && aper_order != last_aper_order) || 296
274 (last_aper_base && aper_base != last_aper_base) || 297 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
275 (last_aper_enabled && aper_enabled != last_aper_enabled)) { 298 aper_enabled = ctl & AMD64_GARTEN;
276 fix = 1; 299 aper_order = (ctl >> 1) & 7;
277 break; 300 aper_size = (32 * 1024 * 1024) << aper_order;
301 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
302 aper_base <<= 25;
303
304 if (last_valid) {
305 if ((aper_order != last_aper_order) ||
306 (aper_base != last_aper_base) ||
307 (aper_enabled != last_aper_enabled)) {
308 fix = 1;
309 break;
310 }
311 }
312
313 last_aper_order = aper_order;
314 last_aper_base = aper_base;
315 last_aper_enabled = aper_enabled;
316 last_valid = 1;
278 } 317 }
279 last_aper_order = aper_order;
280 last_aper_base = aper_base;
281 last_aper_enabled = aper_enabled;
282 } 318 }
283 319
284 if (!fix && !aper_enabled) 320 if (!fix && !aper_enabled)
@@ -290,32 +326,46 @@ void __init early_gart_iommu_check(void)
290 if (gart_fix_e820 && !fix && aper_enabled) { 326 if (gart_fix_e820 && !fix && aper_enabled) {
291 if (e820_any_mapped(aper_base, aper_base + aper_size, 327 if (e820_any_mapped(aper_base, aper_base + aper_size,
292 E820_RAM)) { 328 E820_RAM)) {
293 /* reserved it, so we can resuse it in second kernel */ 329 /* reserve it, so we can reuse it in second kernel */
294 printk(KERN_INFO "update e820 for GART\n"); 330 printk(KERN_INFO "update e820 for GART\n");
295 add_memory_region(aper_base, aper_size, E820_RESERVED); 331 add_memory_region(aper_base, aper_size, E820_RESERVED);
296 update_e820(); 332 update_e820();
297 } 333 }
298 return;
299 } 334 }
300 335
336 if (!fix)
337 return;
338
301 /* different nodes have different setting, disable them all at first*/ 339 /* different nodes have different setting, disable them all at first*/
302 for (num = 24; num < 32; num++) { 340 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
303 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 341 int bus;
304 continue; 342 int dev_base, dev_limit;
343
344 bus = bus_dev_ranges[i].bus;
345 dev_base = bus_dev_ranges[i].dev_base;
346 dev_limit = bus_dev_ranges[i].dev_limit;
347
348 for (slot = dev_base; slot < dev_limit; slot++) {
349 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
350 continue;
305 351
306 ctl = read_pci_config(0, num, 3, 0x90); 352 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
307 ctl &= ~1; 353 ctl &= ~AMD64_GARTEN;
308 write_pci_config(0, num, 3, 0x90, ctl); 354 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
355 }
309 } 356 }
310 357
311} 358}
312 359
360static int __initdata printed_gart_size_msg;
361
313void __init gart_iommu_hole_init(void) 362void __init gart_iommu_hole_init(void)
314{ 363{
364 u32 agp_aper_base = 0, agp_aper_order = 0;
315 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; 365 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
316 u64 aper_base, last_aper_base = 0; 366 u64 aper_base, last_aper_base = 0;
317 int fix, num, valid_agp = 0; 367 int fix, slot, valid_agp = 0;
318 int node; 368 int i, node;
319 369
320 if (gart_iommu_aperture_disabled || !fix_aperture || 370 if (gart_iommu_aperture_disabled || !fix_aperture ||
321 !early_pci_allowed()) 371 !early_pci_allowed())
@@ -323,38 +373,63 @@ void __init gart_iommu_hole_init(void)
323 373
324 printk(KERN_INFO "Checking aperture...\n"); 374 printk(KERN_INFO "Checking aperture...\n");
325 375
376 if (!fallback_aper_force)
377 agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
378
326 fix = 0; 379 fix = 0;
327 node = 0; 380 node = 0;
328 for (num = 24; num < 32; num++) { 381 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
329 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 382 int bus;
330 continue; 383 int dev_base, dev_limit;
331 384
332 iommu_detected = 1; 385 bus = bus_dev_ranges[i].bus;
333 gart_iommu_aperture = 1; 386 dev_base = bus_dev_ranges[i].dev_base;
334 387 dev_limit = bus_dev_ranges[i].dev_limit;
335 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 388
336 aper_size = (32 * 1024 * 1024) << aper_order; 389 for (slot = dev_base; slot < dev_limit; slot++) {
337 aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; 390 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
338 aper_base <<= 25; 391 continue;
339 392
340 printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", 393 iommu_detected = 1;
341 node, aper_base, aper_size >> 20); 394 gart_iommu_aperture = 1;
342 node++; 395
343 396 aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
344 if (!aperture_valid(aper_base, aper_size)) { 397 aper_size = (32 * 1024 * 1024) << aper_order;
345 fix = 1; 398 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
346 break; 399 aper_base <<= 25;
347 } 400
401 printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
402 node, aper_base, aper_size >> 20);
403 node++;
404
405 if (!aperture_valid(aper_base, aper_size, 64<<20)) {
406 if (valid_agp && agp_aper_base &&
407 agp_aper_base == aper_base &&
408 agp_aper_order == aper_order) {
409 /* the same between two setting from NB and agp */
410 if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) {
411 printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
412 printk(KERN_ERR "please increase GART size in your BIOS setup\n");
413 printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
414 printed_gart_size_msg = 1;
415 }
416 } else {
417 fix = 1;
418 goto out;
419 }
420 }
348 421
349 if ((last_aper_order && aper_order != last_aper_order) || 422 if ((last_aper_order && aper_order != last_aper_order) ||
350 (last_aper_base && aper_base != last_aper_base)) { 423 (last_aper_base && aper_base != last_aper_base)) {
351 fix = 1; 424 fix = 1;
352 break; 425 goto out;
426 }
427 last_aper_order = aper_order;
428 last_aper_base = aper_base;
353 } 429 }
354 last_aper_order = aper_order;
355 last_aper_base = aper_base;
356 } 430 }
357 431
432out:
358 if (!fix && !fallback_aper_force) { 433 if (!fix && !fallback_aper_force) {
359 if (last_aper_base) { 434 if (last_aper_base) {
360 unsigned long n = (32 * 1024 * 1024) << last_aper_order; 435 unsigned long n = (32 * 1024 * 1024) << last_aper_order;
@@ -364,8 +439,10 @@ void __init gart_iommu_hole_init(void)
364 return; 439 return;
365 } 440 }
366 441
367 if (!fallback_aper_force) 442 if (!fallback_aper_force) {
368 aper_alloc = search_agp_bridge(&aper_order, &valid_agp); 443 aper_alloc = agp_aper_base;
444 aper_order = agp_aper_order;
445 }
369 446
370 if (aper_alloc) { 447 if (aper_alloc) {
371 /* Got the aperture from the AGP bridge */ 448 /* Got the aperture from the AGP bridge */
@@ -401,16 +478,24 @@ void __init gart_iommu_hole_init(void)
401 } 478 }
402 479
403 /* Fix up the north bridges */ 480 /* Fix up the north bridges */
404 for (num = 24; num < 32; num++) { 481 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
405 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 482 int bus;
406 continue; 483 int dev_base, dev_limit;
407 484
408 /* 485 bus = bus_dev_ranges[i].bus;
409 * Don't enable translation yet. That is done later. 486 dev_base = bus_dev_ranges[i].dev_base;
410 * Assume this BIOS didn't initialise the GART so 487 dev_limit = bus_dev_ranges[i].dev_limit;
411 * just overwrite all previous bits 488 for (slot = dev_base; slot < dev_limit; slot++) {
412 */ 489 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
413 write_pci_config(0, num, 3, 0x90, aper_order<<1); 490 continue;
414 write_pci_config(0, num, 3, 0x94, aper_alloc>>25); 491
492 /* Don't enable translation yet. That is done later.
493 Assume this BIOS didn't initialise the GART so
494 just overwrite all previous bits */
495 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
496 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
497 }
415 } 498 }
499
500 set_up_gart_resume(aper_order, aper_alloc);
416} 501}
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6c..45d8da405ad9 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -64,9 +64,8 @@ static int enable_local_apic __initdata;
64 64
65/* Local APIC timer verification ok */ 65/* Local APIC timer verification ok */
66static int local_apic_timer_verify_ok; 66static int local_apic_timer_verify_ok;
67/* Disable local APIC timer from the kernel commandline or via dmi quirk 67/* Disable local APIC timer from the kernel commandline or via dmi quirk */
68 or using CPU MSR check */ 68static int local_apic_timer_disabled;
69int local_apic_timer_disabled;
70/* Local APIC timer works in C2 */ 69/* Local APIC timer works in C2 */
71int local_apic_timer_c2_ok; 70int local_apic_timer_c2_ok;
72EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 71EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -1154,9 +1153,6 @@ static int __init detect_init_APIC(void)
1154 if (l & MSR_IA32_APICBASE_ENABLE) 1153 if (l & MSR_IA32_APICBASE_ENABLE)
1155 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; 1154 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1156 1155
1157 if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED)
1158 nmi_watchdog = NMI_LOCAL_APIC;
1159
1160 printk(KERN_INFO "Found and enabled local APIC!\n"); 1156 printk(KERN_INFO "Found and enabled local APIC!\n");
1161 1157
1162 apic_pm_activate(); 1158 apic_pm_activate();
@@ -1269,6 +1265,10 @@ int __init APIC_init_uniprocessor(void)
1269 1265
1270 setup_local_APIC(); 1266 setup_local_APIC();
1271 1267
1268#ifdef CONFIG_X86_IO_APIC
1269 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
1270#endif
1271 localise_nmi_watchdog();
1272 end_local_APIC_setup(); 1272 end_local_APIC_setup();
1273#ifdef CONFIG_X86_IO_APIC 1273#ifdef CONFIG_X86_IO_APIC
1274 if (smp_found_config) 1274 if (smp_found_config)
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0633cfd0dc29..3ef7752aa8e5 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -43,7 +43,7 @@
43#include <mach_ipi.h> 43#include <mach_ipi.h>
44#include <mach_apic.h> 44#include <mach_apic.h>
45 45
46int disable_apic_timer __cpuinitdata; 46static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 47static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 48int disable_apic;
49 49
@@ -422,32 +422,8 @@ void __init setup_boot_APIC_clock(void)
422 setup_APIC_timer(); 422 setup_APIC_timer();
423} 423}
424 424
425/*
426 * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
427 * C1E flag only in the secondary CPU, so when we detect the wreckage
428 * we already have enabled the boot CPU local apic timer. Check, if
429 * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
430 * set the DUMMY flag again and force the broadcast mode in the
431 * clockevents layer.
432 */
433static void __cpuinit check_boot_apic_timer_broadcast(void)
434{
435 if (!disable_apic_timer ||
436 (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
437 return;
438
439 printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
440 lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
441
442 local_irq_enable();
443 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
444 &boot_cpu_physical_apicid);
445 local_irq_disable();
446}
447
448void __cpuinit setup_secondary_APIC_clock(void) 425void __cpuinit setup_secondary_APIC_clock(void)
449{ 426{
450 check_boot_apic_timer_broadcast();
451 setup_APIC_timer(); 427 setup_APIC_timer();
452} 428}
453 429
@@ -875,7 +851,7 @@ static int __init detect_init_APIC(void)
875 851
876void __init early_init_lapic_mapping(void) 852void __init early_init_lapic_mapping(void)
877{ 853{
878 unsigned long apic_phys; 854 unsigned long phys_addr;
879 855
880 /* 856 /*
881 * If no local APIC can be found then go out 857 * If no local APIC can be found then go out
@@ -884,11 +860,11 @@ void __init early_init_lapic_mapping(void)
884 if (!smp_found_config) 860 if (!smp_found_config)
885 return; 861 return;
886 862
887 apic_phys = mp_lapic_addr; 863 phys_addr = mp_lapic_addr;
888 864
889 set_fixmap_nocache(FIX_APIC_BASE, apic_phys); 865 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
890 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", 866 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
891 APIC_BASE, apic_phys); 867 APIC_BASE, phys_addr);
892 868
893 /* 869 /*
894 * Fetch the APIC ID of the BSP in case we have a 870 * Fetch the APIC ID of the BSP in case we have a
@@ -954,6 +930,8 @@ int __init APIC_init_uniprocessor(void)
954 if (!skip_ioapic_setup && nr_ioapics) 930 if (!skip_ioapic_setup && nr_ioapics)
955 enable_IO_APIC(); 931 enable_IO_APIC();
956 932
933 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
934 localise_nmi_watchdog();
957 end_local_APIC_setup(); 935 end_local_APIC_setup();
958 936
959 if (smp_found_config && !skip_ioapic_setup && nr_ioapics) 937 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index bf9290e29013..00e6d1370954 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
228#include <linux/suspend.h> 228#include <linux/suspend.h>
229#include <linux/kthread.h> 229#include <linux/kthread.h>
230#include <linux/jiffies.h> 230#include <linux/jiffies.h>
231#include <linux/smp_lock.h>
231 232
232#include <asm/system.h> 233#include <asm/system.h>
233#include <asm/uaccess.h> 234#include <asm/uaccess.h>
@@ -1149,7 +1150,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
1149 as->event_tail = 0; 1150 as->event_tail = 0;
1150 } 1151 }
1151 as->events[as->event_head] = event; 1152 as->events[as->event_head] = event;
1152 if ((!as->suser) || (!as->writer)) 1153 if (!as->suser || !as->writer)
1153 continue; 1154 continue;
1154 switch (event) { 1155 switch (event) {
1155 case APM_SYS_SUSPEND: 1156 case APM_SYS_SUSPEND:
@@ -1396,7 +1397,7 @@ static void apm_mainloop(void)
1396 1397
1397static int check_apm_user(struct apm_user *as, const char *func) 1398static int check_apm_user(struct apm_user *as, const char *func)
1398{ 1399{
1399 if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { 1400 if (as == NULL || as->magic != APM_BIOS_MAGIC) {
1400 printk(KERN_ERR "apm: %s passed bad filp\n", func); 1401 printk(KERN_ERR "apm: %s passed bad filp\n", func);
1401 return 1; 1402 return 1;
1402 } 1403 }
@@ -1459,18 +1460,19 @@ static unsigned int do_poll(struct file *fp, poll_table *wait)
1459 return 0; 1460 return 0;
1460} 1461}
1461 1462
1462static int do_ioctl(struct inode *inode, struct file *filp, 1463static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
1463 u_int cmd, u_long arg)
1464{ 1464{
1465 struct apm_user *as; 1465 struct apm_user *as;
1466 int ret;
1466 1467
1467 as = filp->private_data; 1468 as = filp->private_data;
1468 if (check_apm_user(as, "ioctl")) 1469 if (check_apm_user(as, "ioctl"))
1469 return -EIO; 1470 return -EIO;
1470 if ((!as->suser) || (!as->writer)) 1471 if (!as->suser || !as->writer)
1471 return -EPERM; 1472 return -EPERM;
1472 switch (cmd) { 1473 switch (cmd) {
1473 case APM_IOC_STANDBY: 1474 case APM_IOC_STANDBY:
1475 lock_kernel();
1474 if (as->standbys_read > 0) { 1476 if (as->standbys_read > 0) {
1475 as->standbys_read--; 1477 as->standbys_read--;
1476 as->standbys_pending--; 1478 as->standbys_pending--;
@@ -1479,8 +1481,10 @@ static int do_ioctl(struct inode *inode, struct file *filp,
1479 queue_event(APM_USER_STANDBY, as); 1481 queue_event(APM_USER_STANDBY, as);
1480 if (standbys_pending <= 0) 1482 if (standbys_pending <= 0)
1481 standby(); 1483 standby();
1484 unlock_kernel();
1482 break; 1485 break;
1483 case APM_IOC_SUSPEND: 1486 case APM_IOC_SUSPEND:
1487 lock_kernel();
1484 if (as->suspends_read > 0) { 1488 if (as->suspends_read > 0) {
1485 as->suspends_read--; 1489 as->suspends_read--;
1486 as->suspends_pending--; 1490 as->suspends_pending--;
@@ -1488,16 +1492,17 @@ static int do_ioctl(struct inode *inode, struct file *filp,
1488 } else 1492 } else
1489 queue_event(APM_USER_SUSPEND, as); 1493 queue_event(APM_USER_SUSPEND, as);
1490 if (suspends_pending <= 0) { 1494 if (suspends_pending <= 0) {
1491 return suspend(1); 1495 ret = suspend(1);
1492 } else { 1496 } else {
1493 as->suspend_wait = 1; 1497 as->suspend_wait = 1;
1494 wait_event_interruptible(apm_suspend_waitqueue, 1498 wait_event_interruptible(apm_suspend_waitqueue,
1495 as->suspend_wait == 0); 1499 as->suspend_wait == 0);
1496 return as->suspend_result; 1500 ret = as->suspend_result;
1497 } 1501 }
1498 break; 1502 unlock_kernel();
1503 return ret;
1499 default: 1504 default:
1500 return -EINVAL; 1505 return -ENOTTY;
1501 } 1506 }
1502 return 0; 1507 return 0;
1503} 1508}
@@ -1860,7 +1865,7 @@ static const struct file_operations apm_bios_fops = {
1860 .owner = THIS_MODULE, 1865 .owner = THIS_MODULE,
1861 .read = do_read, 1866 .read = do_read,
1862 .poll = do_poll, 1867 .poll = do_poll,
1863 .ioctl = do_ioctl, 1868 .unlocked_ioctl = do_ioctl,
1864 .open = do_open, 1869 .open = do_open,
1865 .release = do_release, 1870 .release = do_release,
1866}; 1871};
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0c6f8190887..65b1be5fe9ce 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -6,11 +6,15 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o feature_names.o 6obj-y += proc.o feature_names.o
7 7
8obj-$(CONFIG_X86_32) += common.o bugs.o 8obj-$(CONFIG_X86_32) += common.o bugs.o
9obj-$(CONFIG_X86_64) += bugs_64.o
9obj-$(CONFIG_X86_32) += amd.o 10obj-$(CONFIG_X86_32) += amd.o
11obj-$(CONFIG_X86_64) += amd_64.o
10obj-$(CONFIG_X86_32) += cyrix.o 12obj-$(CONFIG_X86_32) += cyrix.o
11obj-$(CONFIG_X86_32) += centaur.o 13obj-$(CONFIG_X86_32) += centaur.o
14obj-$(CONFIG_X86_64) += centaur_64.o
12obj-$(CONFIG_X86_32) += transmeta.o 15obj-$(CONFIG_X86_32) += transmeta.o
13obj-$(CONFIG_X86_32) += intel.o 16obj-$(CONFIG_X86_32) += intel.o
17obj-$(CONFIG_X86_64) += intel_64.o
14obj-$(CONFIG_X86_32) += umc.o 18obj-$(CONFIG_X86_32) += umc.o
15 19
16obj-$(CONFIG_X86_MCE) += mcheck/ 20obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c2e1ce33c7cb..84a8220a6072 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -1,9 +1,7 @@
1
2/* 1/*
3 * Routines to indentify additional cpu features that are scattered in 2 * Routines to indentify additional cpu features that are scattered in
4 * cpuid space. 3 * cpuid space.
5 */ 4 */
6
7#include <linux/cpu.h> 5#include <linux/cpu.h>
8 6
9#include <asm/pat.h> 7#include <asm/pat.h>
@@ -53,19 +51,20 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
53#ifdef CONFIG_X86_PAT 51#ifdef CONFIG_X86_PAT
54void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) 52void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
55{ 53{
54 if (!cpu_has_pat)
55 pat_disable("PAT not supported by CPU.");
56
56 switch (c->x86_vendor) { 57 switch (c->x86_vendor) {
57 case X86_VENDOR_AMD:
58 if (c->x86 >= 0xf && c->x86 <= 0x11)
59 return;
60 break;
61 case X86_VENDOR_INTEL: 58 case X86_VENDOR_INTEL:
62 if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) 59 if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
63 return; 60 return;
64 break; 61 break;
62 case X86_VENDOR_AMD:
63 case X86_VENDOR_CENTAUR:
64 case X86_VENDOR_TRANSMETA:
65 return;
65 } 66 }
66 67
67 pat_disable(cpu_has_pat ? 68 pat_disable("PAT disabled. Not yet verified on this CPU type.");
68 "PAT disabled. Not yet verified on this CPU type." :
69 "PAT not supported by CPU.");
70} 69}
71#endif 70#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 245866828294..81a07ca65d44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,43 +24,6 @@
24extern void vide(void); 24extern void vide(void);
25__asm__(".align 4\nvide: ret"); 25__asm__(".align 4\nvide: ret");
26 26
27#ifdef CONFIG_X86_LOCAL_APIC
28#define ENABLE_C1E_MASK 0x18000000
29#define CPUID_PROCESSOR_SIGNATURE 1
30#define CPUID_XFAM 0x0ff00000
31#define CPUID_XFAM_K8 0x00000000
32#define CPUID_XFAM_10H 0x00100000
33#define CPUID_XFAM_11H 0x00200000
34#define CPUID_XMOD 0x000f0000
35#define CPUID_XMOD_REV_F 0x00040000
36
37/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
38static __cpuinit int amd_apic_timer_broken(void)
39{
40 u32 lo, hi;
41 u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
42 switch (eax & CPUID_XFAM) {
43 case CPUID_XFAM_K8:
44 if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
45 break;
46 case CPUID_XFAM_10H:
47 case CPUID_XFAM_11H:
48 rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
49 if (lo & ENABLE_C1E_MASK) {
50 if (smp_processor_id() != boot_cpu_physical_apicid)
51 printk(KERN_INFO "AMD C1E detected late. "
52 " Force timer broadcast.\n");
53 return 1;
54 }
55 break;
56 default:
57 /* err on the side of caution */
58 return 1;
59 }
60 return 0;
61}
62#endif
63
64int force_mwait __cpuinitdata; 27int force_mwait __cpuinitdata;
65 28
66static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 29static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
@@ -297,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
297 num_cache_leaves = 3; 260 num_cache_leaves = 3;
298 } 261 }
299 262
300#ifdef CONFIG_X86_LOCAL_APIC
301 if (amd_apic_timer_broken())
302 local_apic_timer_disabled = 1;
303#endif
304
305 /* K6s reports MCEs but don't actually have all the MSRs */ 263 /* K6s reports MCEs but don't actually have all the MSRs */
306 if (c->x86 < 6) 264 if (c->x86 < 6)
307 clear_cpu_cap(c, X86_FEATURE_MCE); 265 clear_cpu_cap(c, X86_FEATURE_MCE);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
new file mode 100644
index 000000000000..30b7557c9641
--- /dev/null
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -0,0 +1,211 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3
4#include <asm/numa_64.h>
5#include <asm/mmconfig.h>
6#include <asm/cacheflush.h>
7
8#include <mach_apic.h>
9
10#include "cpu.h"
11
12int force_mwait __cpuinitdata;
13
14#ifdef CONFIG_NUMA
15static int __cpuinit nearby_node(int apicid)
16{
17 int i, node;
18
19 for (i = apicid - 1; i >= 0; i--) {
20 node = apicid_to_node[i];
21 if (node != NUMA_NO_NODE && node_online(node))
22 return node;
23 }
24 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
25 node = apicid_to_node[i];
26 if (node != NUMA_NO_NODE && node_online(node))
27 return node;
28 }
29 return first_node(node_online_map); /* Shouldn't happen */
30}
31#endif
32
33/*
34 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
35 * Assumes number of cores is a power of two.
36 */
37static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
38{
39#ifdef CONFIG_SMP
40 unsigned bits;
41#ifdef CONFIG_NUMA
42 int cpu = smp_processor_id();
43 int node = 0;
44 unsigned apicid = hard_smp_processor_id();
45#endif
46 bits = c->x86_coreid_bits;
47
48 /* Low order bits define the core id (index of core in socket) */
49 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
50 /* Convert the initial APIC ID into the socket ID */
51 c->phys_proc_id = c->initial_apicid >> bits;
52
53#ifdef CONFIG_NUMA
54 node = c->phys_proc_id;
55 if (apicid_to_node[apicid] != NUMA_NO_NODE)
56 node = apicid_to_node[apicid];
57 if (!node_online(node)) {
58 /* Two possibilities here:
59 - The CPU is missing memory and no node was created.
60 In that case try picking one from a nearby CPU
61 - The APIC IDs differ from the HyperTransport node IDs
62 which the K8 northbridge parsing fills in.
63 Assume they are all increased by a constant offset,
64 but in the same order as the HT nodeids.
65 If that doesn't result in a usable node fall back to the
66 path for the previous case. */
67
68 int ht_nodeid = c->initial_apicid;
69
70 if (ht_nodeid >= 0 &&
71 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
72 node = apicid_to_node[ht_nodeid];
73 /* Pick a nearby node */
74 if (!node_online(node))
75 node = nearby_node(apicid);
76 }
77 numa_set_node(cpu, node);
78
79 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
80#endif
81#endif
82}
83
84static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
85{
86#ifdef CONFIG_SMP
87 unsigned bits, ecx;
88
89 /* Multi core CPU? */
90 if (c->extended_cpuid_level < 0x80000008)
91 return;
92
93 ecx = cpuid_ecx(0x80000008);
94
95 c->x86_max_cores = (ecx & 0xff) + 1;
96
97 /* CPU telling us the core id bits shift? */
98 bits = (ecx >> 12) & 0xF;
99
100 /* Otherwise recompute */
101 if (bits == 0) {
102 while ((1 << bits) < c->x86_max_cores)
103 bits++;
104 }
105
106 c->x86_coreid_bits = bits;
107
108#endif
109}
110
111static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
112{
113 early_init_amd_mc(c);
114
115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
116 if (c->x86_power & (1<<8))
117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
118}
119
120static void __cpuinit init_amd(struct cpuinfo_x86 *c)
121{
122 unsigned level;
123
124#ifdef CONFIG_SMP
125 unsigned long value;
126
127 /*
128 * Disable TLB flush filter by setting HWCR.FFDIS on K8
129 * bit 6 of msr C001_0015
130 *
131 * Errata 63 for SH-B3 steppings
132 * Errata 122 for all steppings (F+ have it disabled by default)
133 */
134 if (c->x86 == 15) {
135 rdmsrl(MSR_K8_HWCR, value);
136 value |= 1 << 6;
137 wrmsrl(MSR_K8_HWCR, value);
138 }
139#endif
140
141 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
142 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
143 clear_cpu_cap(c, 0*32+31);
144
145 /* On C+ stepping K8 rep microcode works well for copy/memset */
146 level = cpuid_eax(1);
147 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
148 level >= 0x0f58))
149 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
150 if (c->x86 == 0x10 || c->x86 == 0x11)
151 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
152
153 /* Enable workaround for FXSAVE leak */
154 if (c->x86 >= 6)
155 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
156
157 level = get_model_name(c);
158 if (!level) {
159 switch (c->x86) {
160 case 15:
161 /* Should distinguish Models here, but this is only
162 a fallback anyways. */
163 strcpy(c->x86_model_id, "Hammer");
164 break;
165 }
166 }
167 display_cacheinfo(c);
168
169 /* Multi core CPU? */
170 if (c->extended_cpuid_level >= 0x80000008)
171 amd_detect_cmp(c);
172
173 if (c->extended_cpuid_level >= 0x80000006 &&
174 (cpuid_edx(0x80000006) & 0xf000))
175 num_cache_leaves = 4;
176 else
177 num_cache_leaves = 3;
178
179 if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
180 set_cpu_cap(c, X86_FEATURE_K8);
181
182 /* MFENCE stops RDTSC speculation */
183 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
184
185 if (c->x86 == 0x10)
186 fam10h_check_enable_mmcfg();
187
188 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
189 unsigned long long tseg;
190
191 /*
192 * Split up direct mapping around the TSEG SMM area.
193 * Don't do it for gbpages because there seems very little
194 * benefit in doing so.
195 */
196 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
197 (tseg >> PMD_SHIFT) <
198 (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
199 set_memory_4k((unsigned long)__va(tseg), 1);
200 }
201}
202
203static struct cpu_dev amd_cpu_dev __cpuinitdata = {
204 .c_vendor = "AMD",
205 .c_ident = { "AuthenticAMD" },
206 .c_early_init = early_init_amd,
207 .c_init = init_amd,
208};
209
210cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
211
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 170d2f5523b2..1b1c56bb338f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -59,8 +59,12 @@ static void __init check_fpu(void)
59 return; 59 return;
60 } 60 }
61 61
62/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ 62 /*
63 /* Test for the divl bug.. */ 63 * trap_init() enabled FXSR and company _before_ testing for FP
64 * problems here.
65 *
66 * Test for the divl bug..
67 */
64 __asm__("fninit\n\t" 68 __asm__("fninit\n\t"
65 "fldl %1\n\t" 69 "fldl %1\n\t"
66 "fdivl %2\n\t" 70 "fdivl %2\n\t"
@@ -108,10 +112,15 @@ static void __init check_popad(void)
108 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " 112 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
109 : "=&a" (res) 113 : "=&a" (res)
110 : "d" (inp) 114 : "d" (inp)
111 : "ecx", "edi" ); 115 : "ecx", "edi");
112 /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ 116 /*
113 if (res != 12345678) printk( "Buggy.\n" ); 117 * If this fails, it means that any user program may lock the
114 else printk( "OK.\n" ); 118 * CPU hard. Too bad.
119 */
120 if (res != 12345678)
121 printk("Buggy.\n");
122 else
123 printk("OK.\n");
115#endif 124#endif
116} 125}
117 126
@@ -137,7 +146,8 @@ static void __init check_config(void)
137 * i486+ only features! (WP works in supervisor mode and the 146 * i486+ only features! (WP works in supervisor mode and the
138 * new "invlpg" and "bswap" instructions) 147 * new "invlpg" and "bswap" instructions)
139 */ 148 */
140#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) 149#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
150 defined(CONFIG_X86_BSWAP)
141 if (boot_cpu_data.x86 == 3) 151 if (boot_cpu_data.x86 == 3)
142 panic("Kernel requires i486+ for 'invlpg' and other features"); 152 panic("Kernel requires i486+ for 'invlpg' and other features");
143#endif 153#endif
@@ -170,6 +180,7 @@ void __init check_bugs(void)
170 check_fpu(); 180 check_fpu();
171 check_hlt(); 181 check_hlt();
172 check_popad(); 182 check_popad();
173 init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 183 init_utsname()->machine[1] =
184 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
174 alternative_instructions(); 185 alternative_instructions();
175} 186}
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 9a3ed0649d4e..9a3ed0649d4e 100644
--- a/arch/x86/kernel/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
new file mode 100644
index 000000000000..13526fd5cce1
--- /dev/null
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -0,0 +1,43 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3
4#include <asm/cpufeature.h>
5#include <asm/processor.h>
6
7#include "cpu.h"
8
9static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
10{
11 if (c->x86 == 0x6 && c->x86_model >= 0xf)
12 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
13}
14
15static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
16{
17 /* Cache sizes */
18 unsigned n;
19
20 n = c->extended_cpuid_level;
21 if (n >= 0x80000008) {
22 unsigned eax = cpuid_eax(0x80000008);
23 c->x86_virt_bits = (eax >> 8) & 0xff;
24 c->x86_phys_bits = eax & 0xff;
25 }
26
27 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
28 c->x86_cache_alignment = c->x86_clflush_size * 2;
29 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
30 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
31 }
32 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
33}
34
35static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
36 .c_vendor = "Centaur",
37 .c_ident = { "CentaurHauls" },
38 .c_early_init = early_init_centaur,
39 .c_init = init_centaur,
40};
41
42cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);
43
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 783691b2a738..4d894e8565fe 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,3 +1,6 @@
1#ifndef ARCH_X86_CPU_H
2
3#define ARCH_X86_CPU_H
1 4
2struct cpu_model_info { 5struct cpu_model_info {
3 int vendor; 6 int vendor;
@@ -36,3 +39,5 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
36 39
37extern int get_model_name(struct cpuinfo_x86 *c); 40extern int get_model_name(struct cpuinfo_x86 *c);
38extern void display_cacheinfo(struct cpuinfo_x86 *c); 41extern void display_cacheinfo(struct cpuinfo_x86 *c);
42
43#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index f03e9153618e..965ea52767ac 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -26,9 +26,10 @@
26#define NFORCE2_SAFE_DISTANCE 50 26#define NFORCE2_SAFE_DISTANCE 50
27 27
28/* Delay in ms between FSB changes */ 28/* Delay in ms between FSB changes */
29//#define NFORCE2_DELAY 10 29/* #define NFORCE2_DELAY 10 */
30 30
31/* nforce2_chipset: 31/*
32 * nforce2_chipset:
32 * FSB is changed using the chipset 33 * FSB is changed using the chipset
33 */ 34 */
34static struct pci_dev *nforce2_chipset_dev; 35static struct pci_dev *nforce2_chipset_dev;
@@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev;
36/* fid: 37/* fid:
37 * multiplier * 10 38 * multiplier * 10
38 */ 39 */
39static int fid = 0; 40static int fid;
40 41
41/* min_fsb, max_fsb: 42/* min_fsb, max_fsb:
42 * minimum and maximum FSB (= FSB at boot time) 43 * minimum and maximum FSB (= FSB at boot time)
43 */ 44 */
44static int min_fsb = 0; 45static int min_fsb;
45static int max_fsb = 0; 46static int max_fsb;
46 47
47MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); 48MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
48MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); 49MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
@@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444);
53 54
54MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); 55MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
55MODULE_PARM_DESC(min_fsb, 56MODULE_PARM_DESC(min_fsb,
56 "Minimum FSB to use, if not defined: current FSB - 50"); 57 "Minimum FSB to use, if not defined: current FSB - 50");
57 58
58#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) 59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
59 60
@@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb)
139 140
140 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ 141 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
141 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 142 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
142 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); 143 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL);
143 if (!nforce2_sub5) 144 if (!nforce2_sub5)
144 return 0; 145 return 0;
145 146
@@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
147 fsb /= 1000000; 148 fsb /= 1000000;
148 149
149 /* Check if PLL register is already set */ 150 /* Check if PLL register is already set */
150 pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); 151 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
151 152
152 if(bootfsb || !temp) 153 if (bootfsb || !temp)
153 return fsb; 154 return fsb;
154 155
155 /* Use PLL register FSB value */ 156 /* Use PLL register FSB value */
156 pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); 157 pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp);
157 fsb = nforce2_calc_fsb(temp); 158 fsb = nforce2_calc_fsb(temp);
158 159
159 return fsb; 160 return fsb;
@@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb)
184 } 185 }
185 186
186 /* First write? Then set actual value */ 187 /* First write? Then set actual value */
187 pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); 188 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
188 if (!temp) { 189 if (!temp) {
189 pll = nforce2_calc_pll(tfsb); 190 pll = nforce2_calc_pll(tfsb);
190 191
@@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb)
210 tfsb--; 211 tfsb--;
211 212
212 /* Calculate the PLL reg. value */ 213 /* Calculate the PLL reg. value */
213 if ((pll = nforce2_calc_pll(tfsb)) == -1) 214 pll = nforce2_calc_pll(tfsb);
215 if (pll == -1)
214 return -EINVAL; 216 return -EINVAL;
215 217
216 nforce2_write_pll(pll); 218 nforce2_write_pll(pll);
@@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu)
249static int nforce2_target(struct cpufreq_policy *policy, 251static int nforce2_target(struct cpufreq_policy *policy,
250 unsigned int target_freq, unsigned int relation) 252 unsigned int target_freq, unsigned int relation)
251{ 253{
252// unsigned long flags; 254/* unsigned long flags; */
253 struct cpufreq_freqs freqs; 255 struct cpufreq_freqs freqs;
254 unsigned int target_fsb; 256 unsigned int target_fsb;
255 257
@@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy,
271 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 273 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
272 274
273 /* Disable IRQs */ 275 /* Disable IRQs */
274 //local_irq_save(flags); 276 /* local_irq_save(flags); */
275 277
276 if (nforce2_set_fsb(target_fsb) < 0) 278 if (nforce2_set_fsb(target_fsb) < 0)
277 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", 279 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
278 target_fsb); 280 target_fsb);
279 else 281 else
280 dprintk("Changed FSB successfully to %d\n", 282 dprintk("Changed FSB successfully to %d\n",
281 target_fsb); 283 target_fsb);
282 284
283 /* Enable IRQs */ 285 /* Enable IRQs */
284 //local_irq_restore(flags); 286 /* local_irq_restore(flags); */
285 287
286 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 288 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
287 289
@@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy)
302 policy->max = (fsb_pol_max + 1) * fid * 100; 304 policy->max = (fsb_pol_max + 1) * fid * 100;
303 305
304 cpufreq_verify_within_limits(policy, 306 cpufreq_verify_within_limits(policy,
305 policy->cpuinfo.min_freq, 307 policy->cpuinfo.min_freq,
306 policy->cpuinfo.max_freq); 308 policy->cpuinfo.max_freq);
307 return 0; 309 return 0;
308} 310}
309 311
@@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
347 /* Set maximum FSB to FSB at boot time */ 349 /* Set maximum FSB to FSB at boot time */
348 max_fsb = nforce2_fsb_read(1); 350 max_fsb = nforce2_fsb_read(1);
349 351
350 if(!max_fsb) 352 if (!max_fsb)
351 return -EIO; 353 return -EIO;
352 354
353 if (!min_fsb) 355 if (!min_fsb)
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
new file mode 100644
index 000000000000..fcb1cc9d75ca
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -0,0 +1,103 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3#include <asm/processor.h>
4#include <asm/ptrace.h>
5#include <asm/topology.h>
6#include <asm/numa_64.h>
7
8#include "cpu.h"
9
10static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
11{
12 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
13 (c->x86 == 0x6 && c->x86_model >= 0x0e))
14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15}
16
17/*
18 * find out the number of processor cores on the die
19 */
20static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
21{
22 unsigned int eax, t;
23
24 if (c->cpuid_level < 4)
25 return 1;
26
27 cpuid_count(4, 0, &eax, &t, &t, &t);
28
29 if (eax & 0x1f)
30 return ((eax >> 26) + 1);
31 else
32 return 1;
33}
34
35static void __cpuinit srat_detect_node(void)
36{
37#ifdef CONFIG_NUMA
38 unsigned node;
39 int cpu = smp_processor_id();
40 int apicid = hard_smp_processor_id();
41
42 /* Don't do the funky fallback heuristics the AMD version employs
43 for now. */
44 node = apicid_to_node[apicid];
45 if (node == NUMA_NO_NODE || !node_online(node))
46 node = first_node(node_online_map);
47 numa_set_node(cpu, node);
48
49 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
50#endif
51}
52
53static void __cpuinit init_intel(struct cpuinfo_x86 *c)
54{
55 /* Cache sizes */
56 unsigned n;
57
58 init_intel_cacheinfo(c);
59 if (c->cpuid_level > 9) {
60 unsigned eax = cpuid_eax(10);
61 /* Check for version and the number of counters */
62 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
63 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
64 }
65
66 if (cpu_has_ds) {
67 unsigned int l1, l2;
68 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
69 if (!(l1 & (1<<11)))
70 set_cpu_cap(c, X86_FEATURE_BTS);
71 if (!(l1 & (1<<12)))
72 set_cpu_cap(c, X86_FEATURE_PEBS);
73 }
74
75
76 if (cpu_has_bts)
77 ds_init_intel(c);
78
79 n = c->extended_cpuid_level;
80 if (n >= 0x80000008) {
81 unsigned eax = cpuid_eax(0x80000008);
82 c->x86_virt_bits = (eax >> 8) & 0xff;
83 c->x86_phys_bits = eax & 0xff;
84 }
85
86 if (c->x86 == 15)
87 c->x86_cache_alignment = c->x86_clflush_size * 2;
88 if (c->x86 == 6)
89 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
90 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
91 c->x86_max_cores = intel_num_cpu_cores(c);
92
93 srat_detect_node();
94}
95
96static struct cpu_dev intel_cpu_dev __cpuinitdata = {
97 .c_vendor = "Intel",
98 .c_ident = { "GenuineIntel" },
99 .c_early_init = early_init_intel,
100 .c_init = init_intel,
101};
102cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
103
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 26d615dcb149..2c8afafa18e8 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata =
62 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 62 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
63 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ 63 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
64 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ 64 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
65 { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */
65 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 66 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
66 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 67 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
67 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 68 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index e633c9c2b764..f390c9f66351 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -9,23 +9,23 @@
9#include <linux/interrupt.h> 9#include <linux/interrupt.h>
10#include <linux/smp.h> 10#include <linux/smp.h>
11 11
12#include <asm/processor.h> 12#include <asm/processor.h>
13#include <asm/system.h> 13#include <asm/system.h>
14#include <asm/msr.h> 14#include <asm/msr.h>
15 15
16#include "mce.h" 16#include "mce.h"
17 17
18/* Machine Check Handler For AMD Athlon/Duron */ 18/* Machine Check Handler For AMD Athlon/Duron */
19static void k7_machine_check(struct pt_regs * regs, long error_code) 19static void k7_machine_check(struct pt_regs *regs, long error_code)
20{ 20{
21 int recover=1; 21 int recover = 1;
22 u32 alow, ahigh, high, low; 22 u32 alow, ahigh, high, low;
23 u32 mcgstl, mcgsth; 23 u32 mcgstl, mcgsth;
24 int i; 24 int i;
25 25
26 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 26 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
27 if (mcgstl & (1<<0)) /* Recoverable ? */ 27 if (mcgstl & (1<<0)) /* Recoverable ? */
28 recover=0; 28 recover = 0;
29 29
30 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 30 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
31 smp_processor_id(), mcgsth, mcgstl); 31 smp_processor_id(), mcgsth, mcgstl);
@@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
60 } 60 }
61 61
62 if (recover&2) 62 if (recover&2)
63 panic ("CPU context corrupt"); 63 panic("CPU context corrupt");
64 if (recover&1) 64 if (recover&1)
65 panic ("Unable to continue"); 65 panic("Unable to continue");
66 printk (KERN_EMERG "Attempting to continue.\n"); 66 printk(KERN_EMERG "Attempting to continue.\n");
67 mcgstl &= ~(1<<2); 67 mcgstl &= ~(1<<2);
68 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 68 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
69} 69}
70 70
71 71
@@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
81 machine_check_vector = k7_machine_check; 81 machine_check_vector = k7_machine_check;
82 wmb(); 82 wmb();
83 83
84 printk (KERN_INFO "Intel machine check architecture supported.\n"); 84 printk(KERN_INFO "Intel machine check architecture supported.\n");
85 rdmsr (MSR_IA32_MCG_CAP, l, h); 85 rdmsr(MSR_IA32_MCG_CAP, l, h);
86 if (l & (1<<8)) /* Control register present ? */ 86 if (l & (1<<8)) /* Control register present ? */
87 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 87 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
88 nr_mce_banks = l & 0xff; 88 nr_mce_banks = l & 0xff;
89 89
90 /* Clear status for MC index 0 separately, we don't touch CTL, 90 /* Clear status for MC index 0 separately, we don't touch CTL,
91 * as some K7 Athlons cause spurious MCEs when its enabled. */ 91 * as some K7 Athlons cause spurious MCEs when its enabled. */
92 if (boot_cpu_data.x86 == 6) { 92 if (boot_cpu_data.x86 == 6) {
93 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); 93 wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
94 i = 1; 94 i = 1;
95 } else 95 } else
96 i = 0; 96 i = 0;
97 for (; i<nr_mce_banks; i++) { 97 for (; i < nr_mce_banks; i++) {
98 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 98 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
99 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 99 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
100 } 100 }
101 101
102 set_in_cr4 (X86_CR4_MCE); 102 set_in_cr4(X86_CR4_MCE);
103 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 103 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
104 smp_processor_id()); 104 smp_processor_id());
105} 105}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index e07e8c068ae0..501ca1cea27d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -31,7 +31,7 @@
31#include <asm/idle.h> 31#include <asm/idle.h>
32 32
33#define MISC_MCELOG_MINOR 227 33#define MISC_MCELOG_MINOR 227
34#define NR_BANKS 6 34#define NR_SYSFS_BANKS 6
35 35
36atomic_t mce_entry; 36atomic_t mce_entry;
37 37
@@ -46,7 +46,7 @@ static int mce_dont_init;
46 */ 46 */
47static int tolerant = 1; 47static int tolerant = 1;
48static int banks; 48static int banks;
49static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; 49static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
50static unsigned long notify_user; 50static unsigned long notify_user;
51static int rip_msr; 51static int rip_msr;
52static int mce_bootlog = -1; 52static int mce_bootlog = -1;
@@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
209 barrier(); 209 barrier();
210 210
211 for (i = 0; i < banks; i++) { 211 for (i = 0; i < banks; i++) {
212 if (!bank[i]) 212 if (i < NR_SYSFS_BANKS && !bank[i])
213 continue; 213 continue;
214 214
215 m.misc = 0; 215 m.misc = 0;
@@ -444,9 +444,10 @@ static void mce_init(void *dummy)
444 444
445 rdmsrl(MSR_IA32_MCG_CAP, cap); 445 rdmsrl(MSR_IA32_MCG_CAP, cap);
446 banks = cap & 0xff; 446 banks = cap & 0xff;
447 if (banks > NR_BANKS) { 447 if (banks > MCE_EXTENDED_BANK) {
448 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); 448 banks = MCE_EXTENDED_BANK;
449 banks = NR_BANKS; 449 printk(KERN_INFO "MCE: warning: using only %d banks\n",
450 MCE_EXTENDED_BANK);
450 } 451 }
451 /* Use accurate RIP reporting if available. */ 452 /* Use accurate RIP reporting if available. */
452 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 453 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -462,7 +463,11 @@ static void mce_init(void *dummy)
462 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 463 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
463 464
464 for (i = 0; i < banks; i++) { 465 for (i = 0; i < banks; i++) {
465 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 466 if (i < NR_SYSFS_BANKS)
467 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
468 else
469 wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
470
466 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 471 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
467 } 472 }
468} 473}
@@ -766,7 +771,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
766 } \ 771 } \
767 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 772 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
768 773
769/* TBD should generate these dynamically based on number of available banks */ 774/*
775 * TBD should generate these dynamically based on number of available banks.
776 * Have only 6 contol banks in /sysfs until then.
777 */
770ACCESSOR(bank0ctl,bank[0],mce_restart()) 778ACCESSOR(bank0ctl,bank[0],mce_restart())
771ACCESSOR(bank1ctl,bank[1],mce_restart()) 779ACCESSOR(bank1ctl,bank[1],mce_restart())
772ACCESSOR(bank2ctl,bank[2],mce_restart()) 780ACCESSOR(bank2ctl,bank[2],mce_restart())
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb03345554a5..eef001ad3bde 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -8,7 +8,7 @@
8#include <linux/interrupt.h> 8#include <linux/interrupt.h>
9#include <linux/smp.h> 9#include <linux/smp.h>
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
@@ -32,12 +32,12 @@ struct intel_mce_extended_msrs {
32 /* u32 *reserved[]; */ 32 /* u32 *reserved[]; */
33}; 33};
34 34
35static int mce_num_extended_msrs = 0; 35static int mce_num_extended_msrs;
36 36
37 37
38#ifdef CONFIG_X86_MCE_P4THERMAL 38#ifdef CONFIG_X86_MCE_P4THERMAL
39static void unexpected_thermal_interrupt(struct pt_regs *regs) 39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{ 40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id()); 42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK); 43 add_taint(TAINT_MACHINE_CHECK);
@@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
83 * be some SMM goo which handles it, so we can't even put a handler 83 * be some SMM goo which handles it, so we can't even put a handler
84 * since it might be delivered via SMI already -zwanem. 84 * since it might be delivered via SMI already -zwanem.
85 */ 85 */
86 rdmsr (MSR_IA32_MISC_ENABLE, l, h); 86 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
87 h = apic_read(APIC_LVTTHMR); 87 h = apic_read(APIC_LVTTHMR);
88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", 89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
@@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
91 return; /* -EBUSY */ 91 return; /* -EBUSY */
92 } 92 }
93 93
94 /* check whether a vector already exists, temporarily masked? */ 94 /* check whether a vector already exists, temporarily masked? */
95 if (h & APIC_VECTOR_MASK) { 95 if (h & APIC_VECTOR_MASK) {
96 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " 96 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
97 "installed\n", 97 "installed\n",
@@ -104,18 +104,18 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
104 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ 104 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
105 apic_write_around(APIC_LVTTHMR, h); 105 apic_write_around(APIC_LVTTHMR, h);
106 106
107 rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); 107 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
108 wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); 108 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
109 109
110 /* ok we're good to go... */ 110 /* ok we're good to go... */
111 vendor_thermal_interrupt = intel_thermal_interrupt; 111 vendor_thermal_interrupt = intel_thermal_interrupt;
112
113 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
114 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
115 112
116 l = apic_read (APIC_LVTTHMR); 113 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
117 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 114 wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
118 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); 115
116 l = apic_read(APIC_LVTTHMR);
117 apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
118 printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
119 119
120 /* enable thermal throttle processing */ 120 /* enable thermal throttle processing */
121 atomic_set(&therm_throt_en, 1); 121 atomic_set(&therm_throt_en, 1);
@@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
129{ 129{
130 u32 h; 130 u32 h;
131 131
132 rdmsr (MSR_IA32_MCG_EAX, r->eax, h); 132 rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
133 rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); 133 rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
134 rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); 134 rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
135 rdmsr (MSR_IA32_MCG_EDX, r->edx, h); 135 rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
136 rdmsr (MSR_IA32_MCG_ESI, r->esi, h); 136 rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
137 rdmsr (MSR_IA32_MCG_EDI, r->edi, h); 137 rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
138 rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); 138 rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
139 rdmsr (MSR_IA32_MCG_ESP, r->esp, h); 139 rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
140 rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); 140 rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
141 rdmsr (MSR_IA32_MCG_EIP, r->eip, h); 141 rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
142} 142}
143 143
144static void intel_machine_check(struct pt_regs * regs, long error_code) 144static void intel_machine_check(struct pt_regs *regs, long error_code)
145{ 145{
146 int recover=1; 146 int recover = 1;
147 u32 alow, ahigh, high, low; 147 u32 alow, ahigh, high, low;
148 u32 mcgstl, mcgsth; 148 u32 mcgstl, mcgsth;
149 int i; 149 int i;
150 150
151 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 151 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
152 if (mcgstl & (1<<0)) /* Recoverable ? */ 152 if (mcgstl & (1<<0)) /* Recoverable ? */
153 recover=0; 153 recover = 0;
154 154
155 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 155 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
156 smp_processor_id(), mcgsth, mcgstl); 156 smp_processor_id(), mcgsth, mcgstl);
@@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
191 } 191 }
192 192
193 if (recover & 2) 193 if (recover & 2)
194 panic ("CPU context corrupt"); 194 panic("CPU context corrupt");
195 if (recover & 1) 195 if (recover & 1)
196 panic ("Unable to continue"); 196 panic("Unable to continue");
197 197
198 printk(KERN_EMERG "Attempting to continue.\n"); 198 printk(KERN_EMERG "Attempting to continue.\n");
199 /* 199 /*
200 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 200 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
201 * recoverable/continuable.This will allow BIOS to look at the MSRs 201 * recoverable/continuable.This will allow BIOS to look at the MSRs
202 * for errors if the OS could not log the error. 202 * for errors if the OS could not log the error.
203 */ 203 */
204 for (i=0; i<nr_mce_banks; i++) { 204 for (i = 0; i < nr_mce_banks; i++) {
205 u32 msr; 205 u32 msr;
206 msr = MSR_IA32_MC0_STATUS+i*4; 206 msr = MSR_IA32_MC0_STATUS+i*4;
207 rdmsr (msr, low, high); 207 rdmsr(msr, low, high);
208 if (high&(1<<31)) { 208 if (high&(1<<31)) {
209 /* Clear it */ 209 /* Clear it */
210 wrmsr(msr, 0UL, 0UL); 210 wrmsr(msr, 0UL, 0UL);
@@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
214 } 214 }
215 } 215 }
216 mcgstl &= ~(1<<2); 216 mcgstl &= ~(1<<2);
217 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 217 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
218} 218}
219 219
220 220
@@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
222{ 222{
223 u32 l, h; 223 u32 l, h;
224 int i; 224 int i;
225 225
226 machine_check_vector = intel_machine_check; 226 machine_check_vector = intel_machine_check;
227 wmb(); 227 wmb();
228 228
229 printk (KERN_INFO "Intel machine check architecture supported.\n"); 229 printk(KERN_INFO "Intel machine check architecture supported.\n");
230 rdmsr (MSR_IA32_MCG_CAP, l, h); 230 rdmsr(MSR_IA32_MCG_CAP, l, h);
231 if (l & (1<<8)) /* Control register present ? */ 231 if (l & (1<<8)) /* Control register present ? */
232 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 232 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
233 nr_mce_banks = l & 0xff; 233 nr_mce_banks = l & 0xff;
234 234
235 for (i=0; i<nr_mce_banks; i++) { 235 for (i = 0; i < nr_mce_banks; i++) {
236 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 236 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
237 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 237 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
238 } 238 }
239 239
240 set_in_cr4 (X86_CR4_MCE); 240 set_in_cr4(X86_CR4_MCE);
241 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 241 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
242 smp_processor_id()); 242 smp_processor_id());
243 243
244 /* Check for P4/Xeon extended MCE MSRs */ 244 /* Check for P4/Xeon extended MCE MSRs */
245 rdmsr (MSR_IA32_MCG_CAP, l, h); 245 rdmsr(MSR_IA32_MCG_CAP, l, h);
246 if (l & (1<<9)) {/* MCG_EXT_P */ 246 if (l & (1<<9)) {/* MCG_EXT_P */
247 mce_num_extended_msrs = (l >> 16) & 0xff; 247 mce_num_extended_msrs = (l >> 16) & 0xff;
248 printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" 248 printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
249 " available\n", 249 " available\n",
250 smp_processor_id(), mce_num_extended_msrs); 250 smp_processor_id(), mce_num_extended_msrs);
251 251
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 9f51e1ea9e82..84fd9f2a28ff 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -98,17 +98,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
98 98
99} 99}
100 100
101static void __init ati_bugs(int num, int slot, int func)
102{
103#ifdef CONFIG_X86_IO_APIC
104 if (timer_over_8254 == 1) {
105 timer_over_8254 = 0;
106 printk(KERN_INFO
107 "ATI board detected. Disabling timer routing over 8254.\n");
108 }
109#endif
110}
111
112#define QFLAG_APPLY_ONCE 0x1 101#define QFLAG_APPLY_ONCE 0x1
113#define QFLAG_APPLIED 0x2 102#define QFLAG_APPLIED 0x2
114#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) 103#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -126,8 +115,6 @@ static struct chipset early_qrk[] __initdata = {
126 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, 115 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
127 { PCI_VENDOR_ID_VIA, PCI_ANY_ID, 116 { PCI_VENDOR_ID_VIA, PCI_ANY_ID,
128 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, 117 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
129 { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
130 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs },
131 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, 118 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
132 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, 119 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
133 {} 120 {}
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 5d23d85624d4..4b63c8e1f13b 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -49,13 +49,13 @@ void efi_call_phys_prelog(void)
49 local_irq_save(efi_rt_eflags); 49 local_irq_save(efi_rt_eflags);
50 50
51 /* 51 /*
52 * If I don't have PSE, I should just duplicate two entries in page 52 * If I don't have PAE, I should just duplicate two entries in page
53 * directory. If I have PSE, I just need to duplicate one entry in 53 * directory. If I have PAE, I just need to duplicate one entry in
54 * page directory. 54 * page directory.
55 */ 55 */
56 cr4 = read_cr4(); 56 cr4 = read_cr4();
57 57
58 if (cr4 & X86_CR4_PSE) { 58 if (cr4 & X86_CR4_PAE) {
59 efi_bak_pg_dir_pointer[0].pgd = 59 efi_bak_pg_dir_pointer[0].pgd =
60 swapper_pg_dir[pgd_index(0)].pgd; 60 swapper_pg_dir[pgd_index(0)].pgd;
61 swapper_pg_dir[0].pgd = 61 swapper_pg_dir[0].pgd =
@@ -93,7 +93,7 @@ void efi_call_phys_epilog(void)
93 93
94 cr4 = read_cr4(); 94 cr4 = read_cr4();
95 95
96 if (cr4 & X86_CR4_PSE) { 96 if (cr4 & X86_CR4_PAE) {
97 swapper_pg_dir[pgd_index(0)].pgd = 97 swapper_pg_dir[pgd_index(0)].pgd =
98 efi_bak_pg_dir_pointer[0].pgd; 98 efi_bak_pg_dir_pointer[0].pgd;
99 } else { 99 } else {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a7..5cf0aa993f4f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -420,7 +420,6 @@ END(\label)
420 PTREGSCALL stub_clone, sys_clone, %r8 420 PTREGSCALL stub_clone, sys_clone, %r8
421 PTREGSCALL stub_fork, sys_fork, %rdi 421 PTREGSCALL stub_fork, sys_fork, %rdi
422 PTREGSCALL stub_vfork, sys_vfork, %rdi 422 PTREGSCALL stub_vfork, sys_vfork, %rdi
423 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
424 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx 423 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
425 PTREGSCALL stub_iopl, sys_iopl, %rsi 424 PTREGSCALL stub_iopl, sys_iopl, %rsi
426 425
@@ -1120,10 +1119,6 @@ ENTRY(coprocessor_segment_overrun)
1120 zeroentry do_coprocessor_segment_overrun 1119 zeroentry do_coprocessor_segment_overrun
1121END(coprocessor_segment_overrun) 1120END(coprocessor_segment_overrun)
1122 1121
1123ENTRY(reserved)
1124 zeroentry do_reserved
1125END(reserved)
1126
1127 /* runs on exception stack */ 1122 /* runs on exception stack */
1128ENTRY(double_fault) 1123ENTRY(double_fault)
1129 XCPT_FRAME 1124 XCPT_FRAME
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index e8edd63ab000..9b08e852fd1a 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -166,6 +166,8 @@ int geode_has_vsa2(void)
166 static int has_vsa2 = -1; 166 static int has_vsa2 = -1;
167 167
168 if (has_vsa2 == -1) { 168 if (has_vsa2 == -1) {
169 u16 val;
170
169 /* 171 /*
170 * The VSA has virtual registers that we can query for a 172 * The VSA has virtual registers that we can query for a
171 * signature. 173 * signature.
@@ -173,7 +175,8 @@ int geode_has_vsa2(void)
173 outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); 175 outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
174 outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); 176 outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
175 177
176 has_vsa2 = (inw(VSA_VRC_DATA) == VSA_SIG); 178 val = inw(VSA_VRC_DATA);
179 has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
177 } 180 }
178 181
179 return has_vsa2; 182 return has_vsa2;
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 10a1955bb1d1..263b9d14753e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -18,6 +18,7 @@
18#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h>
21 22
22#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
@@ -128,7 +129,7 @@ ident_complete:
128 /* Fixup phys_base */ 129 /* Fixup phys_base */
129 addq %rbp, phys_base(%rip) 130 addq %rbp, phys_base(%rip)
130 131
131#ifdef CONFIG_SMP 132#ifdef CONFIG_X86_TRAMPOLINE
132 addq %rbp, trampoline_level4_pgt + 0(%rip) 133 addq %rbp, trampoline_level4_pgt + 0(%rip)
133 addq %rbp, trampoline_level4_pgt + (511*8)(%rip) 134 addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
134#endif 135#endif
@@ -154,9 +155,7 @@ ENTRY(secondary_startup_64)
154 */ 155 */
155 156
156 /* Enable PAE mode and PGE */ 157 /* Enable PAE mode and PGE */
157 xorq %rax, %rax 158 movl $(X86_CR4_PAE | X86_CR4_PGE), %eax
158 btsq $5, %rax
159 btsq $7, %rax
160 movq %rax, %cr4 159 movq %rax, %cr4
161 160
162 /* Setup early boot stage 4 level pagetables. */ 161 /* Setup early boot stage 4 level pagetables. */
@@ -184,14 +183,10 @@ ENTRY(secondary_startup_64)
1841: wrmsr /* Make changes effective */ 1831: wrmsr /* Make changes effective */
185 184
186 /* Setup cr0 */ 185 /* Setup cr0 */
187#define CR0_PM 1 /* protected mode */ 186#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
188#define CR0_MP (1<<1) 187 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
189#define CR0_ET (1<<4) 188 X86_CR0_PG)
190#define CR0_NE (1<<5) 189 movl $CR0_STATE, %eax
191#define CR0_WP (1<<16)
192#define CR0_AM (1<<18)
193#define CR0_PAGING (1<<31)
194 movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
195 /* Make changes effective */ 190 /* Make changes effective */
196 movq %rax, %cr0 191 movq %rax, %cr0
197 192
@@ -327,11 +322,11 @@ early_idt_ripmsg:
327ENTRY(name) 322ENTRY(name)
328 323
329/* Automate the creation of 1 to 1 mapping pmd entries */ 324/* Automate the creation of 1 to 1 mapping pmd entries */
330#define PMDS(START, PERM, COUNT) \ 325#define PMDS(START, PERM, COUNT) \
331 i = 0 ; \ 326 i = 0 ; \
332 .rept (COUNT) ; \ 327 .rept (COUNT) ; \
333 .quad (START) + (i << 21) + (PERM) ; \ 328 .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
334 i = i + 1 ; \ 329 i = i + 1 ; \
335 .endr 330 .endr
336 331
337 /* 332 /*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9b5cfcdfc426..ea230ec69057 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -17,7 +17,7 @@
17 17
18/* FSEC = 10^-15 18/* FSEC = 10^-15
19 NSEC = 10^-9 */ 19 NSEC = 10^-9 */
20#define FSEC_PER_NSEC 1000000 20#define FSEC_PER_NSEC 1000000L
21 21
22/* 22/*
23 * HPET address is set in acpi/boot.c, when an ACPI entry exists 23 * HPET address is set in acpi/boot.c, when an ACPI entry exists
@@ -206,20 +206,19 @@ static void hpet_enable_legacy_int(void)
206 206
207static void hpet_legacy_clockevent_register(void) 207static void hpet_legacy_clockevent_register(void)
208{ 208{
209 uint64_t hpet_freq;
210
211 /* Start HPET legacy interrupts */ 209 /* Start HPET legacy interrupts */
212 hpet_enable_legacy_int(); 210 hpet_enable_legacy_int();
213 211
214 /* 212 /*
215 * The period is a femto seconds value. We need to calculate the 213 * The mult factor is defined as (include/linux/clockchips.h)
216 * scaled math multiplication factor for nanosecond to hpet tick 214 * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h)
217 * conversion. 215 * hpet_period is in units of femtoseconds (per cycle), so
216 * mult/2^shift = cyc/ns = 10^6/hpet_period
217 * mult = (10^6 * 2^shift)/hpet_period
218 * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period
218 */ 219 */
219 hpet_freq = 1000000000000000ULL; 220 hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
220 do_div(hpet_freq, hpet_period); 221 hpet_period, hpet_clockevent.shift);
221 hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
222 NSEC_PER_SEC, hpet_clockevent.shift);
223 /* Calculate the min / max delta */ 222 /* Calculate the min / max delta */
224 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 223 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
225 &hpet_clockevent); 224 &hpet_clockevent);
@@ -324,7 +323,7 @@ static struct clocksource clocksource_hpet = {
324 323
325static int hpet_clocksource_register(void) 324static int hpet_clocksource_register(void)
326{ 325{
327 u64 tmp, start, now; 326 u64 start, now;
328 cycle_t t1; 327 cycle_t t1;
329 328
330 /* Start the counter */ 329 /* Start the counter */
@@ -351,21 +350,15 @@ static int hpet_clocksource_register(void)
351 return -ENODEV; 350 return -ENODEV;
352 } 351 }
353 352
354 /* Initialize and register HPET clocksource 353 /*
355 * 354 * The definition of mult is (include/linux/clocksource.h)
356 * hpet period is in femto seconds per cycle 355 * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc
357 * so we need to convert this to ns/cyc units 356 * so we first need to convert hpet_period to ns/cyc units:
358 * approximated by mult/2^shift 357 * mult/2^shift = ns/cyc = hpet_period/10^6
359 * 358 * mult = (hpet_period * 2^shift)/10^6
360 * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 359 * mult = (hpet_period << shift)/FSEC_PER_NSEC
361 * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
362 * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
363 * (fsec/cyc << shift)/1000000 = mult
364 * (hpet_period << shift)/FSEC_PER_NSEC = mult
365 */ 360 */
366 tmp = (u64)hpet_period << HPET_SHIFT; 361 clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT);
367 do_div(tmp, FSEC_PER_NSEC);
368 clocksource_hpet.mult = (u32)tmp;
369 362
370 clocksource_register(&clocksource_hpet); 363 clocksource_register(&clocksource_hpet);
371 364
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb82..95e80e5033c3 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -162,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
162 int ret; 162 int ret;
163 163
164 if (!cpu_has_fxsr) 164 if (!cpu_has_fxsr)
165 return -ENODEV; 165 return -EIO;
166 166
167 ret = init_fpu(target); 167 ret = init_fpu(target);
168 if (ret) 168 if (ret)
@@ -179,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
179 int ret; 179 int ret;
180 180
181 if (!cpu_has_fxsr) 181 if (!cpu_has_fxsr)
182 return -ENODEV; 182 return -EIO;
183 183
184 ret = init_fpu(target); 184 ret = init_fpu(target);
185 if (ret) 185 if (ret)
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259.c
index fe631967d625..7a0fda8f01b5 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259.c
@@ -1,8 +1,10 @@
1#include <linux/linkage.h>
1#include <linux/errno.h> 2#include <linux/errno.h>
2#include <linux/signal.h> 3#include <linux/signal.h>
3#include <linux/sched.h> 4#include <linux/sched.h>
4#include <linux/ioport.h> 5#include <linux/ioport.h>
5#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/timex.h>
6#include <linux/slab.h> 8#include <linux/slab.h>
7#include <linux/random.h> 9#include <linux/random.h>
8#include <linux/init.h> 10#include <linux/init.h>
@@ -10,10 +12,12 @@
10#include <linux/sysdev.h> 12#include <linux/sysdev.h>
11#include <linux/bitops.h> 13#include <linux/bitops.h>
12 14
15#include <asm/acpi.h>
13#include <asm/atomic.h> 16#include <asm/atomic.h>
14#include <asm/system.h> 17#include <asm/system.h>
15#include <asm/io.h> 18#include <asm/io.h>
16#include <asm/timer.h> 19#include <asm/timer.h>
20#include <asm/hw_irq.h>
17#include <asm/pgtable.h> 21#include <asm/pgtable.h>
18#include <asm/delay.h> 22#include <asm/delay.h>
19#include <asm/desc.h> 23#include <asm/desc.h>
@@ -32,7 +36,7 @@ static int i8259A_auto_eoi;
32DEFINE_SPINLOCK(i8259A_lock); 36DEFINE_SPINLOCK(i8259A_lock);
33static void mask_and_ack_8259A(unsigned int); 37static void mask_and_ack_8259A(unsigned int);
34 38
35static struct irq_chip i8259A_chip = { 39struct irq_chip i8259A_chip = {
36 .name = "XT-PIC", 40 .name = "XT-PIC",
37 .mask = disable_8259A_irq, 41 .mask = disable_8259A_irq,
38 .disable = disable_8259A_irq, 42 .disable = disable_8259A_irq,
@@ -125,14 +129,14 @@ static inline int i8259A_irq_real(unsigned int irq)
125 int irqmask = 1<<irq; 129 int irqmask = 1<<irq;
126 130
127 if (irq < 8) { 131 if (irq < 8) {
128 outb(0x0B,PIC_MASTER_CMD); /* ISR register */ 132 outb(0x0B, PIC_MASTER_CMD); /* ISR register */
129 value = inb(PIC_MASTER_CMD) & irqmask; 133 value = inb(PIC_MASTER_CMD) & irqmask;
130 outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ 134 outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */
131 return value; 135 return value;
132 } 136 }
133 outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ 137 outb(0x0B, PIC_SLAVE_CMD); /* ISR register */
134 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); 138 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
135 outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ 139 outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */
136 return value; 140 return value;
137} 141}
138 142
@@ -171,12 +175,14 @@ handle_real_irq:
171 if (irq & 8) { 175 if (irq & 8) {
172 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ 176 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
173 outb(cached_slave_mask, PIC_SLAVE_IMR); 177 outb(cached_slave_mask, PIC_SLAVE_IMR);
174 outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ 178 /* 'Specific EOI' to slave */
175 outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ 179 outb(0x60+(irq&7), PIC_SLAVE_CMD);
180 /* 'Specific EOI' to master-IRQ2 */
181 outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD);
176 } else { 182 } else {
177 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ 183 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
178 outb(cached_master_mask, PIC_MASTER_IMR); 184 outb(cached_master_mask, PIC_MASTER_IMR);
179 outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ 185 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
180 } 186 }
181 spin_unlock_irqrestore(&i8259A_lock, flags); 187 spin_unlock_irqrestore(&i8259A_lock, flags);
182 return; 188 return;
@@ -199,7 +205,8 @@ spurious_8259A_irq:
199 * lets ACK and report it. [once per IRQ] 205 * lets ACK and report it. [once per IRQ]
200 */ 206 */
201 if (!(spurious_irq_mask & irqmask)) { 207 if (!(spurious_irq_mask & irqmask)) {
202 printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); 208 printk(KERN_DEBUG
209 "spurious 8259A interrupt: IRQ%d.\n", irq);
203 spurious_irq_mask |= irqmask; 210 spurious_irq_mask |= irqmask;
204 } 211 }
205 atomic_inc(&irq_err_count); 212 atomic_inc(&irq_err_count);
@@ -290,17 +297,34 @@ void init_8259A(int auto_eoi)
290 * outb_pic - this has to work on a wide range of PC hardware. 297 * outb_pic - this has to work on a wide range of PC hardware.
291 */ 298 */
292 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ 299 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
300#ifndef CONFIG_X86_64
293 outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ 301 outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
294 outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ 302 outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */
303#else /* CONFIG_X86_64 */
304 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
305 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
306 /* 8259A-1 (the master) has a slave on IR2 */
307 outb_pic(0x04, PIC_MASTER_IMR);
308#endif /* CONFIG_X86_64 */
295 if (auto_eoi) /* master does Auto EOI */ 309 if (auto_eoi) /* master does Auto EOI */
296 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); 310 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
297 else /* master expects normal EOI */ 311 else /* master expects normal EOI */
298 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); 312 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
299 313
300 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ 314 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
315#ifndef CONFIG_X86_64
301 outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ 316 outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
302 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ 317 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */
303 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ 318 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
319#else /* CONFIG_X86_64 */
320 /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
321 outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
322 /* 8259A-2 is a slave on master's IR2 */
323 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
324 /* (slave's support for AEOI in flat mode is to be investigated) */
325 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
326
327#endif /* CONFIG_X86_64 */
304 if (auto_eoi) 328 if (auto_eoi)
305 /* 329 /*
306 * In AEOI mode we just have to mask the interrupt 330 * In AEOI mode we just have to mask the interrupt
@@ -317,93 +341,3 @@ void init_8259A(int auto_eoi)
317 341
318 spin_unlock_irqrestore(&i8259A_lock, flags); 342 spin_unlock_irqrestore(&i8259A_lock, flags);
319} 343}
320
321/*
322 * Note that on a 486, we don't want to do a SIGFPE on an irq13
323 * as the irq is unreliable, and exception 16 works correctly
324 * (ie as explained in the intel literature). On a 386, you
325 * can't use exception 16 due to bad IBM design, so we have to
326 * rely on the less exact irq13.
327 *
328 * Careful.. Not only is IRQ13 unreliable, but it is also
329 * leads to races. IBM designers who came up with it should
330 * be shot.
331 */
332
333
334static irqreturn_t math_error_irq(int cpl, void *dev_id)
335{
336 extern void math_error(void __user *);
337 outb(0,0xF0);
338 if (ignore_fpu_irq || !boot_cpu_data.hard_math)
339 return IRQ_NONE;
340 math_error((void __user *)get_irq_regs()->ip);
341 return IRQ_HANDLED;
342}
343
344/*
345 * New motherboards sometimes make IRQ 13 be a PCI interrupt,
346 * so allow interrupt sharing.
347 */
348static struct irqaction fpu_irq = {
349 .handler = math_error_irq,
350 .mask = CPU_MASK_NONE,
351 .name = "fpu",
352};
353
354void __init init_ISA_irqs (void)
355{
356 int i;
357
358#ifdef CONFIG_X86_LOCAL_APIC
359 init_bsp_APIC();
360#endif
361 init_8259A(0);
362
363 /*
364 * 16 old-style INTA-cycle interrupts:
365 */
366 for (i = 0; i < 16; i++) {
367 set_irq_chip_and_handler_name(i, &i8259A_chip,
368 handle_level_irq, "XT");
369 }
370}
371
372/* Overridden in paravirt.c */
373void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
374
375void __init native_init_IRQ(void)
376{
377 int i;
378
379 /* all the set up before the call gates are initialised */
380 pre_intr_init_hook();
381
382 /*
383 * Cover the whole vector space, no vector can escape
384 * us. (some of these will be overridden and become
385 * 'special' SMP interrupts)
386 */
387 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
388 int vector = FIRST_EXTERNAL_VECTOR + i;
389 if (i >= NR_IRQS)
390 break;
391 /* SYSCALL_VECTOR was reserved in trap_init. */
392 if (!test_bit(vector, used_vectors))
393 set_intr_gate(vector, interrupt[i]);
394 }
395
396 /* setup after call gates are initialised (usually add in
397 * the architecture specific gates)
398 */
399 intr_init_hook();
400
401 /*
402 * External FPU? Set up irq13 if so, for
403 * original braindamaged IBM FERR coupling.
404 */
405 if (boot_cpu_data.hard_math && !cpu_has_fpu)
406 setup_irq(FPU_IRQ, &fpu_irq);
407
408 irq_ctx_init(smp_processor_id());
409}
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
deleted file mode 100644
index fa57a1568508..000000000000
--- a/arch/x86/kernel/i8259_64.c
+++ /dev/null
@@ -1,512 +0,0 @@
1#include <linux/linkage.h>
2#include <linux/errno.h>
3#include <linux/signal.h>
4#include <linux/sched.h>
5#include <linux/ioport.h>
6#include <linux/interrupt.h>
7#include <linux/timex.h>
8#include <linux/slab.h>
9#include <linux/random.h>
10#include <linux/init.h>
11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h>
13#include <linux/bitops.h>
14
15#include <asm/acpi.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18#include <asm/io.h>
19#include <asm/hw_irq.h>
20#include <asm/pgtable.h>
21#include <asm/delay.h>
22#include <asm/desc.h>
23#include <asm/apic.h>
24#include <asm/i8259.h>
25
26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define BI(x,y) \
38 BUILD_IRQ(x##y)
39
40#define BUILD_16_IRQS(x) \
41 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
42 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
43 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
44 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
45
46/*
47 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
48 * (these are usually mapped to vectors 0x30-0x3f)
49 */
50
51/*
52 * The IO-APIC gives us many more interrupt sources. Most of these
53 * are unused but an SMP system is supposed to have enough memory ...
54 * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
55 * across the spectrum, so we really want to be prepared to get all
56 * of these. Plus, more powerful systems might have more than 64
57 * IO-APIC registers.
58 *
59 * (these are usually mapped into the 0x30-0xff vector range)
60 */
61 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
62BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
63BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
64BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
65
66#undef BUILD_16_IRQS
67#undef BI
68
69
70#define IRQ(x,y) \
71 IRQ##x##y##_interrupt
72
73#define IRQLIST_16(x) \
74 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
75 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
76 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
77 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
78
79/* for the irq vectors */
80static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
81 IRQLIST_16(0x2), IRQLIST_16(0x3),
82 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
83 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
84 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
85};
86
87#undef IRQ
88#undef IRQLIST_16
89
90/*
91 * This is the 'legacy' 8259A Programmable Interrupt Controller,
92 * present in the majority of PC/AT boxes.
93 * plus some generic x86 specific things if generic specifics makes
94 * any sense at all.
95 * this file should become arch/i386/kernel/irq.c when the old irq.c
96 * moves to arch independent land
97 */
98
99static int i8259A_auto_eoi;
100DEFINE_SPINLOCK(i8259A_lock);
101static void mask_and_ack_8259A(unsigned int);
102
103static struct irq_chip i8259A_chip = {
104 .name = "XT-PIC",
105 .mask = disable_8259A_irq,
106 .disable = disable_8259A_irq,
107 .unmask = enable_8259A_irq,
108 .mask_ack = mask_and_ack_8259A,
109};
110
111/*
112 * 8259A PIC functions to handle ISA devices:
113 */
114
115/*
116 * This contains the irq mask for both 8259A irq controllers,
117 */
118unsigned int cached_irq_mask = 0xffff;
119
120/*
121 * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
122 * boards the timer interrupt is not really connected to any IO-APIC pin,
123 * it's fed to the master 8259A's IR0 line only.
124 *
125 * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
126 * this 'mixed mode' IRQ handling costs nothing because it's only used
127 * at IRQ setup time.
128 */
129unsigned long io_apic_irqs;
130
131void disable_8259A_irq(unsigned int irq)
132{
133 unsigned int mask = 1 << irq;
134 unsigned long flags;
135
136 spin_lock_irqsave(&i8259A_lock, flags);
137 cached_irq_mask |= mask;
138 if (irq & 8)
139 outb(cached_slave_mask, PIC_SLAVE_IMR);
140 else
141 outb(cached_master_mask, PIC_MASTER_IMR);
142 spin_unlock_irqrestore(&i8259A_lock, flags);
143}
144
145void enable_8259A_irq(unsigned int irq)
146{
147 unsigned int mask = ~(1 << irq);
148 unsigned long flags;
149
150 spin_lock_irqsave(&i8259A_lock, flags);
151 cached_irq_mask &= mask;
152 if (irq & 8)
153 outb(cached_slave_mask, PIC_SLAVE_IMR);
154 else
155 outb(cached_master_mask, PIC_MASTER_IMR);
156 spin_unlock_irqrestore(&i8259A_lock, flags);
157}
158
159int i8259A_irq_pending(unsigned int irq)
160{
161 unsigned int mask = 1<<irq;
162 unsigned long flags;
163 int ret;
164
165 spin_lock_irqsave(&i8259A_lock, flags);
166 if (irq < 8)
167 ret = inb(PIC_MASTER_CMD) & mask;
168 else
169 ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
170 spin_unlock_irqrestore(&i8259A_lock, flags);
171
172 return ret;
173}
174
175void make_8259A_irq(unsigned int irq)
176{
177 disable_irq_nosync(irq);
178 io_apic_irqs &= ~(1<<irq);
179 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
180 "XT");
181 enable_irq(irq);
182}
183
184/*
185 * This function assumes to be called rarely. Switching between
186 * 8259A registers is slow.
187 * This has to be protected by the irq controller spinlock
188 * before being called.
189 */
190static inline int i8259A_irq_real(unsigned int irq)
191{
192 int value;
193 int irqmask = 1<<irq;
194
195 if (irq < 8) {
196 outb(0x0B,PIC_MASTER_CMD); /* ISR register */
197 value = inb(PIC_MASTER_CMD) & irqmask;
198 outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */
199 return value;
200 }
201 outb(0x0B,PIC_SLAVE_CMD); /* ISR register */
202 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
203 outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */
204 return value;
205}
206
207/*
208 * Careful! The 8259A is a fragile beast, it pretty
209 * much _has_ to be done exactly like this (mask it
210 * first, _then_ send the EOI, and the order of EOI
211 * to the two 8259s is important!
212 */
213static void mask_and_ack_8259A(unsigned int irq)
214{
215 unsigned int irqmask = 1 << irq;
216 unsigned long flags;
217
218 spin_lock_irqsave(&i8259A_lock, flags);
219 /*
220 * Lightweight spurious IRQ detection. We do not want
221 * to overdo spurious IRQ handling - it's usually a sign
222 * of hardware problems, so we only do the checks we can
223 * do without slowing down good hardware unnecessarily.
224 *
225 * Note that IRQ7 and IRQ15 (the two spurious IRQs
226 * usually resulting from the 8259A-1|2 PICs) occur
227 * even if the IRQ is masked in the 8259A. Thus we
228 * can check spurious 8259A IRQs without doing the
229 * quite slow i8259A_irq_real() call for every IRQ.
230 * This does not cover 100% of spurious interrupts,
231 * but should be enough to warn the user that there
232 * is something bad going on ...
233 */
234 if (cached_irq_mask & irqmask)
235 goto spurious_8259A_irq;
236 cached_irq_mask |= irqmask;
237
238handle_real_irq:
239 if (irq & 8) {
240 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
241 outb(cached_slave_mask, PIC_SLAVE_IMR);
242 /* 'Specific EOI' to slave */
243 outb(0x60+(irq&7),PIC_SLAVE_CMD);
244 /* 'Specific EOI' to master-IRQ2 */
245 outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
246 } else {
247 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
248 outb(cached_master_mask, PIC_MASTER_IMR);
249 /* 'Specific EOI' to master */
250 outb(0x60+irq,PIC_MASTER_CMD);
251 }
252 spin_unlock_irqrestore(&i8259A_lock, flags);
253 return;
254
255spurious_8259A_irq:
256 /*
257 * this is the slow path - should happen rarely.
258 */
259 if (i8259A_irq_real(irq))
260 /*
261 * oops, the IRQ _is_ in service according to the
262 * 8259A - not spurious, go handle it.
263 */
264 goto handle_real_irq;
265
266 {
267 static int spurious_irq_mask;
268 /*
269 * At this point we can be sure the IRQ is spurious,
270 * lets ACK and report it. [once per IRQ]
271 */
272 if (!(spurious_irq_mask & irqmask)) {
273 printk(KERN_DEBUG
274 "spurious 8259A interrupt: IRQ%d.\n", irq);
275 spurious_irq_mask |= irqmask;
276 }
277 atomic_inc(&irq_err_count);
278 /*
279 * Theoretically we do not have to handle this IRQ,
280 * but in Linux this does not cause problems and is
281 * simpler for us.
282 */
283 goto handle_real_irq;
284 }
285}
286
287static char irq_trigger[2];
288/**
289 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
290 */
291static void restore_ELCR(char *trigger)
292{
293 outb(trigger[0], 0x4d0);
294 outb(trigger[1], 0x4d1);
295}
296
297static void save_ELCR(char *trigger)
298{
299 /* IRQ 0,1,2,8,13 are marked as reserved */
300 trigger[0] = inb(0x4d0) & 0xF8;
301 trigger[1] = inb(0x4d1) & 0xDE;
302}
303
304static int i8259A_resume(struct sys_device *dev)
305{
306 init_8259A(i8259A_auto_eoi);
307 restore_ELCR(irq_trigger);
308 return 0;
309}
310
311static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
312{
313 save_ELCR(irq_trigger);
314 return 0;
315}
316
317static int i8259A_shutdown(struct sys_device *dev)
318{
319 /* Put the i8259A into a quiescent state that
320 * the kernel initialization code can get it
321 * out of.
322 */
323 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
324 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */
325 return 0;
326}
327
328static struct sysdev_class i8259_sysdev_class = {
329 .name = "i8259",
330 .suspend = i8259A_suspend,
331 .resume = i8259A_resume,
332 .shutdown = i8259A_shutdown,
333};
334
335static struct sys_device device_i8259A = {
336 .id = 0,
337 .cls = &i8259_sysdev_class,
338};
339
340static int __init i8259A_init_sysfs(void)
341{
342 int error = sysdev_class_register(&i8259_sysdev_class);
343 if (!error)
344 error = sysdev_register(&device_i8259A);
345 return error;
346}
347
348device_initcall(i8259A_init_sysfs);
349
350void init_8259A(int auto_eoi)
351{
352 unsigned long flags;
353
354 i8259A_auto_eoi = auto_eoi;
355
356 spin_lock_irqsave(&i8259A_lock, flags);
357
358 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
359 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
360
361 /*
362 * outb_pic - this has to work on a wide range of PC hardware.
363 */
364 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
365 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
366 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
367 /* 8259A-1 (the master) has a slave on IR2 */
368 outb_pic(0x04, PIC_MASTER_IMR);
369 if (auto_eoi) /* master does Auto EOI */
370 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
371 else /* master expects normal EOI */
372 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
373
374 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
375 /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
376 outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
377 /* 8259A-2 is a slave on master's IR2 */
378 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
379 /* (slave's support for AEOI in flat mode is to be investigated) */
380 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
381
382 if (auto_eoi)
383 /*
384 * In AEOI mode we just have to mask the interrupt
385 * when acking.
386 */
387 i8259A_chip.mask_ack = disable_8259A_irq;
388 else
389 i8259A_chip.mask_ack = mask_and_ack_8259A;
390
391 udelay(100); /* wait for 8259A to initialize */
392
393 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
394 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
395
396 spin_unlock_irqrestore(&i8259A_lock, flags);
397}
398
399
400
401
402/*
403 * IRQ2 is cascade interrupt to second interrupt controller
404 */
405
406static struct irqaction irq2 = {
407 .handler = no_action,
408 .mask = CPU_MASK_NONE,
409 .name = "cascade",
410};
411DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
412 [0 ... IRQ0_VECTOR - 1] = -1,
413 [IRQ0_VECTOR] = 0,
414 [IRQ1_VECTOR] = 1,
415 [IRQ2_VECTOR] = 2,
416 [IRQ3_VECTOR] = 3,
417 [IRQ4_VECTOR] = 4,
418 [IRQ5_VECTOR] = 5,
419 [IRQ6_VECTOR] = 6,
420 [IRQ7_VECTOR] = 7,
421 [IRQ8_VECTOR] = 8,
422 [IRQ9_VECTOR] = 9,
423 [IRQ10_VECTOR] = 10,
424 [IRQ11_VECTOR] = 11,
425 [IRQ12_VECTOR] = 12,
426 [IRQ13_VECTOR] = 13,
427 [IRQ14_VECTOR] = 14,
428 [IRQ15_VECTOR] = 15,
429 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
430};
431
432void __init init_ISA_irqs (void)
433{
434 int i;
435
436 init_bsp_APIC();
437 init_8259A(0);
438
439 for (i = 0; i < NR_IRQS; i++) {
440 irq_desc[i].status = IRQ_DISABLED;
441 irq_desc[i].action = NULL;
442 irq_desc[i].depth = 1;
443
444 if (i < 16) {
445 /*
446 * 16 old-style INTA-cycle interrupts:
447 */
448 set_irq_chip_and_handler_name(i, &i8259A_chip,
449 handle_level_irq, "XT");
450 } else {
451 /*
452 * 'high' PCI IRQs filled in on demand
453 */
454 irq_desc[i].chip = &no_irq_chip;
455 }
456 }
457}
458
459void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
460
461void __init native_init_IRQ(void)
462{
463 int i;
464
465 init_ISA_irqs();
466 /*
467 * Cover the whole vector space, no vector can escape
468 * us. (some of these will be overridden and become
469 * 'special' SMP interrupts)
470 */
471 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
472 int vector = FIRST_EXTERNAL_VECTOR + i;
473 if (vector != IA32_SYSCALL_VECTOR)
474 set_intr_gate(vector, interrupt[i]);
475 }
476
477#ifdef CONFIG_SMP
478 /*
479 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
480 * IPI, driven by wakeup.
481 */
482 set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
483
484 /* IPIs for invalidation */
485 set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
486 set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
487 set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
488 set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
489 set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
490 set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
491 set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
492 set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
493
494 /* IPI for generic function call */
495 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
496
497 /* Low priority IPI to cleanup after moving an irq */
498 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
499#endif
500 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
501 set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
502
503 /* self generated IPI for local APIC timer */
504 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
505
506 /* IPI vectors for APIC spurious and error interrupts */
507 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
508 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
509
510 if (!acpi_ioapic)
511 setup_irq(2, &irq2);
512}
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 4dc8600d9d20..d4f9df2b022a 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -58,7 +58,7 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
58static DEFINE_SPINLOCK(ioapic_lock); 58static DEFINE_SPINLOCK(ioapic_lock);
59static DEFINE_SPINLOCK(vector_lock); 59static DEFINE_SPINLOCK(vector_lock);
60 60
61int timer_over_8254 __initdata = 1; 61int timer_through_8259 __initdata;
62 62
63/* 63/*
64 * Is the SiS APIC rmw bug present ? 64 * Is the SiS APIC rmw bug present ?
@@ -239,7 +239,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
239 } 239 }
240} 240}
241 241
242static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) 242static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
243{ 243{
244 struct irq_pin_list *entry = irq_2_pin + irq; 244 struct irq_pin_list *entry = irq_2_pin + irq;
245 unsigned int pin, reg; 245 unsigned int pin, reg;
@@ -259,30 +259,32 @@ static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsign
259} 259}
260 260
261/* mask = 1 */ 261/* mask = 1 */
262static void __mask_IO_APIC_irq (unsigned int irq) 262static void __mask_IO_APIC_irq(unsigned int irq)
263{ 263{
264 __modify_IO_APIC_irq(irq, 0x00010000, 0); 264 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
265} 265}
266 266
267/* mask = 0 */ 267/* mask = 0 */
268static void __unmask_IO_APIC_irq (unsigned int irq) 268static void __unmask_IO_APIC_irq(unsigned int irq)
269{ 269{
270 __modify_IO_APIC_irq(irq, 0, 0x00010000); 270 __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
271} 271}
272 272
273/* mask = 1, trigger = 0 */ 273/* mask = 1, trigger = 0 */
274static void __mask_and_edge_IO_APIC_irq (unsigned int irq) 274static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
275{ 275{
276 __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); 276 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
277 IO_APIC_REDIR_LEVEL_TRIGGER);
277} 278}
278 279
279/* mask = 0, trigger = 1 */ 280/* mask = 0, trigger = 1 */
280static void __unmask_and_level_IO_APIC_irq (unsigned int irq) 281static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
281{ 282{
282 __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); 283 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
284 IO_APIC_REDIR_MASKED);
283} 285}
284 286
285static void mask_IO_APIC_irq (unsigned int irq) 287static void mask_IO_APIC_irq(unsigned int irq)
286{ 288{
287 unsigned long flags; 289 unsigned long flags;
288 290
@@ -291,7 +293,7 @@ static void mask_IO_APIC_irq (unsigned int irq)
291 spin_unlock_irqrestore(&ioapic_lock, flags); 293 spin_unlock_irqrestore(&ioapic_lock, flags);
292} 294}
293 295
294static void unmask_IO_APIC_irq (unsigned int irq) 296static void unmask_IO_APIC_irq(unsigned int irq)
295{ 297{
296 unsigned long flags; 298 unsigned long flags;
297 299
@@ -303,7 +305,7 @@ static void unmask_IO_APIC_irq (unsigned int irq)
303static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 305static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
304{ 306{
305 struct IO_APIC_route_entry entry; 307 struct IO_APIC_route_entry entry;
306 308
307 /* Check delivery_mode to be sure we're not clearing an SMI pin */ 309 /* Check delivery_mode to be sure we're not clearing an SMI pin */
308 entry = ioapic_read_entry(apic, pin); 310 entry = ioapic_read_entry(apic, pin);
309 if (entry.delivery_mode == dest_SMI) 311 if (entry.delivery_mode == dest_SMI)
@@ -315,7 +317,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
315 ioapic_mask_entry(apic, pin); 317 ioapic_mask_entry(apic, pin);
316} 318}
317 319
318static void clear_IO_APIC (void) 320static void clear_IO_APIC(void)
319{ 321{
320 int apic, pin; 322 int apic, pin;
321 323
@@ -332,7 +334,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
332 struct irq_pin_list *entry = irq_2_pin + irq; 334 struct irq_pin_list *entry = irq_2_pin + irq;
333 unsigned int apicid_value; 335 unsigned int apicid_value;
334 cpumask_t tmp; 336 cpumask_t tmp;
335 337
336 cpus_and(tmp, cpumask, cpu_online_map); 338 cpus_and(tmp, cpumask, cpu_online_map);
337 if (cpus_empty(tmp)) 339 if (cpus_empty(tmp))
338 tmp = TARGET_CPUS; 340 tmp = TARGET_CPUS;
@@ -361,7 +363,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
361# include <linux/kernel_stat.h> /* kstat */ 363# include <linux/kernel_stat.h> /* kstat */
362# include <linux/slab.h> /* kmalloc() */ 364# include <linux/slab.h> /* kmalloc() */
363# include <linux/timer.h> 365# include <linux/timer.h>
364 366
365#define IRQBALANCE_CHECK_ARCH -999 367#define IRQBALANCE_CHECK_ARCH -999
366#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) 368#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
367#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) 369#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
@@ -373,14 +375,14 @@ static int physical_balance __read_mostly;
373static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; 375static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
374 376
375static struct irq_cpu_info { 377static struct irq_cpu_info {
376 unsigned long * last_irq; 378 unsigned long *last_irq;
377 unsigned long * irq_delta; 379 unsigned long *irq_delta;
378 unsigned long irq; 380 unsigned long irq;
379} irq_cpu_data[NR_CPUS]; 381} irq_cpu_data[NR_CPUS];
380 382
381#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) 383#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
382#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) 384#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
383#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) 385#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
384 386
385#define IDLE_ENOUGH(cpu,now) \ 387#define IDLE_ENOUGH(cpu,now) \
386 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) 388 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
@@ -419,8 +421,8 @@ inside:
419 if (cpu == -1) 421 if (cpu == -1)
420 cpu = NR_CPUS-1; 422 cpu = NR_CPUS-1;
421 } 423 }
422 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || 424 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
423 (search_idle && !IDLE_ENOUGH(cpu,now))); 425 (search_idle && !IDLE_ENOUGH(cpu, now)));
424 426
425 return cpu; 427 return cpu;
426} 428}
@@ -430,15 +432,14 @@ static inline void balance_irq(int cpu, int irq)
430 unsigned long now = jiffies; 432 unsigned long now = jiffies;
431 cpumask_t allowed_mask; 433 cpumask_t allowed_mask;
432 unsigned int new_cpu; 434 unsigned int new_cpu;
433 435
434 if (irqbalance_disabled) 436 if (irqbalance_disabled)
435 return; 437 return;
436 438
437 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); 439 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
438 new_cpu = move(cpu, allowed_mask, now, 1); 440 new_cpu = move(cpu, allowed_mask, now, 1);
439 if (cpu != new_cpu) { 441 if (cpu != new_cpu)
440 set_pending_irq(irq, cpumask_of_cpu(new_cpu)); 442 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
441 }
442} 443}
443 444
444static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) 445static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
@@ -450,14 +451,14 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
450 if (!irq_desc[j].action) 451 if (!irq_desc[j].action)
451 continue; 452 continue;
452 /* Is it a significant load ? */ 453 /* Is it a significant load ? */
453 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < 454 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
454 useful_load_threshold) 455 useful_load_threshold)
455 continue; 456 continue;
456 balance_irq(i, j); 457 balance_irq(i, j);
457 } 458 }
458 } 459 }
459 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 460 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
460 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 461 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
461 return; 462 return;
462} 463}
463 464
@@ -486,22 +487,22 @@ static void do_irq_balance(void)
486 /* Is this an active IRQ or balancing disabled ? */ 487 /* Is this an active IRQ or balancing disabled ? */
487 if (!irq_desc[j].action || irq_balancing_disabled(j)) 488 if (!irq_desc[j].action || irq_balancing_disabled(j))
488 continue; 489 continue;
489 if ( package_index == i ) 490 if (package_index == i)
490 IRQ_DELTA(package_index,j) = 0; 491 IRQ_DELTA(package_index, j) = 0;
491 /* Determine the total count per processor per IRQ */ 492 /* Determine the total count per processor per IRQ */
492 value_now = (unsigned long) kstat_cpu(i).irqs[j]; 493 value_now = (unsigned long) kstat_cpu(i).irqs[j];
493 494
494 /* Determine the activity per processor per IRQ */ 495 /* Determine the activity per processor per IRQ */
495 delta = value_now - LAST_CPU_IRQ(i,j); 496 delta = value_now - LAST_CPU_IRQ(i, j);
496 497
497 /* Update last_cpu_irq[][] for the next time */ 498 /* Update last_cpu_irq[][] for the next time */
498 LAST_CPU_IRQ(i,j) = value_now; 499 LAST_CPU_IRQ(i, j) = value_now;
499 500
500 /* Ignore IRQs whose rate is less than the clock */ 501 /* Ignore IRQs whose rate is less than the clock */
501 if (delta < useful_load_threshold) 502 if (delta < useful_load_threshold)
502 continue; 503 continue;
503 /* update the load for the processor or package total */ 504 /* update the load for the processor or package total */
504 IRQ_DELTA(package_index,j) += delta; 505 IRQ_DELTA(package_index, j) += delta;
505 506
506 /* Keep track of the higher numbered sibling as well */ 507 /* Keep track of the higher numbered sibling as well */
507 if (i != package_index) 508 if (i != package_index)
@@ -527,7 +528,8 @@ static void do_irq_balance(void)
527 max_cpu_irq = ULONG_MAX; 528 max_cpu_irq = ULONG_MAX;
528 529
529tryanothercpu: 530tryanothercpu:
530 /* Look for heaviest loaded processor. 531 /*
532 * Look for heaviest loaded processor.
531 * We may come back to get the next heaviest loaded processor. 533 * We may come back to get the next heaviest loaded processor.
532 * Skip processors with trivial loads. 534 * Skip processors with trivial loads.
533 */ 535 */
@@ -536,7 +538,7 @@ tryanothercpu:
536 for_each_online_cpu(i) { 538 for_each_online_cpu(i) {
537 if (i != CPU_TO_PACKAGEINDEX(i)) 539 if (i != CPU_TO_PACKAGEINDEX(i))
538 continue; 540 continue;
539 if (max_cpu_irq <= CPU_IRQ(i)) 541 if (max_cpu_irq <= CPU_IRQ(i))
540 continue; 542 continue;
541 if (tmp_cpu_irq < CPU_IRQ(i)) { 543 if (tmp_cpu_irq < CPU_IRQ(i)) {
542 tmp_cpu_irq = CPU_IRQ(i); 544 tmp_cpu_irq = CPU_IRQ(i);
@@ -545,8 +547,9 @@ tryanothercpu:
545 } 547 }
546 548
547 if (tmp_loaded == -1) { 549 if (tmp_loaded == -1) {
548 /* In the case of small number of heavy interrupt sources, 550 /*
549 * loading some of the cpus too much. We use Ingo's original 551 * In the case of small number of heavy interrupt sources,
552 * loading some of the cpus too much. We use Ingo's original
550 * approach to rotate them around. 553 * approach to rotate them around.
551 */ 554 */
552 if (!first_attempt && imbalance >= useful_load_threshold) { 555 if (!first_attempt && imbalance >= useful_load_threshold) {
@@ -555,13 +558,14 @@ tryanothercpu:
555 } 558 }
556 goto not_worth_the_effort; 559 goto not_worth_the_effort;
557 } 560 }
558 561
559 first_attempt = 0; /* heaviest search */ 562 first_attempt = 0; /* heaviest search */
560 max_cpu_irq = tmp_cpu_irq; /* load */ 563 max_cpu_irq = tmp_cpu_irq; /* load */
561 max_loaded = tmp_loaded; /* processor */ 564 max_loaded = tmp_loaded; /* processor */
562 imbalance = (max_cpu_irq - min_cpu_irq) / 2; 565 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
563 566
564 /* if imbalance is less than approx 10% of max load, then 567 /*
568 * if imbalance is less than approx 10% of max load, then
565 * observe diminishing returns action. - quit 569 * observe diminishing returns action. - quit
566 */ 570 */
567 if (imbalance < (max_cpu_irq >> 3)) 571 if (imbalance < (max_cpu_irq >> 3))
@@ -577,26 +581,25 @@ tryanotherirq:
577 /* Is this an active IRQ? */ 581 /* Is this an active IRQ? */
578 if (!irq_desc[j].action) 582 if (!irq_desc[j].action)
579 continue; 583 continue;
580 if (imbalance <= IRQ_DELTA(max_loaded,j)) 584 if (imbalance <= IRQ_DELTA(max_loaded, j))
581 continue; 585 continue;
582 /* Try to find the IRQ that is closest to the imbalance 586 /* Try to find the IRQ that is closest to the imbalance
583 * without going over. 587 * without going over.
584 */ 588 */
585 if (move_this_load < IRQ_DELTA(max_loaded,j)) { 589 if (move_this_load < IRQ_DELTA(max_loaded, j)) {
586 move_this_load = IRQ_DELTA(max_loaded,j); 590 move_this_load = IRQ_DELTA(max_loaded, j);
587 selected_irq = j; 591 selected_irq = j;
588 } 592 }
589 } 593 }
590 if (selected_irq == -1) { 594 if (selected_irq == -1)
591 goto tryanothercpu; 595 goto tryanothercpu;
592 }
593 596
594 imbalance = move_this_load; 597 imbalance = move_this_load;
595 598
596 /* For physical_balance case, we accumulated both load 599 /* For physical_balance case, we accumulated both load
597 * values in the one of the siblings cpu_irq[], 600 * values in the one of the siblings cpu_irq[],
598 * to use the same code for physical and logical processors 601 * to use the same code for physical and logical processors
599 * as much as possible. 602 * as much as possible.
600 * 603 *
601 * NOTE: the cpu_irq[] array holds the sum of the load for 604 * NOTE: the cpu_irq[] array holds the sum of the load for
602 * sibling A and sibling B in the slot for the lowest numbered 605 * sibling A and sibling B in the slot for the lowest numbered
@@ -625,11 +628,11 @@ tryanotherirq:
625 /* mark for change destination */ 628 /* mark for change destination */
626 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); 629 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
627 630
628 /* Since we made a change, come back sooner to 631 /* Since we made a change, come back sooner to
629 * check for more variation. 632 * check for more variation.
630 */ 633 */
631 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 634 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
632 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 635 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
633 return; 636 return;
634 } 637 }
635 goto tryanotherirq; 638 goto tryanotherirq;
@@ -640,7 +643,7 @@ not_worth_the_effort:
640 * upward 643 * upward
641 */ 644 */
642 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, 645 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
643 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); 646 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
644 return; 647 return;
645} 648}
646 649
@@ -679,13 +682,13 @@ static int __init balanced_irq_init(void)
679 cpumask_t tmp; 682 cpumask_t tmp;
680 683
681 cpus_shift_right(tmp, cpu_online_map, 2); 684 cpus_shift_right(tmp, cpu_online_map, 2);
682 c = &boot_cpu_data; 685 c = &boot_cpu_data;
683 /* When not overwritten by the command line ask subarchitecture. */ 686 /* When not overwritten by the command line ask subarchitecture. */
684 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) 687 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
685 irqbalance_disabled = NO_BALANCE_IRQ; 688 irqbalance_disabled = NO_BALANCE_IRQ;
686 if (irqbalance_disabled) 689 if (irqbalance_disabled)
687 return 0; 690 return 0;
688 691
689 /* disable irqbalance completely if there is only one processor online */ 692 /* disable irqbalance completely if there is only one processor online */
690 if (num_online_cpus() < 2) { 693 if (num_online_cpus() < 2) {
691 irqbalance_disabled = 1; 694 irqbalance_disabled = 1;
@@ -699,16 +702,14 @@ static int __init balanced_irq_init(void)
699 physical_balance = 1; 702 physical_balance = 1;
700 703
701 for_each_online_cpu(i) { 704 for_each_online_cpu(i) {
702 irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 705 irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
703 irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 706 irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
704 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { 707 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
705 printk(KERN_ERR "balanced_irq_init: out of memory"); 708 printk(KERN_ERR "balanced_irq_init: out of memory");
706 goto failed; 709 goto failed;
707 } 710 }
708 memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
709 memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
710 } 711 }
711 712
712 printk(KERN_INFO "Starting balanced_irq\n"); 713 printk(KERN_INFO "Starting balanced_irq\n");
713 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) 714 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
714 return 0; 715 return 0;
@@ -843,7 +844,7 @@ static int __init find_isa_irq_apic(int irq, int type)
843 } 844 }
844 if (i < mp_irq_entries) { 845 if (i < mp_irq_entries) {
845 int apic; 846 int apic;
846 for(apic = 0; apic < nr_ioapics; apic++) { 847 for (apic = 0; apic < nr_ioapics; apic++) {
847 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) 848 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
848 return apic; 849 return apic;
849 } 850 }
@@ -880,7 +881,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
880 !mp_irqs[i].mpc_irqtype && 881 !mp_irqs[i].mpc_irqtype &&
881 (bus == lbus) && 882 (bus == lbus) &&
882 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { 883 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
883 int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); 884 int irq = pin_2_irq(i, apic, mp_irqs[i].mpc_dstirq);
884 885
885 if (!(apic || IO_APIC_IRQ(irq))) 886 if (!(apic || IO_APIC_IRQ(irq)))
886 continue; 887 continue;
@@ -900,7 +901,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
900EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); 901EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
901 902
902/* 903/*
903 * This function currently is only a helper for the i386 smp boot process where 904 * This function currently is only a helper for the i386 smp boot process where
904 * we need to reprogram the ioredtbls to cater for the cpus which have come online 905 * we need to reprogram the ioredtbls to cater for the cpus which have come online
905 * so mask in all cases should simply be TARGET_CPUS 906 * so mask in all cases should simply be TARGET_CPUS
906 */ 907 */
@@ -975,37 +976,36 @@ static int MPBIOS_polarity(int idx)
975 /* 976 /*
976 * Determine IRQ line polarity (high active or low active): 977 * Determine IRQ line polarity (high active or low active):
977 */ 978 */
978 switch (mp_irqs[idx].mpc_irqflag & 3) 979 switch (mp_irqs[idx].mpc_irqflag & 3) {
980 case 0: /* conforms, ie. bus-type dependent polarity */
979 { 981 {
980 case 0: /* conforms, ie. bus-type dependent polarity */ 982 polarity = test_bit(bus, mp_bus_not_pci)?
981 { 983 default_ISA_polarity(idx):
982 polarity = test_bit(bus, mp_bus_not_pci)? 984 default_PCI_polarity(idx);
983 default_ISA_polarity(idx): 985 break;
984 default_PCI_polarity(idx); 986 }
985 break; 987 case 1: /* high active */
986 } 988 {
987 case 1: /* high active */ 989 polarity = 0;
988 { 990 break;
989 polarity = 0; 991 }
990 break; 992 case 2: /* reserved */
991 } 993 {
992 case 2: /* reserved */ 994 printk(KERN_WARNING "broken BIOS!!\n");
993 { 995 polarity = 1;
994 printk(KERN_WARNING "broken BIOS!!\n"); 996 break;
995 polarity = 1; 997 }
996 break; 998 case 3: /* low active */
997 } 999 {
998 case 3: /* low active */ 1000 polarity = 1;
999 { 1001 break;
1000 polarity = 1; 1002 }
1001 break; 1003 default: /* invalid */
1002 } 1004 {
1003 default: /* invalid */ 1005 printk(KERN_WARNING "broken BIOS!!\n");
1004 { 1006 polarity = 1;
1005 printk(KERN_WARNING "broken BIOS!!\n"); 1007 break;
1006 polarity = 1; 1008 }
1007 break;
1008 }
1009 } 1009 }
1010 return polarity; 1010 return polarity;
1011} 1011}
@@ -1018,69 +1018,67 @@ static int MPBIOS_trigger(int idx)
1018 /* 1018 /*
1019 * Determine IRQ trigger mode (edge or level sensitive): 1019 * Determine IRQ trigger mode (edge or level sensitive):
1020 */ 1020 */
1021 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) 1021 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) {
1022 case 0: /* conforms, ie. bus-type dependent */
1022 { 1023 {
1023 case 0: /* conforms, ie. bus-type dependent */ 1024 trigger = test_bit(bus, mp_bus_not_pci)?
1024 { 1025 default_ISA_trigger(idx):
1025 trigger = test_bit(bus, mp_bus_not_pci)? 1026 default_PCI_trigger(idx);
1026 default_ISA_trigger(idx):
1027 default_PCI_trigger(idx);
1028#if defined(CONFIG_EISA) || defined(CONFIG_MCA) 1027#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1029 switch (mp_bus_id_to_type[bus]) 1028 switch (mp_bus_id_to_type[bus]) {
1030 { 1029 case MP_BUS_ISA: /* ISA pin */
1031 case MP_BUS_ISA: /* ISA pin */ 1030 {
1032 { 1031 /* set before the switch */
1033 /* set before the switch */
1034 break;
1035 }
1036 case MP_BUS_EISA: /* EISA pin */
1037 {
1038 trigger = default_EISA_trigger(idx);
1039 break;
1040 }
1041 case MP_BUS_PCI: /* PCI pin */
1042 {
1043 /* set before the switch */
1044 break;
1045 }
1046 case MP_BUS_MCA: /* MCA pin */
1047 {
1048 trigger = default_MCA_trigger(idx);
1049 break;
1050 }
1051 default:
1052 {
1053 printk(KERN_WARNING "broken BIOS!!\n");
1054 trigger = 1;
1055 break;
1056 }
1057 }
1058#endif
1059 break; 1032 break;
1060 } 1033 }
1061 case 1: /* edge */ 1034 case MP_BUS_EISA: /* EISA pin */
1062 { 1035 {
1063 trigger = 0; 1036 trigger = default_EISA_trigger(idx);
1064 break; 1037 break;
1065 } 1038 }
1066 case 2: /* reserved */ 1039 case MP_BUS_PCI: /* PCI pin */
1067 { 1040 {
1068 printk(KERN_WARNING "broken BIOS!!\n"); 1041 /* set before the switch */
1069 trigger = 1;
1070 break; 1042 break;
1071 } 1043 }
1072 case 3: /* level */ 1044 case MP_BUS_MCA: /* MCA pin */
1073 { 1045 {
1074 trigger = 1; 1046 trigger = default_MCA_trigger(idx);
1075 break; 1047 break;
1076 } 1048 }
1077 default: /* invalid */ 1049 default:
1078 { 1050 {
1079 printk(KERN_WARNING "broken BIOS!!\n"); 1051 printk(KERN_WARNING "broken BIOS!!\n");
1080 trigger = 0; 1052 trigger = 1;
1081 break; 1053 break;
1082 } 1054 }
1083 } 1055 }
1056#endif
1057 break;
1058 }
1059 case 1: /* edge */
1060 {
1061 trigger = 0;
1062 break;
1063 }
1064 case 2: /* reserved */
1065 {
1066 printk(KERN_WARNING "broken BIOS!!\n");
1067 trigger = 1;
1068 break;
1069 }
1070 case 3: /* level */
1071 {
1072 trigger = 1;
1073 break;
1074 }
1075 default: /* invalid */
1076 {
1077 printk(KERN_WARNING "broken BIOS!!\n");
1078 trigger = 0;
1079 break;
1080 }
1081 }
1084 return trigger; 1082 return trigger;
1085} 1083}
1086 1084
@@ -1148,8 +1146,8 @@ static inline int IO_APIC_irq_trigger(int irq)
1148 1146
1149 for (apic = 0; apic < nr_ioapics; apic++) { 1147 for (apic = 0; apic < nr_ioapics; apic++) {
1150 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1148 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1151 idx = find_irq_entry(apic,pin,mp_INT); 1149 idx = find_irq_entry(apic, pin, mp_INT);
1152 if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) 1150 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1153 return irq_trigger(idx); 1151 return irq_trigger(idx);
1154 } 1152 }
1155 } 1153 }
@@ -1164,7 +1162,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }
1164 1162
1165static int __assign_irq_vector(int irq) 1163static int __assign_irq_vector(int irq)
1166{ 1164{
1167 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1165 static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
1168 int vector, offset; 1166 int vector, offset;
1169 1167
1170 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); 1168 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
@@ -1237,15 +1235,15 @@ static void __init setup_IO_APIC_irqs(void)
1237 /* 1235 /*
1238 * add it to the IO-APIC irq-routing table: 1236 * add it to the IO-APIC irq-routing table:
1239 */ 1237 */
1240 memset(&entry,0,sizeof(entry)); 1238 memset(&entry, 0, sizeof(entry));
1241 1239
1242 entry.delivery_mode = INT_DELIVERY_MODE; 1240 entry.delivery_mode = INT_DELIVERY_MODE;
1243 entry.dest_mode = INT_DEST_MODE; 1241 entry.dest_mode = INT_DEST_MODE;
1244 entry.mask = 0; /* enable IRQ */ 1242 entry.mask = 0; /* enable IRQ */
1245 entry.dest.logical.logical_dest = 1243 entry.dest.logical.logical_dest =
1246 cpu_mask_to_apicid(TARGET_CPUS); 1244 cpu_mask_to_apicid(TARGET_CPUS);
1247 1245
1248 idx = find_irq_entry(apic,pin,mp_INT); 1246 idx = find_irq_entry(apic, pin, mp_INT);
1249 if (idx == -1) { 1247 if (idx == -1) {
1250 if (first_notcon) { 1248 if (first_notcon) {
1251 apic_printk(APIC_VERBOSE, KERN_DEBUG 1249 apic_printk(APIC_VERBOSE, KERN_DEBUG
@@ -1289,7 +1287,7 @@ static void __init setup_IO_APIC_irqs(void)
1289 vector = assign_irq_vector(irq); 1287 vector = assign_irq_vector(irq);
1290 entry.vector = vector; 1288 entry.vector = vector;
1291 ioapic_register_intr(irq, vector, IOAPIC_AUTO); 1289 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1292 1290
1293 if (!apic && (irq < 16)) 1291 if (!apic && (irq < 16))
1294 disable_8259A_irq(irq); 1292 disable_8259A_irq(irq);
1295 } 1293 }
@@ -1302,25 +1300,21 @@ static void __init setup_IO_APIC_irqs(void)
1302} 1300}
1303 1301
1304/* 1302/*
1305 * Set up the 8259A-master output pin: 1303 * Set up the timer pin, possibly with the 8259A-master behind.
1306 */ 1304 */
1307static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) 1305static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1306 int vector)
1308{ 1307{
1309 struct IO_APIC_route_entry entry; 1308 struct IO_APIC_route_entry entry;
1310 1309
1311 memset(&entry,0,sizeof(entry)); 1310 memset(&entry, 0, sizeof(entry));
1312
1313 disable_8259A_irq(0);
1314
1315 /* mask LVT0 */
1316 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1317 1311
1318 /* 1312 /*
1319 * We use logical delivery to get the timer IRQ 1313 * We use logical delivery to get the timer IRQ
1320 * to the first CPU. 1314 * to the first CPU.
1321 */ 1315 */
1322 entry.dest_mode = INT_DEST_MODE; 1316 entry.dest_mode = INT_DEST_MODE;
1323 entry.mask = 0; /* unmask IRQ now */ 1317 entry.mask = 1; /* mask IRQ now */
1324 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); 1318 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1325 entry.delivery_mode = INT_DELIVERY_MODE; 1319 entry.delivery_mode = INT_DELIVERY_MODE;
1326 entry.polarity = 0; 1320 entry.polarity = 0;
@@ -1329,17 +1323,14 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
1329 1323
1330 /* 1324 /*
1331 * The timer IRQ doesn't have to know that behind the 1325 * The timer IRQ doesn't have to know that behind the
1332 * scene we have a 8259A-master in AEOI mode ... 1326 * scene we may have a 8259A-master in AEOI mode ...
1333 */ 1327 */
1334 irq_desc[0].chip = &ioapic_chip; 1328 ioapic_register_intr(0, vector, IOAPIC_EDGE);
1335 set_irq_handler(0, handle_edge_irq);
1336 1329
1337 /* 1330 /*
1338 * Add it to the IO-APIC irq-routing table: 1331 * Add it to the IO-APIC irq-routing table:
1339 */ 1332 */
1340 ioapic_write_entry(apic, pin, entry); 1333 ioapic_write_entry(apic, pin, entry);
1341
1342 enable_8259A_irq(0);
1343} 1334}
1344 1335
1345void __init print_IO_APIC(void) 1336void __init print_IO_APIC(void)
@@ -1354,7 +1345,7 @@ void __init print_IO_APIC(void)
1354 if (apic_verbosity == APIC_QUIET) 1345 if (apic_verbosity == APIC_QUIET)
1355 return; 1346 return;
1356 1347
1357 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1348 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1358 for (i = 0; i < nr_ioapics; i++) 1349 for (i = 0; i < nr_ioapics; i++)
1359 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1350 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1360 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); 1351 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
@@ -1459,7 +1450,7 @@ void __init print_IO_APIC(void)
1459 1450
1460#if 0 1451#if 0
1461 1452
1462static void print_APIC_bitfield (int base) 1453static void print_APIC_bitfield(int base)
1463{ 1454{
1464 unsigned int v; 1455 unsigned int v;
1465 int i, j; 1456 int i, j;
@@ -1480,7 +1471,7 @@ static void print_APIC_bitfield (int base)
1480 } 1471 }
1481} 1472}
1482 1473
1483void /*__init*/ print_local_APIC(void * dummy) 1474void /*__init*/ print_local_APIC(void *dummy)
1484{ 1475{
1485 unsigned int v, ver, maxlvt; 1476 unsigned int v, ver, maxlvt;
1486 1477
@@ -1489,6 +1480,7 @@ void /*__init*/ print_local_APIC(void * dummy)
1489 1480
1490 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1481 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1491 smp_processor_id(), hard_smp_processor_id()); 1482 smp_processor_id(), hard_smp_processor_id());
1483 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1484 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1485 GET_APIC_ID(read_apic_id()));
1494 v = apic_read(APIC_LVR); 1486 v = apic_read(APIC_LVR);
@@ -1563,7 +1555,7 @@ void /*__init*/ print_local_APIC(void * dummy)
1563 printk("\n"); 1555 printk("\n");
1564} 1556}
1565 1557
1566void print_all_local_APICs (void) 1558void print_all_local_APICs(void)
1567{ 1559{
1568 on_each_cpu(print_local_APIC, NULL, 1, 1); 1560 on_each_cpu(print_local_APIC, NULL, 1, 1);
1569} 1561}
@@ -1586,11 +1578,11 @@ void /*__init*/ print_PIC(void)
1586 v = inb(0xa0) << 8 | inb(0x20); 1578 v = inb(0xa0) << 8 | inb(0x20);
1587 printk(KERN_DEBUG "... PIC IRR: %04x\n", v); 1579 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1588 1580
1589 outb(0x0b,0xa0); 1581 outb(0x0b, 0xa0);
1590 outb(0x0b,0x20); 1582 outb(0x0b, 0x20);
1591 v = inb(0xa0) << 8 | inb(0x20); 1583 v = inb(0xa0) << 8 | inb(0x20);
1592 outb(0x0a,0xa0); 1584 outb(0x0a, 0xa0);
1593 outb(0x0a,0x20); 1585 outb(0x0a, 0x20);
1594 1586
1595 spin_unlock_irqrestore(&i8259A_lock, flags); 1587 spin_unlock_irqrestore(&i8259A_lock, flags);
1596 1588
@@ -1626,7 +1618,7 @@ static void __init enable_IO_APIC(void)
1626 spin_unlock_irqrestore(&ioapic_lock, flags); 1618 spin_unlock_irqrestore(&ioapic_lock, flags);
1627 nr_ioapic_registers[apic] = reg_01.bits.entries+1; 1619 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1628 } 1620 }
1629 for(apic = 0; apic < nr_ioapics; apic++) { 1621 for (apic = 0; apic < nr_ioapics; apic++) {
1630 int pin; 1622 int pin;
1631 /* See if any of the pins is in ExtINT mode */ 1623 /* See if any of the pins is in ExtINT mode */
1632 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1624 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
@@ -1748,7 +1740,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
1748 spin_lock_irqsave(&ioapic_lock, flags); 1740 spin_lock_irqsave(&ioapic_lock, flags);
1749 reg_00.raw = io_apic_read(apic, 0); 1741 reg_00.raw = io_apic_read(apic, 0);
1750 spin_unlock_irqrestore(&ioapic_lock, flags); 1742 spin_unlock_irqrestore(&ioapic_lock, flags);
1751 1743
1752 old_id = mp_ioapics[apic].mpc_apicid; 1744 old_id = mp_ioapics[apic].mpc_apicid;
1753 1745
1754 if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { 1746 if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
@@ -1800,7 +1792,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
1800 /* 1792 /*
1801 * Read the right value from the MPC table and 1793 * Read the right value from the MPC table and
1802 * write it into the ID register. 1794 * write it into the ID register.
1803 */ 1795 */
1804 apic_printk(APIC_VERBOSE, KERN_INFO 1796 apic_printk(APIC_VERBOSE, KERN_INFO
1805 "...changing IO-APIC physical APIC ID to %d ...", 1797 "...changing IO-APIC physical APIC ID to %d ...",
1806 mp_ioapics[apic].mpc_apicid); 1798 mp_ioapics[apic].mpc_apicid);
@@ -2020,7 +2012,7 @@ static void ack_apic(unsigned int irq)
2020 ack_APIC_irq(); 2012 ack_APIC_irq();
2021} 2013}
2022 2014
2023static void mask_lapic_irq (unsigned int irq) 2015static void mask_lapic_irq(unsigned int irq)
2024{ 2016{
2025 unsigned long v; 2017 unsigned long v;
2026 2018
@@ -2028,7 +2020,7 @@ static void mask_lapic_irq (unsigned int irq)
2028 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); 2020 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
2029} 2021}
2030 2022
2031static void unmask_lapic_irq (unsigned int irq) 2023static void unmask_lapic_irq(unsigned int irq)
2032{ 2024{
2033 unsigned long v; 2025 unsigned long v;
2034 2026
@@ -2037,7 +2029,7 @@ static void unmask_lapic_irq (unsigned int irq)
2037} 2029}
2038 2030
2039static struct irq_chip lapic_chip __read_mostly = { 2031static struct irq_chip lapic_chip __read_mostly = {
2040 .name = "local-APIC-edge", 2032 .name = "local-APIC",
2041 .mask = mask_lapic_irq, 2033 .mask = mask_lapic_irq,
2042 .unmask = unmask_lapic_irq, 2034 .unmask = unmask_lapic_irq,
2043 .eoi = ack_apic, 2035 .eoi = ack_apic,
@@ -2046,14 +2038,14 @@ static struct irq_chip lapic_chip __read_mostly = {
2046static void __init setup_nmi(void) 2038static void __init setup_nmi(void)
2047{ 2039{
2048 /* 2040 /*
2049 * Dirty trick to enable the NMI watchdog ... 2041 * Dirty trick to enable the NMI watchdog ...
2050 * We put the 8259A master into AEOI mode and 2042 * We put the 8259A master into AEOI mode and
2051 * unmask on all local APICs LVT0 as NMI. 2043 * unmask on all local APICs LVT0 as NMI.
2052 * 2044 *
2053 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 2045 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2054 * is from Maciej W. Rozycki - so we do not have to EOI from 2046 * is from Maciej W. Rozycki - so we do not have to EOI from
2055 * the NMI handler or the timer interrupt. 2047 * the NMI handler or the timer interrupt.
2056 */ 2048 */
2057 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); 2049 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2058 2050
2059 enable_NMI_through_LVT0(); 2051 enable_NMI_through_LVT0();
@@ -2129,11 +2121,16 @@ static inline void __init unlock_ExtINT_logic(void)
2129static inline void __init check_timer(void) 2121static inline void __init check_timer(void)
2130{ 2122{
2131 int apic1, pin1, apic2, pin2; 2123 int apic1, pin1, apic2, pin2;
2124 int no_pin1 = 0;
2132 int vector; 2125 int vector;
2126 unsigned int ver;
2133 unsigned long flags; 2127 unsigned long flags;
2134 2128
2135 local_irq_save(flags); 2129 local_irq_save(flags);
2136 2130
2131 ver = apic_read(APIC_LVR);
2132 ver = GET_APIC_VERSION(ver);
2133
2137 /* 2134 /*
2138 * get/set the timer IRQ vector: 2135 * get/set the timer IRQ vector:
2139 */ 2136 */
@@ -2142,17 +2139,17 @@ static inline void __init check_timer(void)
2142 set_intr_gate(vector, interrupt[0]); 2139 set_intr_gate(vector, interrupt[0]);
2143 2140
2144 /* 2141 /*
2145 * Subtle, code in do_timer_interrupt() expects an AEOI 2142 * As IRQ0 is to be enabled in the 8259A, the virtual
2146 * mode for the 8259A whenever interrupts are routed 2143 * wire has to be disabled in the local APIC. Also
2147 * through I/O APICs. Also IRQ0 has to be enabled in 2144 * timer interrupts need to be acknowledged manually in
2148 * the 8259A which implies the virtual wire has to be 2145 * the 8259A for the i82489DX when using the NMI
2149 * disabled in the local APIC. 2146 * watchdog as that APIC treats NMIs as level-triggered.
2147 * The AEOI mode will finish them in the 8259A
2148 * automatically.
2150 */ 2149 */
2151 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2150 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2152 init_8259A(1); 2151 init_8259A(1);
2153 timer_ack = 1; 2152 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2154 if (timer_over_8254 > 0)
2155 enable_8259A_irq(0);
2156 2153
2157 pin1 = find_isa_irq_pin(0, mp_INT); 2154 pin1 = find_isa_irq_pin(0, mp_INT);
2158 apic1 = find_isa_irq_apic(0, mp_INT); 2155 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2162,14 +2159,33 @@ static inline void __init check_timer(void)
2162 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 2159 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2163 vector, apic1, pin1, apic2, pin2); 2160 vector, apic1, pin1, apic2, pin2);
2164 2161
2162 /*
2163 * Some BIOS writers are clueless and report the ExtINTA
2164 * I/O APIC input from the cascaded 8259A as the timer
2165 * interrupt input. So just in case, if only one pin
2166 * was found above, try it both directly and through the
2167 * 8259A.
2168 */
2169 if (pin1 == -1) {
2170 pin1 = pin2;
2171 apic1 = apic2;
2172 no_pin1 = 1;
2173 } else if (pin2 == -1) {
2174 pin2 = pin1;
2175 apic2 = apic1;
2176 }
2177
2165 if (pin1 != -1) { 2178 if (pin1 != -1) {
2166 /* 2179 /*
2167 * Ok, does IRQ0 through the IOAPIC work? 2180 * Ok, does IRQ0 through the IOAPIC work?
2168 */ 2181 */
2182 if (no_pin1) {
2183 add_pin_to_irq(0, apic1, pin1);
2184 setup_timer_IRQ0_pin(apic1, pin1, vector);
2185 }
2169 unmask_IO_APIC_irq(0); 2186 unmask_IO_APIC_irq(0);
2170 if (timer_irq_works()) { 2187 if (timer_irq_works()) {
2171 if (nmi_watchdog == NMI_IO_APIC) { 2188 if (nmi_watchdog == NMI_IO_APIC) {
2172 disable_8259A_irq(0);
2173 setup_nmi(); 2189 setup_nmi();
2174 enable_8259A_irq(0); 2190 enable_8259A_irq(0);
2175 } 2191 }
@@ -2178,43 +2194,46 @@ static inline void __init check_timer(void)
2178 goto out; 2194 goto out;
2179 } 2195 }
2180 clear_IO_APIC_pin(apic1, pin1); 2196 clear_IO_APIC_pin(apic1, pin1);
2181 printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " 2197 if (!no_pin1)
2182 "IO-APIC\n"); 2198 printk(KERN_ERR "..MP-BIOS bug: "
2183 } 2199 "8254 timer not connected to IO-APIC\n");
2184 2200
2185 printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); 2201 printk(KERN_INFO "...trying to set up timer (IRQ0) "
2186 if (pin2 != -1) { 2202 "through the 8259A ... ");
2187 printk("\n..... (found pin %d) ...", pin2); 2203 printk("\n..... (found pin %d) ...", pin2);
2188 /* 2204 /*
2189 * legacy devices should be connected to IO APIC #0 2205 * legacy devices should be connected to IO APIC #0
2190 */ 2206 */
2191 setup_ExtINT_IRQ0_pin(apic2, pin2, vector); 2207 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2208 setup_timer_IRQ0_pin(apic2, pin2, vector);
2209 unmask_IO_APIC_irq(0);
2210 enable_8259A_irq(0);
2192 if (timer_irq_works()) { 2211 if (timer_irq_works()) {
2193 printk("works.\n"); 2212 printk("works.\n");
2194 if (pin1 != -1) 2213 timer_through_8259 = 1;
2195 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2196 else
2197 add_pin_to_irq(0, apic2, pin2);
2198 if (nmi_watchdog == NMI_IO_APIC) { 2214 if (nmi_watchdog == NMI_IO_APIC) {
2215 disable_8259A_irq(0);
2199 setup_nmi(); 2216 setup_nmi();
2217 enable_8259A_irq(0);
2200 } 2218 }
2201 goto out; 2219 goto out;
2202 } 2220 }
2203 /* 2221 /*
2204 * Cleanup, just in case ... 2222 * Cleanup, just in case ...
2205 */ 2223 */
2224 disable_8259A_irq(0);
2206 clear_IO_APIC_pin(apic2, pin2); 2225 clear_IO_APIC_pin(apic2, pin2);
2226 printk(" failed.\n");
2207 } 2227 }
2208 printk(" failed.\n");
2209 2228
2210 if (nmi_watchdog == NMI_IO_APIC) { 2229 if (nmi_watchdog == NMI_IO_APIC) {
2211 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 2230 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
2212 nmi_watchdog = 0; 2231 nmi_watchdog = NMI_NONE;
2213 } 2232 }
2233 timer_ack = 0;
2214 2234
2215 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 2235 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
2216 2236
2217 disable_8259A_irq(0);
2218 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, 2237 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
2219 "fasteoi"); 2238 "fasteoi");
2220 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ 2239 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
@@ -2224,12 +2243,12 @@ static inline void __init check_timer(void)
2224 printk(" works.\n"); 2243 printk(" works.\n");
2225 goto out; 2244 goto out;
2226 } 2245 }
2246 disable_8259A_irq(0);
2227 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); 2247 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
2228 printk(" failed.\n"); 2248 printk(" failed.\n");
2229 2249
2230 printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); 2250 printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
2231 2251
2232 timer_ack = 0;
2233 init_8259A(0); 2252 init_8259A(0);
2234 make_8259A_irq(0); 2253 make_8259A_irq(0);
2235 apic_write_around(APIC_LVT0, APIC_DM_EXTINT); 2254 apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
@@ -2286,28 +2305,14 @@ void __init setup_IO_APIC(void)
2286 print_IO_APIC(); 2305 print_IO_APIC();
2287} 2306}
2288 2307
2289static int __init setup_disable_8254_timer(char *s)
2290{
2291 timer_over_8254 = -1;
2292 return 1;
2293}
2294static int __init setup_enable_8254_timer(char *s)
2295{
2296 timer_over_8254 = 2;
2297 return 1;
2298}
2299
2300__setup("disable_8254_timer", setup_disable_8254_timer);
2301__setup("enable_8254_timer", setup_enable_8254_timer);
2302
2303/* 2308/*
2304 * Called after all the initialization is done. If we didnt find any 2309 * Called after all the initialization is done. If we didnt find any
2305 * APIC bugs then we can allow the modify fast path 2310 * APIC bugs then we can allow the modify fast path
2306 */ 2311 */
2307 2312
2308static int __init io_apic_bug_finalize(void) 2313static int __init io_apic_bug_finalize(void)
2309{ 2314{
2310 if(sis_apic_bug == -1) 2315 if (sis_apic_bug == -1)
2311 sis_apic_bug = 0; 2316 sis_apic_bug = 0;
2312 return 0; 2317 return 0;
2313} 2318}
@@ -2318,17 +2323,17 @@ struct sysfs_ioapic_data {
2318 struct sys_device dev; 2323 struct sys_device dev;
2319 struct IO_APIC_route_entry entry[0]; 2324 struct IO_APIC_route_entry entry[0];
2320}; 2325};
2321static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; 2326static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
2322 2327
2323static int ioapic_suspend(struct sys_device *dev, pm_message_t state) 2328static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2324{ 2329{
2325 struct IO_APIC_route_entry *entry; 2330 struct IO_APIC_route_entry *entry;
2326 struct sysfs_ioapic_data *data; 2331 struct sysfs_ioapic_data *data;
2327 int i; 2332 int i;
2328 2333
2329 data = container_of(dev, struct sysfs_ioapic_data, dev); 2334 data = container_of(dev, struct sysfs_ioapic_data, dev);
2330 entry = data->entry; 2335 entry = data->entry;
2331 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) 2336 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2332 entry[i] = ioapic_read_entry(dev->id, i); 2337 entry[i] = ioapic_read_entry(dev->id, i);
2333 2338
2334 return 0; 2339 return 0;
@@ -2341,7 +2346,7 @@ static int ioapic_resume(struct sys_device *dev)
2341 unsigned long flags; 2346 unsigned long flags;
2342 union IO_APIC_reg_00 reg_00; 2347 union IO_APIC_reg_00 reg_00;
2343 int i; 2348 int i;
2344 2349
2345 data = container_of(dev, struct sysfs_ioapic_data, dev); 2350 data = container_of(dev, struct sysfs_ioapic_data, dev);
2346 entry = data->entry; 2351 entry = data->entry;
2347 2352
@@ -2352,7 +2357,7 @@ static int ioapic_resume(struct sys_device *dev)
2352 io_apic_write(dev->id, 0, reg_00.raw); 2357 io_apic_write(dev->id, 0, reg_00.raw);
2353 } 2358 }
2354 spin_unlock_irqrestore(&ioapic_lock, flags); 2359 spin_unlock_irqrestore(&ioapic_lock, flags);
2355 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) 2360 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2356 ioapic_write_entry(dev->id, i, entry[i]); 2361 ioapic_write_entry(dev->id, i, entry[i]);
2357 2362
2358 return 0; 2363 return 0;
@@ -2366,24 +2371,23 @@ static struct sysdev_class ioapic_sysdev_class = {
2366 2371
2367static int __init ioapic_init_sysfs(void) 2372static int __init ioapic_init_sysfs(void)
2368{ 2373{
2369 struct sys_device * dev; 2374 struct sys_device *dev;
2370 int i, size, error = 0; 2375 int i, size, error = 0;
2371 2376
2372 error = sysdev_class_register(&ioapic_sysdev_class); 2377 error = sysdev_class_register(&ioapic_sysdev_class);
2373 if (error) 2378 if (error)
2374 return error; 2379 return error;
2375 2380
2376 for (i = 0; i < nr_ioapics; i++ ) { 2381 for (i = 0; i < nr_ioapics; i++) {
2377 size = sizeof(struct sys_device) + nr_ioapic_registers[i] 2382 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2378 * sizeof(struct IO_APIC_route_entry); 2383 * sizeof(struct IO_APIC_route_entry);
2379 mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); 2384 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
2380 if (!mp_ioapic_data[i]) { 2385 if (!mp_ioapic_data[i]) {
2381 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); 2386 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2382 continue; 2387 continue;
2383 } 2388 }
2384 memset(mp_ioapic_data[i], 0, size);
2385 dev = &mp_ioapic_data[i]->dev; 2389 dev = &mp_ioapic_data[i]->dev;
2386 dev->id = i; 2390 dev->id = i;
2387 dev->cls = &ioapic_sysdev_class; 2391 dev->cls = &ioapic_sysdev_class;
2388 error = sysdev_register(dev); 2392 error = sysdev_register(dev);
2389 if (error) { 2393 if (error) {
@@ -2458,7 +2462,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2458 msg->address_lo = 2462 msg->address_lo =
2459 MSI_ADDR_BASE_LO | 2463 MSI_ADDR_BASE_LO |
2460 ((INT_DEST_MODE == 0) ? 2464 ((INT_DEST_MODE == 0) ?
2461 MSI_ADDR_DEST_MODE_PHYSICAL: 2465MSI_ADDR_DEST_MODE_PHYSICAL:
2462 MSI_ADDR_DEST_MODE_LOGICAL) | 2466 MSI_ADDR_DEST_MODE_LOGICAL) |
2463 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 2467 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2464 MSI_ADDR_REDIRECTION_CPU: 2468 MSI_ADDR_REDIRECTION_CPU:
@@ -2469,7 +2473,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2469 MSI_DATA_TRIGGER_EDGE | 2473 MSI_DATA_TRIGGER_EDGE |
2470 MSI_DATA_LEVEL_ASSERT | 2474 MSI_DATA_LEVEL_ASSERT |
2471 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 2475 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2472 MSI_DATA_DELIVERY_FIXED: 2476MSI_DATA_DELIVERY_FIXED:
2473 MSI_DATA_DELIVERY_LOWPRI) | 2477 MSI_DATA_DELIVERY_LOWPRI) |
2474 MSI_DATA_VECTOR(vector); 2478 MSI_DATA_VECTOR(vector);
2475 } 2479 }
@@ -2640,12 +2644,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2640#endif /* CONFIG_HT_IRQ */ 2644#endif /* CONFIG_HT_IRQ */
2641 2645
2642/* -------------------------------------------------------------------------- 2646/* --------------------------------------------------------------------------
2643 ACPI-based IOAPIC Configuration 2647 ACPI-based IOAPIC Configuration
2644 -------------------------------------------------------------------------- */ 2648 -------------------------------------------------------------------------- */
2645 2649
2646#ifdef CONFIG_ACPI 2650#ifdef CONFIG_ACPI
2647 2651
2648int __init io_apic_get_unique_id (int ioapic, int apic_id) 2652int __init io_apic_get_unique_id(int ioapic, int apic_id)
2649{ 2653{
2650 union IO_APIC_reg_00 reg_00; 2654 union IO_APIC_reg_00 reg_00;
2651 static physid_mask_t apic_id_map = PHYSID_MASK_NONE; 2655 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -2654,10 +2658,10 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2654 int i = 0; 2658 int i = 0;
2655 2659
2656 /* 2660 /*
2657 * The P4 platform supports up to 256 APIC IDs on two separate APIC 2661 * The P4 platform supports up to 256 APIC IDs on two separate APIC
2658 * buses (one for LAPICs, one for IOAPICs), where predecessors only 2662 * buses (one for LAPICs, one for IOAPICs), where predecessors only
2659 * supports up to 16 on one shared APIC bus. 2663 * supports up to 16 on one shared APIC bus.
2660 * 2664 *
2661 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full 2665 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2662 * advantage of new APIC bus architecture. 2666 * advantage of new APIC bus architecture.
2663 */ 2667 */
@@ -2676,7 +2680,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2676 } 2680 }
2677 2681
2678 /* 2682 /*
2679 * Every APIC in a system must have a unique ID or we get lots of nice 2683 * Every APIC in a system must have a unique ID or we get lots of nice
2680 * 'stuck on smp_invalidate_needed IPI wait' messages. 2684 * 'stuck on smp_invalidate_needed IPI wait' messages.
2681 */ 2685 */
2682 if (check_apicid_used(apic_id_map, apic_id)) { 2686 if (check_apicid_used(apic_id_map, apic_id)) {
@@ -2693,7 +2697,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2693 "trying %d\n", ioapic, apic_id, i); 2697 "trying %d\n", ioapic, apic_id, i);
2694 2698
2695 apic_id = i; 2699 apic_id = i;
2696 } 2700 }
2697 2701
2698 tmp = apicid_to_cpu_present(apic_id); 2702 tmp = apicid_to_cpu_present(apic_id);
2699 physids_or(apic_id_map, apic_id_map, tmp); 2703 physids_or(apic_id_map, apic_id_map, tmp);
@@ -2720,7 +2724,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2720} 2724}
2721 2725
2722 2726
2723int __init io_apic_get_version (int ioapic) 2727int __init io_apic_get_version(int ioapic)
2724{ 2728{
2725 union IO_APIC_reg_01 reg_01; 2729 union IO_APIC_reg_01 reg_01;
2726 unsigned long flags; 2730 unsigned long flags;
@@ -2733,7 +2737,7 @@ int __init io_apic_get_version (int ioapic)
2733} 2737}
2734 2738
2735 2739
2736int __init io_apic_get_redir_entries (int ioapic) 2740int __init io_apic_get_redir_entries(int ioapic)
2737{ 2741{
2738 union IO_APIC_reg_01 reg_01; 2742 union IO_APIC_reg_01 reg_01;
2739 unsigned long flags; 2743 unsigned long flags;
@@ -2746,7 +2750,7 @@ int __init io_apic_get_redir_entries (int ioapic)
2746} 2750}
2747 2751
2748 2752
2749int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) 2753int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
2750{ 2754{
2751 struct IO_APIC_route_entry entry; 2755 struct IO_APIC_route_entry entry;
2752 2756
@@ -2762,7 +2766,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2762 * corresponding device driver registers for this IRQ. 2766 * corresponding device driver registers for this IRQ.
2763 */ 2767 */
2764 2768
2765 memset(&entry,0,sizeof(entry)); 2769 memset(&entry, 0, sizeof(entry));
2766 2770
2767 entry.delivery_mode = INT_DELIVERY_MODE; 2771 entry.delivery_mode = INT_DELIVERY_MODE;
2768 entry.dest_mode = INT_DEST_MODE; 2772 entry.dest_mode = INT_DEST_MODE;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index ef1a8dfcc529..e5ef60303562 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -61,7 +61,7 @@ struct irq_cfg {
61}; 61};
62 62
63/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 63/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
64struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { 64static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
65 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 65 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
66 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 66 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
67 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 67 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
@@ -90,7 +90,7 @@ static int no_timer_check;
90 90
91static int disable_timer_pin_1 __initdata; 91static int disable_timer_pin_1 __initdata;
92 92
93int timer_over_8254 __initdata = 1; 93int timer_through_8259 __initdata;
94 94
95/* Where if anywhere is the i8259 connect in external int mode */ 95/* Where if anywhere is the i8259 connect in external int mode */
96static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 96static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -183,7 +183,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
183 break; 183 break;
184 reg = io_apic_read(entry->apic, 0x10 + pin*2); 184 reg = io_apic_read(entry->apic, 0x10 + pin*2);
185 /* Is the remote IRR bit set? */ 185 /* Is the remote IRR bit set? */
186 if ((reg >> 14) & 1) { 186 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
187 spin_unlock_irqrestore(&ioapic_lock, flags); 187 spin_unlock_irqrestore(&ioapic_lock, flags);
188 return true; 188 return true;
189 } 189 }
@@ -298,7 +298,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
298 break; 298 break;
299 io_apic_write(apic, 0x11 + pin*2, dest); 299 io_apic_write(apic, 0x11 + pin*2, dest);
300 reg = io_apic_read(apic, 0x10 + pin*2); 300 reg = io_apic_read(apic, 0x10 + pin*2);
301 reg &= ~0x000000ff; 301 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
302 reg |= vector; 302 reg |= vector;
303 io_apic_modify(apic, reg); 303 io_apic_modify(apic, reg);
304 if (!entry->next) 304 if (!entry->next)
@@ -360,16 +360,37 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
360 entry->pin = pin; 360 entry->pin = pin;
361} 361}
362 362
363/*
364 * Reroute an IRQ to a different pin.
365 */
366static void __init replace_pin_at_irq(unsigned int irq,
367 int oldapic, int oldpin,
368 int newapic, int newpin)
369{
370 struct irq_pin_list *entry = irq_2_pin + irq;
371
372 while (1) {
373 if (entry->apic == oldapic && entry->pin == oldpin) {
374 entry->apic = newapic;
375 entry->pin = newpin;
376 }
377 if (!entry->next)
378 break;
379 entry = irq_2_pin + entry->next;
380 }
381}
382
363 383
364#define DO_ACTION(name,R,ACTION, FINAL) \ 384#define DO_ACTION(name,R,ACTION, FINAL) \
365 \ 385 \
366 static void name##_IO_APIC_irq (unsigned int irq) \ 386 static void name##_IO_APIC_irq (unsigned int irq) \
367 __DO_ACTION(R, ACTION, FINAL) 387 __DO_ACTION(R, ACTION, FINAL)
368 388
369DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) 389/* mask = 1 */
370 /* mask = 1 */ 390DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
371DO_ACTION( __unmask, 0, &= 0xfffeffff, ) 391
372 /* mask = 0 */ 392/* mask = 0 */
393DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
373 394
374static void mask_IO_APIC_irq (unsigned int irq) 395static void mask_IO_APIC_irq (unsigned int irq)
375{ 396{
@@ -430,20 +451,6 @@ static int __init disable_timer_pin_setup(char *arg)
430} 451}
431__setup("disable_timer_pin_1", disable_timer_pin_setup); 452__setup("disable_timer_pin_1", disable_timer_pin_setup);
432 453
433static int __init setup_disable_8254_timer(char *s)
434{
435 timer_over_8254 = -1;
436 return 1;
437}
438static int __init setup_enable_8254_timer(char *s)
439{
440 timer_over_8254 = 2;
441 return 1;
442}
443
444__setup("disable_8254_timer", setup_disable_8254_timer);
445__setup("enable_8254_timer", setup_enable_8254_timer);
446
447 454
448/* 455/*
449 * Find the IRQ entry number of a certain pin. 456 * Find the IRQ entry number of a certain pin.
@@ -911,26 +918,21 @@ static void __init setup_IO_APIC_irqs(void)
911} 918}
912 919
913/* 920/*
914 * Set up the 8259A-master output pin as broadcast to all 921 * Set up the timer pin, possibly with the 8259A-master behind.
915 * CPUs.
916 */ 922 */
917static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) 923static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
924 int vector)
918{ 925{
919 struct IO_APIC_route_entry entry; 926 struct IO_APIC_route_entry entry;
920 927
921 memset(&entry, 0, sizeof(entry)); 928 memset(&entry, 0, sizeof(entry));
922 929
923 disable_8259A_irq(0);
924
925 /* mask LVT0 */
926 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
927
928 /* 930 /*
929 * We use logical delivery to get the timer IRQ 931 * We use logical delivery to get the timer IRQ
930 * to the first CPU. 932 * to the first CPU.
931 */ 933 */
932 entry.dest_mode = INT_DEST_MODE; 934 entry.dest_mode = INT_DEST_MODE;
933 entry.mask = 0; /* unmask IRQ now */ 935 entry.mask = 1; /* mask IRQ now */
934 entry.dest = cpu_mask_to_apicid(TARGET_CPUS); 936 entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
935 entry.delivery_mode = INT_DELIVERY_MODE; 937 entry.delivery_mode = INT_DELIVERY_MODE;
936 entry.polarity = 0; 938 entry.polarity = 0;
@@ -939,7 +941,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
939 941
940 /* 942 /*
941 * The timer IRQ doesn't have to know that behind the 943 * The timer IRQ doesn't have to know that behind the
942 * scene we have a 8259A-master in AEOI mode ... 944 * scene we may have a 8259A-master in AEOI mode ...
943 */ 945 */
944 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 946 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
945 947
@@ -947,8 +949,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
947 * Add it to the IO-APIC irq-routing table: 949 * Add it to the IO-APIC irq-routing table:
948 */ 950 */
949 ioapic_write_entry(apic, pin, entry); 951 ioapic_write_entry(apic, pin, entry);
950
951 enable_8259A_irq(0);
952} 952}
953 953
954void __apicdebuginit print_IO_APIC(void) 954void __apicdebuginit print_IO_APIC(void)
@@ -1077,6 +1077,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1077 1077
1078 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1078 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1079 smp_processor_id(), hard_smp_processor_id()); 1079 smp_processor_id(), hard_smp_processor_id());
1080 v = apic_read(APIC_ID);
1080 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1081 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
1081 v = apic_read(APIC_LVR); 1082 v = apic_read(APIC_LVR);
1082 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1083 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
@@ -1659,6 +1660,7 @@ static inline void __init check_timer(void)
1659 struct irq_cfg *cfg = irq_cfg + 0; 1660 struct irq_cfg *cfg = irq_cfg + 0;
1660 int apic1, pin1, apic2, pin2; 1661 int apic1, pin1, apic2, pin2;
1661 unsigned long flags; 1662 unsigned long flags;
1663 int no_pin1 = 0;
1662 1664
1663 local_irq_save(flags); 1665 local_irq_save(flags);
1664 1666
@@ -1669,16 +1671,11 @@ static inline void __init check_timer(void)
1669 assign_irq_vector(0, TARGET_CPUS); 1671 assign_irq_vector(0, TARGET_CPUS);
1670 1672
1671 /* 1673 /*
1672 * Subtle, code in do_timer_interrupt() expects an AEOI 1674 * As IRQ0 is to be enabled in the 8259A, the virtual
1673 * mode for the 8259A whenever interrupts are routed 1675 * wire has to be disabled in the local APIC.
1674 * through I/O APICs. Also IRQ0 has to be enabled in
1675 * the 8259A which implies the virtual wire has to be
1676 * disabled in the local APIC.
1677 */ 1676 */
1678 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 1677 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1679 init_8259A(1); 1678 init_8259A(1);
1680 if (timer_over_8254 > 0)
1681 enable_8259A_irq(0);
1682 1679
1683 pin1 = find_isa_irq_pin(0, mp_INT); 1680 pin1 = find_isa_irq_pin(0, mp_INT);
1684 apic1 = find_isa_irq_apic(0, mp_INT); 1681 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -1688,15 +1685,39 @@ static inline void __init check_timer(void)
1688 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 1685 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1689 cfg->vector, apic1, pin1, apic2, pin2); 1686 cfg->vector, apic1, pin1, apic2, pin2);
1690 1687
1688 /*
1689 * Some BIOS writers are clueless and report the ExtINTA
1690 * I/O APIC input from the cascaded 8259A as the timer
1691 * interrupt input. So just in case, if only one pin
1692 * was found above, try it both directly and through the
1693 * 8259A.
1694 */
1695 if (pin1 == -1) {
1696 pin1 = pin2;
1697 apic1 = apic2;
1698 no_pin1 = 1;
1699 } else if (pin2 == -1) {
1700 pin2 = pin1;
1701 apic2 = apic1;
1702 }
1703
1704 replace_pin_at_irq(0, 0, 0, apic1, pin1);
1705 apic1 = 0;
1706 pin1 = 0;
1707 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
1708
1691 if (pin1 != -1) { 1709 if (pin1 != -1) {
1692 /* 1710 /*
1693 * Ok, does IRQ0 through the IOAPIC work? 1711 * Ok, does IRQ0 through the IOAPIC work?
1694 */ 1712 */
1713 if (no_pin1) {
1714 add_pin_to_irq(0, apic1, pin1);
1715 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
1716 }
1695 unmask_IO_APIC_irq(0); 1717 unmask_IO_APIC_irq(0);
1696 if (!no_timer_check && timer_irq_works()) { 1718 if (!no_timer_check && timer_irq_works()) {
1697 nmi_watchdog_default(); 1719 nmi_watchdog_default();
1698 if (nmi_watchdog == NMI_IO_APIC) { 1720 if (nmi_watchdog == NMI_IO_APIC) {
1699 disable_8259A_irq(0);
1700 setup_nmi(); 1721 setup_nmi();
1701 enable_8259A_irq(0); 1722 enable_8259A_irq(0);
1702 } 1723 }
@@ -1705,42 +1726,48 @@ static inline void __init check_timer(void)
1705 goto out; 1726 goto out;
1706 } 1727 }
1707 clear_IO_APIC_pin(apic1, pin1); 1728 clear_IO_APIC_pin(apic1, pin1);
1708 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " 1729 if (!no_pin1)
1709 "connected to IO-APIC\n"); 1730 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
1710 } 1731 "8254 timer not connected to IO-APIC\n");
1711 1732
1712 apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " 1733 apic_printk(APIC_VERBOSE,KERN_INFO
1713 "through the 8259A ... "); 1734 "...trying to set up timer (IRQ0) "
1714 if (pin2 != -1) { 1735 "through the 8259A ... ");
1715 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", 1736 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
1716 apic2, pin2); 1737 apic2, pin2);
1717 /* 1738 /*
1718 * legacy devices should be connected to IO APIC #0 1739 * legacy devices should be connected to IO APIC #0
1719 */ 1740 */
1720 setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); 1741 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
1742 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
1743 unmask_IO_APIC_irq(0);
1744 enable_8259A_irq(0);
1721 if (timer_irq_works()) { 1745 if (timer_irq_works()) {
1722 apic_printk(APIC_VERBOSE," works.\n"); 1746 apic_printk(APIC_VERBOSE," works.\n");
1747 timer_through_8259 = 1;
1723 nmi_watchdog_default(); 1748 nmi_watchdog_default();
1724 if (nmi_watchdog == NMI_IO_APIC) { 1749 if (nmi_watchdog == NMI_IO_APIC) {
1750 disable_8259A_irq(0);
1725 setup_nmi(); 1751 setup_nmi();
1752 enable_8259A_irq(0);
1726 } 1753 }
1727 goto out; 1754 goto out;
1728 } 1755 }
1729 /* 1756 /*
1730 * Cleanup, just in case ... 1757 * Cleanup, just in case ...
1731 */ 1758 */
1759 disable_8259A_irq(0);
1732 clear_IO_APIC_pin(apic2, pin2); 1760 clear_IO_APIC_pin(apic2, pin2);
1761 apic_printk(APIC_VERBOSE," failed.\n");
1733 } 1762 }
1734 apic_printk(APIC_VERBOSE," failed.\n");
1735 1763
1736 if (nmi_watchdog == NMI_IO_APIC) { 1764 if (nmi_watchdog == NMI_IO_APIC) {
1737 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 1765 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
1738 nmi_watchdog = 0; 1766 nmi_watchdog = NMI_NONE;
1739 } 1767 }
1740 1768
1741 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 1769 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
1742 1770
1743 disable_8259A_irq(0);
1744 irq_desc[0].chip = &lapic_irq_type; 1771 irq_desc[0].chip = &lapic_irq_type;
1745 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 1772 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
1746 enable_8259A_irq(0); 1773 enable_8259A_irq(0);
@@ -1749,6 +1776,7 @@ static inline void __init check_timer(void)
1749 apic_printk(APIC_VERBOSE," works.\n"); 1776 apic_printk(APIC_VERBOSE," works.\n");
1750 goto out; 1777 goto out;
1751 } 1778 }
1779 disable_8259A_irq(0);
1752 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 1780 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
1753 apic_printk(APIC_VERBOSE," failed.\n"); 1781 apic_printk(APIC_VERBOSE," failed.\n");
1754 1782
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index c0df7b89ca23..9d98cda39ad9 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -8,7 +8,6 @@
8#include <linux/kernel_stat.h> 8#include <linux/kernel_stat.h>
9#include <linux/mc146818rtc.h> 9#include <linux/mc146818rtc.h>
10#include <linux/cache.h> 10#include <linux/cache.h>
11#include <linux/interrupt.h>
12#include <linux/cpu.h> 11#include <linux/cpu.h>
13#include <linux/module.h> 12#include <linux/module.h>
14 13
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 147352df28b9..468acd04aa2e 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -313,16 +313,20 @@ skip:
313 per_cpu(irq_stat,j).irq_tlb_count); 313 per_cpu(irq_stat,j).irq_tlb_count);
314 seq_printf(p, " TLB shootdowns\n"); 314 seq_printf(p, " TLB shootdowns\n");
315#endif 315#endif
316#ifdef CONFIG_X86_MCE
316 seq_printf(p, "TRM: "); 317 seq_printf(p, "TRM: ");
317 for_each_online_cpu(j) 318 for_each_online_cpu(j)
318 seq_printf(p, "%10u ", 319 seq_printf(p, "%10u ",
319 per_cpu(irq_stat,j).irq_thermal_count); 320 per_cpu(irq_stat,j).irq_thermal_count);
320 seq_printf(p, " Thermal event interrupts\n"); 321 seq_printf(p, " Thermal event interrupts\n");
322#endif
323#ifdef CONFIG_X86_LOCAL_APIC
321 seq_printf(p, "SPU: "); 324 seq_printf(p, "SPU: ");
322 for_each_online_cpu(j) 325 for_each_online_cpu(j)
323 seq_printf(p, "%10u ", 326 seq_printf(p, "%10u ",
324 per_cpu(irq_stat,j).irq_spurious_count); 327 per_cpu(irq_stat,j).irq_spurious_count);
325 seq_printf(p, " Spurious interrupts\n"); 328 seq_printf(p, " Spurious interrupts\n");
329#endif
326 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); 330 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
327#if defined(CONFIG_X86_IO_APIC) 331#if defined(CONFIG_X86_IO_APIC)
328 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); 332 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
@@ -331,6 +335,40 @@ skip:
331 return 0; 335 return 0;
332} 336}
333 337
338/*
339 * /proc/stat helpers
340 */
341u64 arch_irq_stat_cpu(unsigned int cpu)
342{
343 u64 sum = nmi_count(cpu);
344
345#ifdef CONFIG_X86_LOCAL_APIC
346 sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
347#endif
348#ifdef CONFIG_SMP
349 sum += per_cpu(irq_stat, cpu).irq_resched_count;
350 sum += per_cpu(irq_stat, cpu).irq_call_count;
351 sum += per_cpu(irq_stat, cpu).irq_tlb_count;
352#endif
353#ifdef CONFIG_X86_MCE
354 sum += per_cpu(irq_stat, cpu).irq_thermal_count;
355#endif
356#ifdef CONFIG_X86_LOCAL_APIC
357 sum += per_cpu(irq_stat, cpu).irq_spurious_count;
358#endif
359 return sum;
360}
361
362u64 arch_irq_stat(void)
363{
364 u64 sum = atomic_read(&irq_err_count);
365
366#ifdef CONFIG_X86_IO_APIC
367 sum += atomic_read(&irq_mis_count);
368#endif
369 return sum;
370}
371
334#ifdef CONFIG_HOTPLUG_CPU 372#ifdef CONFIG_HOTPLUG_CPU
335#include <mach_apic.h> 373#include <mach_apic.h>
336 374
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 3aac15466a91..1f78b238d8d2 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -135,6 +135,7 @@ skip:
135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); 135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
136 seq_printf(p, " TLB shootdowns\n"); 136 seq_printf(p, " TLB shootdowns\n");
137#endif 137#endif
138#ifdef CONFIG_X86_MCE
138 seq_printf(p, "TRM: "); 139 seq_printf(p, "TRM: ");
139 for_each_online_cpu(j) 140 for_each_online_cpu(j)
140 seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); 141 seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
@@ -143,6 +144,7 @@ skip:
143 for_each_online_cpu(j) 144 for_each_online_cpu(j)
144 seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); 145 seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
145 seq_printf(p, " Threshold APIC interrupts\n"); 146 seq_printf(p, " Threshold APIC interrupts\n");
147#endif
146 seq_printf(p, "SPU: "); 148 seq_printf(p, "SPU: ");
147 for_each_online_cpu(j) 149 for_each_online_cpu(j)
148 seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); 150 seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
@@ -153,6 +155,32 @@ skip:
153} 155}
154 156
155/* 157/*
158 * /proc/stat helpers
159 */
160u64 arch_irq_stat_cpu(unsigned int cpu)
161{
162 u64 sum = cpu_pda(cpu)->__nmi_count;
163
164 sum += cpu_pda(cpu)->apic_timer_irqs;
165#ifdef CONFIG_SMP
166 sum += cpu_pda(cpu)->irq_resched_count;
167 sum += cpu_pda(cpu)->irq_call_count;
168 sum += cpu_pda(cpu)->irq_tlb_count;
169#endif
170#ifdef CONFIG_X86_MCE
171 sum += cpu_pda(cpu)->irq_thermal_count;
172 sum += cpu_pda(cpu)->irq_threshold_count;
173#endif
174 sum += cpu_pda(cpu)->irq_spurious_count;
175 return sum;
176}
177
178u64 arch_irq_stat(void)
179{
180 return atomic_read(&irq_err_count);
181}
182
183/*
156 * do_IRQ handles all normal device IRQ's (the special 184 * do_IRQ handles all normal device IRQ's (the special
157 * SMP cross-CPU interrupts have their own specific 185 * SMP cross-CPU interrupts have their own specific
158 * handlers). 186 * handlers).
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
new file mode 100644
index 000000000000..d66914287ee1
--- /dev/null
+++ b/arch/x86/kernel/irqinit_32.c
@@ -0,0 +1,114 @@
1#include <linux/errno.h>
2#include <linux/signal.h>
3#include <linux/sched.h>
4#include <linux/ioport.h>
5#include <linux/interrupt.h>
6#include <linux/slab.h>
7#include <linux/random.h>
8#include <linux/init.h>
9#include <linux/kernel_stat.h>
10#include <linux/sysdev.h>
11#include <linux/bitops.h>
12
13#include <asm/atomic.h>
14#include <asm/system.h>
15#include <asm/io.h>
16#include <asm/timer.h>
17#include <asm/pgtable.h>
18#include <asm/delay.h>
19#include <asm/desc.h>
20#include <asm/apic.h>
21#include <asm/arch_hooks.h>
22#include <asm/i8259.h>
23
24
25
26/*
27 * Note that on a 486, we don't want to do a SIGFPE on an irq13
28 * as the irq is unreliable, and exception 16 works correctly
29 * (ie as explained in the intel literature). On a 386, you
30 * can't use exception 16 due to bad IBM design, so we have to
31 * rely on the less exact irq13.
32 *
33 * Careful.. Not only is IRQ13 unreliable, but it is also
34 * leads to races. IBM designers who came up with it should
35 * be shot.
36 */
37
38
39static irqreturn_t math_error_irq(int cpl, void *dev_id)
40{
41 extern void math_error(void __user *);
42 outb(0,0xF0);
43 if (ignore_fpu_irq || !boot_cpu_data.hard_math)
44 return IRQ_NONE;
45 math_error((void __user *)get_irq_regs()->ip);
46 return IRQ_HANDLED;
47}
48
49/*
50 * New motherboards sometimes make IRQ 13 be a PCI interrupt,
51 * so allow interrupt sharing.
52 */
53static struct irqaction fpu_irq = {
54 .handler = math_error_irq,
55 .mask = CPU_MASK_NONE,
56 .name = "fpu",
57};
58
59void __init init_ISA_irqs (void)
60{
61 int i;
62
63#ifdef CONFIG_X86_LOCAL_APIC
64 init_bsp_APIC();
65#endif
66 init_8259A(0);
67
68 /*
69 * 16 old-style INTA-cycle interrupts:
70 */
71 for (i = 0; i < 16; i++) {
72 set_irq_chip_and_handler_name(i, &i8259A_chip,
73 handle_level_irq, "XT");
74 }
75}
76
77/* Overridden in paravirt.c */
78void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
79
80void __init native_init_IRQ(void)
81{
82 int i;
83
84 /* all the set up before the call gates are initialised */
85 pre_intr_init_hook();
86
87 /*
88 * Cover the whole vector space, no vector can escape
89 * us. (some of these will be overridden and become
90 * 'special' SMP interrupts)
91 */
92 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
93 int vector = FIRST_EXTERNAL_VECTOR + i;
94 if (i >= NR_IRQS)
95 break;
96 /* SYSCALL_VECTOR was reserved in trap_init. */
97 if (!test_bit(vector, used_vectors))
98 set_intr_gate(vector, interrupt[i]);
99 }
100
101 /* setup after call gates are initialised (usually add in
102 * the architecture specific gates)
103 */
104 intr_init_hook();
105
106 /*
107 * External FPU? Set up irq13 if so, for
108 * original braindamaged IBM FERR coupling.
109 */
110 if (boot_cpu_data.hard_math && !cpu_has_fpu)
111 setup_irq(FPU_IRQ, &fpu_irq);
112
113 irq_ctx_init(smp_processor_id());
114}
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
new file mode 100644
index 000000000000..64bc0f14285f
--- /dev/null
+++ b/arch/x86/kernel/irqinit_64.c
@@ -0,0 +1,203 @@
1#include <linux/linkage.h>
2#include <linux/errno.h>
3#include <linux/signal.h>
4#include <linux/sched.h>
5#include <linux/ioport.h>
6#include <linux/interrupt.h>
7#include <linux/timex.h>
8#include <linux/slab.h>
9#include <linux/random.h>
10#include <linux/init.h>
11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h>
13#include <linux/bitops.h>
14
15#include <asm/acpi.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18#include <asm/io.h>
19#include <asm/hw_irq.h>
20#include <asm/pgtable.h>
21#include <asm/delay.h>
22#include <asm/desc.h>
23#include <asm/apic.h>
24#include <asm/i8259.h>
25
26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define BI(x,y) \
38 BUILD_IRQ(x##y)
39
40#define BUILD_16_IRQS(x) \
41 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
42 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
43 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
44 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
45
46/*
47 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
48 * (these are usually mapped to vectors 0x30-0x3f)
49 */
50
51/*
52 * The IO-APIC gives us many more interrupt sources. Most of these
53 * are unused but an SMP system is supposed to have enough memory ...
54 * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
55 * across the spectrum, so we really want to be prepared to get all
56 * of these. Plus, more powerful systems might have more than 64
57 * IO-APIC registers.
58 *
59 * (these are usually mapped into the 0x30-0xff vector range)
60 */
61 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
62BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
63BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
64BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
65
66#undef BUILD_16_IRQS
67#undef BI
68
69
70#define IRQ(x,y) \
71 IRQ##x##y##_interrupt
72
73#define IRQLIST_16(x) \
74 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
75 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
76 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
77 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
78
79/* for the irq vectors */
80static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
81 IRQLIST_16(0x2), IRQLIST_16(0x3),
82 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
83 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
84 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
85};
86
87#undef IRQ
88#undef IRQLIST_16
89
90
91
92
93/*
94 * IRQ2 is cascade interrupt to second interrupt controller
95 */
96
97static struct irqaction irq2 = {
98 .handler = no_action,
99 .mask = CPU_MASK_NONE,
100 .name = "cascade",
101};
102DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
103 [0 ... IRQ0_VECTOR - 1] = -1,
104 [IRQ0_VECTOR] = 0,
105 [IRQ1_VECTOR] = 1,
106 [IRQ2_VECTOR] = 2,
107 [IRQ3_VECTOR] = 3,
108 [IRQ4_VECTOR] = 4,
109 [IRQ5_VECTOR] = 5,
110 [IRQ6_VECTOR] = 6,
111 [IRQ7_VECTOR] = 7,
112 [IRQ8_VECTOR] = 8,
113 [IRQ9_VECTOR] = 9,
114 [IRQ10_VECTOR] = 10,
115 [IRQ11_VECTOR] = 11,
116 [IRQ12_VECTOR] = 12,
117 [IRQ13_VECTOR] = 13,
118 [IRQ14_VECTOR] = 14,
119 [IRQ15_VECTOR] = 15,
120 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
121};
122
123static void __init init_ISA_irqs (void)
124{
125 int i;
126
127 init_bsp_APIC();
128 init_8259A(0);
129
130 for (i = 0; i < NR_IRQS; i++) {
131 irq_desc[i].status = IRQ_DISABLED;
132 irq_desc[i].action = NULL;
133 irq_desc[i].depth = 1;
134
135 if (i < 16) {
136 /*
137 * 16 old-style INTA-cycle interrupts:
138 */
139 set_irq_chip_and_handler_name(i, &i8259A_chip,
140 handle_level_irq, "XT");
141 } else {
142 /*
143 * 'high' PCI IRQs filled in on demand
144 */
145 irq_desc[i].chip = &no_irq_chip;
146 }
147 }
148}
149
150void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
151
152void __init native_init_IRQ(void)
153{
154 int i;
155
156 init_ISA_irqs();
157 /*
158 * Cover the whole vector space, no vector can escape
159 * us. (some of these will be overridden and become
160 * 'special' SMP interrupts)
161 */
162 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
163 int vector = FIRST_EXTERNAL_VECTOR + i;
164 if (vector != IA32_SYSCALL_VECTOR)
165 set_intr_gate(vector, interrupt[i]);
166 }
167
168#ifdef CONFIG_SMP
169 /*
170 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
171 * IPI, driven by wakeup.
172 */
173 set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
174
175 /* IPIs for invalidation */
176 set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
177 set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
178 set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
179 set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
180 set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
181 set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
182 set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
183 set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
184
185 /* IPI for generic function call */
186 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
187
188 /* Low priority IPI to cleanup after moving an irq */
189 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
190#endif
191 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
192 set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
193
194 /* self generated IPI for local APIC timer */
195 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
196
197 /* IPI vectors for APIC spurious and error interrupts */
198 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
199 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
200
201 if (!acpi_ioapic)
202 setup_irq(2, &irq2);
203}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a30986d472..87edf1ceb1df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/clocksource.h> 19#include <linux/clocksource.h>
20#include <linux/kvm_para.h> 20#include <linux/kvm_para.h>
21#include <asm/pvclock.h>
21#include <asm/arch_hooks.h> 22#include <asm/arch_hooks.h>
22#include <asm/msr.h> 23#include <asm/msr.h>
23#include <asm/apic.h> 24#include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
36early_param("no-kvmclock", parse_no_kvmclock); 37early_param("no-kvmclock", parse_no_kvmclock);
37 38
38/* The hypervisor will put information about time periodically here */ 39/* The hypervisor will put information about time periodically here */
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); 40static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
40#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field 41static struct pvclock_wall_clock wall_clock;
41 42
42static inline u64 kvm_get_delta(u64 last_tsc)
43{
44 int cpu = smp_processor_id();
45 u64 delta = native_read_tsc() - last_tsc;
46 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
47}
48
49static struct kvm_wall_clock wall_clock;
50static cycle_t kvm_clock_read(void);
51/* 43/*
52 * The wallclock is the time of day when we booted. Since then, some time may 44 * The wallclock is the time of day when we booted. Since then, some time may
53 * have elapsed since the hypervisor wrote the data. So we try to account for 45 * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
55 */ 47 */
56static unsigned long kvm_get_wallclock(void) 48static unsigned long kvm_get_wallclock(void)
57{ 49{
58 u32 wc_sec, wc_nsec; 50 struct pvclock_vcpu_time_info *vcpu_time;
59 u64 delta;
60 struct timespec ts; 51 struct timespec ts;
61 int version, nsec;
62 int low, high; 52 int low, high;
63 53
64 low = (int)__pa(&wall_clock); 54 low = (int)__pa(&wall_clock);
65 high = ((u64)__pa(&wall_clock) >> 32); 55 high = ((u64)__pa(&wall_clock) >> 32);
56 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
66 57
67 delta = kvm_clock_read(); 58 vcpu_time = &get_cpu_var(hv_clock);
59 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
60 put_cpu_var(hv_clock);
68 61
69 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 62 return ts.tv_sec;
70 do {
71 version = wall_clock.wc_version;
72 rmb();
73 wc_sec = wall_clock.wc_sec;
74 wc_nsec = wall_clock.wc_nsec;
75 rmb();
76 } while ((wall_clock.wc_version != version) || (version & 1));
77
78 delta = kvm_clock_read() - delta;
79 delta += wc_nsec;
80 nsec = do_div(delta, NSEC_PER_SEC);
81 set_normalized_timespec(&ts, wc_sec + delta, nsec);
82 /*
83 * Of all mechanisms of time adjustment I've tested, this one
84 * was the champion!
85 */
86 return ts.tv_sec + 1;
87} 63}
88 64
89static int kvm_set_wallclock(unsigned long now) 65static int kvm_set_wallclock(unsigned long now)
90{ 66{
91 return 0; 67 return -1;
92} 68}
93 69
94/*
95 * This is our read_clock function. The host puts an tsc timestamp each time
96 * it updates a new time. Without the tsc adjustment, we can have a situation
97 * in which a vcpu starts to run earlier (smaller system_time), but probes
98 * time later (compared to another vcpu), leading to backwards time
99 */
100static cycle_t kvm_clock_read(void) 70static cycle_t kvm_clock_read(void)
101{ 71{
102 u64 last_tsc, now; 72 struct pvclock_vcpu_time_info *src;
103 int cpu; 73 cycle_t ret;
104 74
105 preempt_disable(); 75 src = &get_cpu_var(hv_clock);
106 cpu = smp_processor_id(); 76 ret = pvclock_clocksource_read(src);
107 77 put_cpu_var(hv_clock);
108 last_tsc = get_clock(cpu, tsc_timestamp); 78 return ret;
109 now = get_clock(cpu, system_time);
110
111 now += kvm_get_delta(last_tsc);
112 preempt_enable();
113
114 return now;
115} 79}
80
116static struct clocksource kvm_clock = { 81static struct clocksource kvm_clock = {
117 .name = "kvm-clock", 82 .name = "kvm-clock",
118 .read = kvm_clock_read, 83 .read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
123 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 88 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
124}; 89};
125 90
126static int kvm_register_clock(void) 91static int kvm_register_clock(char *txt)
127{ 92{
128 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
129 int low, high; 94 int low, high;
130 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 95 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
131 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 96 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
132 97 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
98 cpu, high, low, txt);
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 99 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134} 100}
135 101
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
140 * Now that the first cpu already had this clocksource initialized, 106 * Now that the first cpu already had this clocksource initialized,
141 * we shouldn't fail. 107 * we shouldn't fail.
142 */ 108 */
143 WARN_ON(kvm_register_clock()); 109 WARN_ON(kvm_register_clock("secondary cpu clock"));
144 /* ok, done with our trickery, call native */ 110 /* ok, done with our trickery, call native */
145 setup_secondary_APIC_clock(); 111 setup_secondary_APIC_clock();
146} 112}
147#endif 113#endif
148 114
115#ifdef CONFIG_SMP
116void __init kvm_smp_prepare_boot_cpu(void)
117{
118 WARN_ON(kvm_register_clock("primary cpu clock"));
119 native_smp_prepare_boot_cpu();
120}
121#endif
122
149/* 123/*
150 * After the clock is registered, the host will keep writing to the 124 * After the clock is registered, the host will keep writing to the
151 * registered memory location. If the guest happens to shutdown, this memory 125 * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
174 return; 148 return;
175 149
176 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 150 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
177 if (kvm_register_clock()) 151 if (kvm_register_clock("boot clock"))
178 return; 152 return;
179 pv_time_ops.get_wallclock = kvm_get_wallclock; 153 pv_time_ops.get_wallclock = kvm_get_wallclock;
180 pv_time_ops.set_wallclock = kvm_set_wallclock; 154 pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
182#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
183 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; 157 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
184#endif 158#endif
159#ifdef CONFIG_SMP
160 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
161#endif
185 machine_ops.shutdown = kvm_shutdown; 162 machine_ops.shutdown = kvm_shutdown;
186#ifdef CONFIG_KEXEC 163#ifdef CONFIG_KEXEC
187 machine_ops.crash_shutdown = kvm_crash_shutdown; 164 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 0224c3637c73..21f2bae98c15 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,9 +20,9 @@
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21 21
22#ifdef CONFIG_SMP 22#ifdef CONFIG_SMP
23static void flush_ldt(void *null) 23static void flush_ldt(void *current_mm)
24{ 24{
25 if (current->active_mm) 25 if (current->active_mm == current_mm)
26 load_LDT(&current->active_mm->context); 26 load_LDT(&current->active_mm->context);
27} 27}
28#endif 28#endif
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
68 load_LDT(pc); 68 load_LDT(pc);
69 mask = cpumask_of_cpu(smp_processor_id()); 69 mask = cpumask_of_cpu(smp_processor_id());
70 if (!cpus_equal(current->mm->cpu_vm_mask, mask)) 70 if (!cpus_equal(current->mm->cpu_vm_mask, mask))
71 smp_call_function(flush_ldt, NULL, 1, 1); 71 smp_call_function(flush_ldt, current->mm, 1, 1);
72 preempt_enable(); 72 preempt_enable();
73#else 73#else
74 load_LDT(pc); 74 load_LDT(pc);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index d0b234c9fc31..f4960171bc66 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -39,7 +39,7 @@ static void set_idt(void *newidt, __u16 limit)
39 curidt.address = (unsigned long)newidt; 39 curidt.address = (unsigned long)newidt;
40 40
41 load_idt(&curidt); 41 load_idt(&curidt);
42}; 42}
43 43
44 44
45static void set_gdt(void *newgdt, __u16 limit) 45static void set_gdt(void *newgdt, __u16 limit)
@@ -51,7 +51,7 @@ static void set_gdt(void *newgdt, __u16 limit)
51 curgdt.address = (unsigned long)newgdt; 51 curgdt.address = (unsigned long)newgdt;
52 52
53 load_gdt(&curgdt); 53 load_gdt(&curgdt);
54}; 54}
55 55
56static void load_segments(void) 56static void load_segments(void)
57{ 57{
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 69729e38b78a..9758fea87c5b 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -5,13 +5,14 @@
5 * 2006 Shaohua Li <shaohua.li@intel.com> 5 * 2006 Shaohua Li <shaohua.li@intel.com>
6 * 6 *
7 * This driver allows to upgrade microcode on Intel processors 7 * This driver allows to upgrade microcode on Intel processors
8 * belonging to IA-32 family - PentiumPro, Pentium II, 8 * belonging to IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc. 9 * Pentium III, Xeon, Pentium 4, etc.
10 * 10 *
11 * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, 11 * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
12 * Order Number 245472 or free download from: 12 * Software Developer's Manual
13 * 13 * Order Number 253668 or free download from:
14 * http://developer.intel.com/design/pentium4/manuals/245472.htm 14 *
15 * http://developer.intel.com/design/pentium4/manuals/253668.htm
15 * 16 *
16 * For more information, go to http://www.urbanmyth.org/microcode 17 * For more information, go to http://www.urbanmyth.org/microcode
17 * 18 *
@@ -58,12 +59,12 @@
58 * nature of implementation. 59 * nature of implementation.
59 * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> 60 * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
60 * Fix the panic when writing zero-length microcode chunk. 61 * Fix the panic when writing zero-length microcode chunk.
61 * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, 62 * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
62 * Jun Nakajima <jun.nakajima@intel.com> 63 * Jun Nakajima <jun.nakajima@intel.com>
63 * Support for the microcode updates in the new format. 64 * Support for the microcode updates in the new format.
64 * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> 65 * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
65 * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl 66 * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
66 * because we no longer hold a copy of applied microcode 67 * because we no longer hold a copy of applied microcode
67 * in kernel memory. 68 * in kernel memory.
68 * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> 69 * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
69 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
@@ -320,11 +321,11 @@ static void apply_microcode(int cpu)
320 return; 321 return;
321 322
322 /* serialize access to the physical write to MSR 0x79 */ 323 /* serialize access to the physical write to MSR 0x79 */
323 spin_lock_irqsave(&microcode_update_lock, flags); 324 spin_lock_irqsave(&microcode_update_lock, flags);
324 325
325 /* write microcode via MSR 0x79 */ 326 /* write microcode via MSR 0x79 */
326 wrmsr(MSR_IA32_UCODE_WRITE, 327 wrmsr(MSR_IA32_UCODE_WRITE,
327 (unsigned long) uci->mc->bits, 328 (unsigned long) uci->mc->bits,
328 (unsigned long) uci->mc->bits >> 16 >> 16); 329 (unsigned long) uci->mc->bits >> 16 >> 16);
329 wrmsr(MSR_IA32_UCODE_REV, 0, 0); 330 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
330 331
@@ -341,7 +342,7 @@ static void apply_microcode(int cpu)
341 return; 342 return;
342 } 343 }
343 printk(KERN_INFO "microcode: CPU%d updated from revision " 344 printk(KERN_INFO "microcode: CPU%d updated from revision "
344 "0x%x to 0x%x, date = %08x \n", 345 "0x%x to 0x%x, date = %08x \n",
345 cpu_num, uci->rev, val[1], uci->mc->hdr.date); 346 cpu_num, uci->rev, val[1], uci->mc->hdr.date);
346 uci->rev = val[1]; 347 uci->rev = val[1];
347} 348}
@@ -534,7 +535,7 @@ static int cpu_request_microcode(int cpu)
534 c->x86, c->x86_model, c->x86_mask); 535 c->x86, c->x86_model, c->x86_mask);
535 error = request_firmware(&firmware, name, &microcode_pdev->dev); 536 error = request_firmware(&firmware, name, &microcode_pdev->dev);
536 if (error) { 537 if (error) {
537 pr_debug("microcode: ucode data file %s load failed\n", name); 538 pr_debug("microcode: data file %s load failed\n", name);
538 return error; 539 return error;
539 } 540 }
540 buf = firmware->data; 541 buf = firmware->data;
@@ -805,6 +806,9 @@ static int __init microcode_init (void)
805{ 806{
806 int error; 807 int error;
807 808
809 printk(KERN_INFO
810 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
811
808 error = microcode_dev_init(); 812 error = microcode_dev_init();
809 if (error) 813 if (error)
810 return error; 814 return error;
@@ -825,9 +829,6 @@ static int __init microcode_init (void)
825 } 829 }
826 830
827 register_hotcpu_notifier(&mc_cpu_notifier); 831 register_hotcpu_notifier(&mc_cpu_notifier);
828
829 printk(KERN_INFO
830 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
831 return 0; 832 return 0;
832} 833}
833 834
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index edc5fbfe85c0..fdfdc550b366 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -12,6 +12,7 @@
12#include <asm/io.h> 12#include <asm/io.h>
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/acpi.h> 14#include <asm/acpi.h>
15#include <asm/mmconfig.h>
15 16
16#include "../pci/pci.h" 17#include "../pci/pci.h"
17 18
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 84160f74eeb0..6580dae46277 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -24,8 +24,11 @@
24#include <linux/kdebug.h> 24#include <linux/kdebug.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26 26
27#include <asm/i8259.h>
28#include <asm/io_apic.h>
27#include <asm/smp.h> 29#include <asm/smp.h>
28#include <asm/nmi.h> 30#include <asm/nmi.h>
31#include <asm/timer.h>
29 32
30#include "mach_traps.h" 33#include "mach_traps.h"
31 34
@@ -81,7 +84,7 @@ int __init check_nmi_watchdog(void)
81 84
82 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 85 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
83 if (!prev_nmi_count) 86 if (!prev_nmi_count)
84 return -1; 87 goto error;
85 88
86 printk(KERN_INFO "Testing NMI watchdog ... "); 89 printk(KERN_INFO "Testing NMI watchdog ... ");
87 90
@@ -118,7 +121,7 @@ int __init check_nmi_watchdog(void)
118 if (!atomic_read(&nmi_active)) { 121 if (!atomic_read(&nmi_active)) {
119 kfree(prev_nmi_count); 122 kfree(prev_nmi_count);
120 atomic_set(&nmi_active, -1); 123 atomic_set(&nmi_active, -1);
121 return -1; 124 goto error;
122 } 125 }
123 printk("OK.\n"); 126 printk("OK.\n");
124 127
@@ -129,6 +132,12 @@ int __init check_nmi_watchdog(void)
129 132
130 kfree(prev_nmi_count); 133 kfree(prev_nmi_count);
131 return 0; 134 return 0;
135error:
136 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
137 disable_8259A_irq(0);
138 timer_ack = 0;
139
140 return -1;
132} 141}
133 142
134static int __init setup_nmi_watchdog(char *str) 143static int __init setup_nmi_watchdog(char *str)
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 5a29ded994fa..0060e44e8989 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -21,6 +21,8 @@
21#include <linux/cpumask.h> 21#include <linux/cpumask.h>
22#include <linux/kdebug.h> 22#include <linux/kdebug.h>
23 23
24#include <asm/i8259.h>
25#include <asm/io_apic.h>
24#include <asm/smp.h> 26#include <asm/smp.h>
25#include <asm/nmi.h> 27#include <asm/nmi.h>
26#include <asm/proto.h> 28#include <asm/proto.h>
@@ -90,7 +92,7 @@ int __init check_nmi_watchdog(void)
90 92
91 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 93 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
92 if (!prev_nmi_count) 94 if (!prev_nmi_count)
93 return -1; 95 goto error;
94 96
95 printk(KERN_INFO "Testing NMI watchdog ... "); 97 printk(KERN_INFO "Testing NMI watchdog ... ");
96 98
@@ -121,7 +123,7 @@ int __init check_nmi_watchdog(void)
121 if (!atomic_read(&nmi_active)) { 123 if (!atomic_read(&nmi_active)) {
122 kfree(prev_nmi_count); 124 kfree(prev_nmi_count);
123 atomic_set(&nmi_active, -1); 125 atomic_set(&nmi_active, -1);
124 return -1; 126 goto error;
125 } 127 }
126 printk("OK.\n"); 128 printk("OK.\n");
127 129
@@ -132,6 +134,11 @@ int __init check_nmi_watchdog(void)
132 134
133 kfree(prev_nmi_count); 135 kfree(prev_nmi_count);
134 return 0; 136 return 0;
137error:
138 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
139 disable_8259A_irq(0);
140
141 return -1;
135} 142}
136 143
137static int __init setup_nmi_watchdog(char *str) 144static int __init setup_nmi_watchdog(char *str)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 74f0c5ea2a03..f1ab0f727007 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -380,6 +380,9 @@ struct pv_mmu_ops pv_mmu_ops = {
380 .pte_update = paravirt_nop, 380 .pte_update = paravirt_nop,
381 .pte_update_defer = paravirt_nop, 381 .pte_update_defer = paravirt_nop,
382 382
383 .ptep_modify_prot_start = __ptep_modify_prot_start,
384 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
385
383#ifdef CONFIG_HIGHPTE 386#ifdef CONFIG_HIGHPTE
384 .kmap_atomic_pte = kmap_atomic, 387 .kmap_atomic_pte = kmap_atomic,
385#endif 388#endif
@@ -403,6 +406,7 @@ struct pv_mmu_ops pv_mmu_ops = {
403#endif /* PAGETABLE_LEVELS >= 3 */ 406#endif /* PAGETABLE_LEVELS >= 3 */
404 407
405 .pte_val = native_pte_val, 408 .pte_val = native_pte_val,
409 .pte_flags = native_pte_val,
406 .pgd_val = native_pgd_val, 410 .pgd_val = native_pgd_val,
407 411
408 .make_pte = native_make_pte, 412 .make_pte = native_make_pte,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index dc00a1331ace..3c43109ba054 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -77,10 +77,14 @@ void __init dma32_reserve_bootmem(void)
77 if (end_pfn <= MAX_DMA32_PFN) 77 if (end_pfn <= MAX_DMA32_PFN)
78 return; 78 return;
79 79
80 /*
81 * check aperture_64.c allocate_aperture() for reason about
82 * using 512M as goal
83 */
80 align = 64ULL<<20; 84 align = 64ULL<<20;
81 size = round_up(dma32_bootmem_size, align); 85 size = round_up(dma32_bootmem_size, align);
82 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 86 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
83 __pa(MAX_DMA_ADDRESS)); 87 512ULL<<20);
84 if (dma32_bootmem_ptr) 88 if (dma32_bootmem_ptr)
85 dma32_bootmem_size = size; 89 dma32_bootmem_size = size;
86 else 90 else
@@ -88,7 +92,6 @@ void __init dma32_reserve_bootmem(void)
88} 92}
89static void __init dma32_free_bootmem(void) 93static void __init dma32_free_bootmem(void)
90{ 94{
91 int node;
92 95
93 if (end_pfn <= MAX_DMA32_PFN) 96 if (end_pfn <= MAX_DMA32_PFN)
94 return; 97 return;
@@ -96,9 +99,7 @@ static void __init dma32_free_bootmem(void)
96 if (!dma32_bootmem_ptr) 99 if (!dma32_bootmem_ptr)
97 return; 100 return;
98 101
99 for_each_online_node(node) 102 free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size);
100 free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
101 dma32_bootmem_size);
102 103
103 dma32_bootmem_ptr = NULL; 104 dma32_bootmem_ptr = NULL;
104 dma32_bootmem_size = 0; 105 dma32_bootmem_size = 0;
@@ -357,7 +358,7 @@ int dma_supported(struct device *dev, u64 mask)
357EXPORT_SYMBOL(dma_supported); 358EXPORT_SYMBOL(dma_supported);
358 359
359/* Allocate DMA memory on node near device */ 360/* Allocate DMA memory on node near device */
360noinline struct page * 361static noinline struct page *
361dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) 362dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
362{ 363{
363 int node; 364 int node;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aa8ec928caa8..021f3c684a62 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -104,7 +104,6 @@ static unsigned long alloc_iommu(struct device *dev, int size)
104 size, base_index, boundary_size, 0); 104 size, base_index, boundary_size, 0);
105 } 105 }
106 if (offset != -1) { 106 if (offset != -1) {
107 set_bit_string(iommu_gart_bitmap, offset, size);
108 next_bit = offset+size; 107 next_bit = offset+size;
109 if (next_bit >= iommu_pages) { 108 if (next_bit >= iommu_pages) {
110 next_bit = 0; 109 next_bit = 0;
@@ -534,8 +533,8 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
534 unsigned aper_size = 0, aper_base_32, aper_order; 533 unsigned aper_size = 0, aper_base_32, aper_order;
535 u64 aper_base; 534 u64 aper_base;
536 535
537 pci_read_config_dword(dev, 0x94, &aper_base_32); 536 pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
538 pci_read_config_dword(dev, 0x90, &aper_order); 537 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
539 aper_order = (aper_order >> 1) & 7; 538 aper_order = (aper_order >> 1) & 7;
540 539
541 aper_base = aper_base_32 & 0x7fff; 540 aper_base = aper_base_32 & 0x7fff;
@@ -549,14 +548,63 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
549 return aper_base; 548 return aper_base;
550} 549}
551 550
551static void enable_gart_translations(void)
552{
553 int i;
554
555 for (i = 0; i < num_k8_northbridges; i++) {
556 struct pci_dev *dev = k8_northbridges[i];
557
558 enable_gart_translation(dev, __pa(agp_gatt_table));
559 }
560}
561
562/*
563 * If fix_up_north_bridges is set, the north bridges have to be fixed up on
564 * resume in the same way as they are handled in gart_iommu_hole_init().
565 */
566static bool fix_up_north_bridges;
567static u32 aperture_order;
568static u32 aperture_alloc;
569
570void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
571{
572 fix_up_north_bridges = true;
573 aperture_order = aper_order;
574 aperture_alloc = aper_alloc;
575}
576
552static int gart_resume(struct sys_device *dev) 577static int gart_resume(struct sys_device *dev)
553{ 578{
579 printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");
580
581 if (fix_up_north_bridges) {
582 int i;
583
584 printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");
585
586 for (i = 0; i < num_k8_northbridges; i++) {
587 struct pci_dev *dev = k8_northbridges[i];
588
589 /*
590 * Don't enable translations just yet. That is the next
591 * step. Restore the pre-suspend aperture settings.
592 */
593 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
594 aperture_order << 1);
595 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
596 aperture_alloc >> 25);
597 }
598 }
599
600 enable_gart_translations();
601
554 return 0; 602 return 0;
555} 603}
556 604
557static int gart_suspend(struct sys_device *dev, pm_message_t state) 605static int gart_suspend(struct sys_device *dev, pm_message_t state)
558{ 606{
559 return -EINVAL; 607 return 0;
560} 608}
561 609
562static struct sysdev_class gart_sysdev_class = { 610static struct sysdev_class gart_sysdev_class = {
@@ -614,27 +662,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
614 memset(gatt, 0, gatt_size); 662 memset(gatt, 0, gatt_size);
615 agp_gatt_table = gatt; 663 agp_gatt_table = gatt;
616 664
617 for (i = 0; i < num_k8_northbridges; i++) { 665 enable_gart_translations();
618 u32 gatt_reg;
619 u32 ctl;
620
621 dev = k8_northbridges[i];
622 gatt_reg = __pa(gatt) >> 12;
623 gatt_reg <<= 4;
624 pci_write_config_dword(dev, 0x98, gatt_reg);
625 pci_read_config_dword(dev, 0x90, &ctl);
626
627 ctl |= 1;
628 ctl &= ~((1<<4) | (1<<5));
629
630 pci_write_config_dword(dev, 0x90, ctl);
631 }
632 666
633 error = sysdev_class_register(&gart_sysdev_class); 667 error = sysdev_class_register(&gart_sysdev_class);
634 if (!error) 668 if (!error)
635 error = sysdev_register(&device_gart); 669 error = sysdev_register(&device_gart);
636 if (error) 670 if (error)
637 panic("Could not register gart_sysdev -- would corrupt data on next suspend"); 671 panic("Could not register gart_sysdev -- would corrupt data on next suspend");
672
638 flush_gart(); 673 flush_gart();
639 674
640 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 675 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
@@ -677,11 +712,11 @@ void gart_iommu_shutdown(void)
677 u32 ctl; 712 u32 ctl;
678 713
679 dev = k8_northbridges[i]; 714 dev = k8_northbridges[i];
680 pci_read_config_dword(dev, 0x90, &ctl); 715 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
681 716
682 ctl &= ~1; 717 ctl &= ~GARTEN;
683 718
684 pci_write_config_dword(dev, 0x90, ctl); 719 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
685 } 720 }
686} 721}
687 722
@@ -788,10 +823,10 @@ void __init gart_iommu_init(void)
788 wbinvd(); 823 wbinvd();
789 824
790 /* 825 /*
791 * Try to workaround a bug (thanks to BenH) 826 * Try to workaround a bug (thanks to BenH):
792 * Set unmapped entries to a scratch page instead of 0. 827 * Set unmapped entries to a scratch page instead of 0.
793 * Any prefetches that hit unmapped entries won't get an bus abort 828 * Any prefetches that hit unmapped entries won't get an bus abort
794 * then. 829 * then. (P2P bridge may be prefetching on DMA reads).
795 */ 830 */
796 scratch = get_zeroed_page(GFP_KERNEL); 831 scratch = get_zeroed_page(GFP_KERNEL);
797 if (!scratch) 832 if (!scratch)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ba370dc8685b..4061d63aabe7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h>
9 10
10struct kmem_cache *task_xstate_cachep; 11struct kmem_cache *task_xstate_cachep;
11 12
@@ -45,6 +46,76 @@ void arch_task_cache_init(void)
45 SLAB_PANIC, NULL); 46 SLAB_PANIC, NULL);
46} 47}
47 48
49/*
50 * Idle related variables and functions
51 */
52unsigned long boot_option_idle_override = 0;
53EXPORT_SYMBOL(boot_option_idle_override);
54
55/*
56 * Powermanagement idle function, if any..
57 */
58void (*pm_idle)(void);
59EXPORT_SYMBOL(pm_idle);
60
61#ifdef CONFIG_X86_32
62/*
63 * This halt magic was a workaround for ancient floppy DMA
64 * wreckage. It should be safe to remove.
65 */
66static int hlt_counter;
67void disable_hlt(void)
68{
69 hlt_counter++;
70}
71EXPORT_SYMBOL(disable_hlt);
72
73void enable_hlt(void)
74{
75 hlt_counter--;
76}
77EXPORT_SYMBOL(enable_hlt);
78
79static inline int hlt_use_halt(void)
80{
81 return (!hlt_counter && boot_cpu_data.hlt_works_ok);
82}
83#else
84static inline int hlt_use_halt(void)
85{
86 return 1;
87}
88#endif
89
90/*
91 * We use this if we don't have any better
92 * idle routine..
93 */
94void default_idle(void)
95{
96 if (hlt_use_halt()) {
97 current_thread_info()->status &= ~TS_POLLING;
98 /*
99 * TS_POLLING-cleared state must be visible before we
100 * test NEED_RESCHED:
101 */
102 smp_mb();
103
104 if (!need_resched())
105 safe_halt(); /* enables interrupts racelessly */
106 else
107 local_irq_enable();
108 current_thread_info()->status |= TS_POLLING;
109 } else {
110 local_irq_enable();
111 /* loop is done by the caller */
112 cpu_relax();
113 }
114}
115#ifdef CONFIG_APM_MODULE
116EXPORT_SYMBOL(default_idle);
117#endif
118
48static void do_nothing(void *unused) 119static void do_nothing(void *unused)
49{ 120{
50} 121}
@@ -122,44 +193,129 @@ static void poll_idle(void)
122 * 193 *
123 * idle=mwait overrides this decision and forces the usage of mwait. 194 * idle=mwait overrides this decision and forces the usage of mwait.
124 */ 195 */
196
197#define MWAIT_INFO 0x05
198#define MWAIT_ECX_EXTENDED_INFO 0x01
199#define MWAIT_EDX_C1 0xf0
200
125static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) 201static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
126{ 202{
203 u32 eax, ebx, ecx, edx;
204
127 if (force_mwait) 205 if (force_mwait)
128 return 1; 206 return 1;
129 207
130 if (c->x86_vendor == X86_VENDOR_AMD) { 208 if (c->cpuid_level < MWAIT_INFO)
131 switch(c->x86) { 209 return 0;
132 case 0x10: 210
133 case 0x11: 211 cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
134 return 0; 212 /* Check, whether EDX has extended info about MWAIT */
135 } 213 if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
136 } 214 return 1;
215
216 /*
217 * edx enumeratios MONITOR/MWAIT extensions. Check, whether
218 * C1 supports MWAIT
219 */
220 return (edx & MWAIT_EDX_C1);
221}
222
223/*
224 * Check for AMD CPUs, which have potentially C1E support
225 */
226static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
227{
228 if (c->x86_vendor != X86_VENDOR_AMD)
229 return 0;
230
231 if (c->x86 < 0x0F)
232 return 0;
233
234 /* Family 0x0f models < rev F do not have C1E */
235 if (c->x86 == 0x0f && c->x86_model < 0x40)
236 return 0;
237
137 return 1; 238 return 1;
138} 239}
139 240
140void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 241/*
242 * C1E aware idle routine. We check for C1E active in the interrupt
243 * pending message MSR. If we detect C1E, then we handle it the same
244 * way as C3 power states (local apic timer and TSC stop)
245 */
246static void c1e_idle(void)
141{ 247{
142 static int selected; 248 static cpumask_t c1e_mask = CPU_MASK_NONE;
249 static int c1e_detected;
143 250
144 if (selected) 251 if (need_resched())
145 return; 252 return;
253
254 if (!c1e_detected) {
255 u32 lo, hi;
256
257 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
258 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
259 c1e_detected = 1;
260 mark_tsc_unstable("TSC halt in C1E");
261 printk(KERN_INFO "System has C1E enabled\n");
262 }
263 }
264
265 if (c1e_detected) {
266 int cpu = smp_processor_id();
267
268 if (!cpu_isset(cpu, c1e_mask)) {
269 cpu_set(cpu, c1e_mask);
270 /*
271 * Force broadcast so ACPI can not interfere. Needs
272 * to run with interrupts enabled as it uses
273 * smp_function_call.
274 */
275 local_irq_enable();
276 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
277 &cpu);
278 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
279 cpu);
280 local_irq_disable();
281 }
282 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
283
284 default_idle();
285
286 /*
287 * The switch back from broadcast mode needs to be
288 * called with interrupts disabled.
289 */
290 local_irq_disable();
291 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
292 local_irq_enable();
293 } else
294 default_idle();
295}
296
297void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
298{
146#ifdef CONFIG_X86_SMP 299#ifdef CONFIG_X86_SMP
147 if (pm_idle == poll_idle && smp_num_siblings > 1) { 300 if (pm_idle == poll_idle && smp_num_siblings > 1) {
148 printk(KERN_WARNING "WARNING: polling idle and HT enabled," 301 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
149 " performance may degrade.\n"); 302 " performance may degrade.\n");
150 } 303 }
151#endif 304#endif
305 if (pm_idle)
306 return;
307
152 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { 308 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
153 /* 309 /*
154 * Skip, if setup has overridden idle.
155 * One CPU supports mwait => All CPUs supports mwait 310 * One CPU supports mwait => All CPUs supports mwait
156 */ 311 */
157 if (!pm_idle) { 312 printk(KERN_INFO "using mwait in idle threads.\n");
158 printk(KERN_INFO "using mwait in idle threads.\n"); 313 pm_idle = mwait_idle;
159 pm_idle = mwait_idle; 314 } else if (check_c1e_idle(c)) {
160 } 315 printk(KERN_INFO "using C1E aware idle routine\n");
161 } 316 pm_idle = c1e_idle;
162 selected = 1; 317 } else
318 pm_idle = default_idle;
163} 319}
164 320
165static int __init idle_setup(char *str) 321static int __init idle_setup(char *str)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 6d5483356e74..c2a11d77b1b5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,11 +58,6 @@
58 58
59asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 59asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
60 60
61static int hlt_counter;
62
63unsigned long boot_option_idle_override = 0;
64EXPORT_SYMBOL(boot_option_idle_override);
65
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 61DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 62EXPORT_PER_CPU_SYMBOL(current_task);
68 63
@@ -77,55 +72,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
77 return ((unsigned long *)tsk->thread.sp)[3]; 72 return ((unsigned long *)tsk->thread.sp)[3];
78} 73}
79 74
80/*
81 * Powermanagement idle function, if any..
82 */
83void (*pm_idle)(void);
84EXPORT_SYMBOL(pm_idle);
85
86void disable_hlt(void)
87{
88 hlt_counter++;
89}
90
91EXPORT_SYMBOL(disable_hlt);
92
93void enable_hlt(void)
94{
95 hlt_counter--;
96}
97
98EXPORT_SYMBOL(enable_hlt);
99
100/*
101 * We use this if we don't have any better
102 * idle routine..
103 */
104void default_idle(void)
105{
106 if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
107 current_thread_info()->status &= ~TS_POLLING;
108 /*
109 * TS_POLLING-cleared state must be visible before we
110 * test NEED_RESCHED:
111 */
112 smp_mb();
113
114 if (!need_resched())
115 safe_halt(); /* enables interrupts racelessly */
116 else
117 local_irq_enable();
118 current_thread_info()->status |= TS_POLLING;
119 } else {
120 local_irq_enable();
121 /* loop is done by the caller */
122 cpu_relax();
123 }
124}
125#ifdef CONFIG_APM_MODULE
126EXPORT_SYMBOL(default_idle);
127#endif
128
129#ifdef CONFIG_HOTPLUG_CPU 75#ifdef CONFIG_HOTPLUG_CPU
130#include <asm/nmi.h> 76#include <asm/nmi.h>
131/* We don't actually take CPU down, just spin without interrupts. */ 77/* We don't actually take CPU down, just spin without interrupts. */
@@ -168,24 +114,19 @@ void cpu_idle(void)
168 while (1) { 114 while (1) {
169 tick_nohz_stop_sched_tick(); 115 tick_nohz_stop_sched_tick();
170 while (!need_resched()) { 116 while (!need_resched()) {
171 void (*idle)(void);
172 117
173 check_pgt_cache(); 118 check_pgt_cache();
174 rmb(); 119 rmb();
175 idle = pm_idle;
176 120
177 if (rcu_pending(cpu)) 121 if (rcu_pending(cpu))
178 rcu_check_callbacks(cpu, 0); 122 rcu_check_callbacks(cpu, 0);
179 123
180 if (!idle)
181 idle = default_idle;
182
183 if (cpu_is_offline(cpu)) 124 if (cpu_is_offline(cpu))
184 play_dead(); 125 play_dead();
185 126
186 local_irq_disable(); 127 local_irq_disable();
187 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 128 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
188 idle(); 129 pm_idle();
189 } 130 }
190 tick_nohz_restart_sched_tick(); 131 tick_nohz_restart_sched_tick();
191 preempt_enable_no_resched(); 132 preempt_enable_no_resched();
@@ -333,6 +274,7 @@ void flush_thread(void)
333 /* 274 /*
334 * Forget coprocessor state.. 275 * Forget coprocessor state..
335 */ 276 */
277 tsk->fpu_counter = 0;
336 clear_fpu(tsk); 278 clear_fpu(tsk);
337 clear_used_math(); 279 clear_used_math();
338} 280}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ac54ff56df80..290183e9731a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -56,15 +56,6 @@ asmlinkage extern void ret_from_fork(void);
56 56
57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58 58
59unsigned long boot_option_idle_override = 0;
60EXPORT_SYMBOL(boot_option_idle_override);
61
62/*
63 * Powermanagement idle function, if any..
64 */
65void (*pm_idle)(void);
66EXPORT_SYMBOL(pm_idle);
67
68static ATOMIC_NOTIFIER_HEAD(idle_notifier); 59static ATOMIC_NOTIFIER_HEAD(idle_notifier);
69 60
70void idle_notifier_register(struct notifier_block *n) 61void idle_notifier_register(struct notifier_block *n)
@@ -94,25 +85,6 @@ void exit_idle(void)
94 __exit_idle(); 85 __exit_idle();
95} 86}
96 87
97/*
98 * We use this if we don't have any better
99 * idle routine..
100 */
101void default_idle(void)
102{
103 current_thread_info()->status &= ~TS_POLLING;
104 /*
105 * TS_POLLING-cleared state must be visible before we
106 * test NEED_RESCHED:
107 */
108 smp_mb();
109 if (!need_resched())
110 safe_halt(); /* enables interrupts racelessly */
111 else
112 local_irq_enable();
113 current_thread_info()->status |= TS_POLLING;
114}
115
116#ifdef CONFIG_HOTPLUG_CPU 88#ifdef CONFIG_HOTPLUG_CPU
117DECLARE_PER_CPU(int, cpu_state); 89DECLARE_PER_CPU(int, cpu_state);
118 90
@@ -150,12 +122,9 @@ void cpu_idle(void)
150 while (1) { 122 while (1) {
151 tick_nohz_stop_sched_tick(); 123 tick_nohz_stop_sched_tick();
152 while (!need_resched()) { 124 while (!need_resched()) {
153 void (*idle)(void);
154 125
155 rmb(); 126 rmb();
156 idle = pm_idle; 127
157 if (!idle)
158 idle = default_idle;
159 if (cpu_is_offline(smp_processor_id())) 128 if (cpu_is_offline(smp_processor_id()))
160 play_dead(); 129 play_dead();
161 /* 130 /*
@@ -165,7 +134,7 @@ void cpu_idle(void)
165 */ 134 */
166 local_irq_disable(); 135 local_irq_disable();
167 enter_idle(); 136 enter_idle();
168 idle(); 137 pm_idle();
169 /* In many cases the interrupt that ended idle 138 /* In many cases the interrupt that ended idle
170 has already called exit_idle. But some idle 139 has already called exit_idle. But some idle
171 loops can be woken up without interrupt. */ 140 loops can be woken up without interrupt. */
@@ -294,6 +263,7 @@ void flush_thread(void)
294 /* 263 /*
295 * Forget coprocessor state.. 264 * Forget coprocessor state..
296 */ 265 */
266 tsk->fpu_counter = 0;
297 clear_fpu(tsk); 267 clear_fpu(tsk);
298 clear_used_math(); 268 clear_used_math();
299} 269}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000000000000..05fbe9a0325a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
1/* paravirtual clock -- common code used by kvm/xen
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17
18#include <linux/kernel.h>
19#include <linux/percpu.h>
20#include <asm/pvclock.h>
21
22/*
23 * These are perodically updated
24 * xen: magic shared_info page
25 * kvm: gpa registered via msr
26 * and then copied here.
27 */
28struct pvclock_shadow_time {
29 u64 tsc_timestamp; /* TSC at last update of time vals. */
30 u64 system_timestamp; /* Time, in nanosecs, since boot. */
31 u32 tsc_to_nsec_mul;
32 int tsc_shift;
33 u32 version;
34};
35
36/*
37 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
38 * yielding a 64-bit result.
39 */
40static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
41{
42 u64 product;
43#ifdef __i386__
44 u32 tmp1, tmp2;
45#endif
46
47 if (shift < 0)
48 delta >>= -shift;
49 else
50 delta <<= shift;
51
52#ifdef __i386__
53 __asm__ (
54 "mul %5 ; "
55 "mov %4,%%eax ; "
56 "mov %%edx,%4 ; "
57 "mul %5 ; "
58 "xor %5,%5 ; "
59 "add %4,%%eax ; "
60 "adc %5,%%edx ; "
61 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
62 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
63#elif __x86_64__
64 __asm__ (
65 "mul %%rdx ; shrd $32,%%rdx,%%rax"
66 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
67#else
68#error implement me!
69#endif
70
71 return product;
72}
73
74static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
75{
76 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
77 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
78}
79
80/*
81 * Reads a consistent set of time-base values from hypervisor,
82 * into a shadow data area.
83 */
84static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
85 struct pvclock_vcpu_time_info *src)
86{
87 do {
88 dst->version = src->version;
89 rmb(); /* fetch version before data */
90 dst->tsc_timestamp = src->tsc_timestamp;
91 dst->system_timestamp = src->system_time;
92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
93 dst->tsc_shift = src->tsc_shift;
94 rmb(); /* test version after fetching data */
95 } while ((src->version & 1) || (dst->version != src->version));
96
97 return dst->version;
98}
99
100cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
101{
102 struct pvclock_shadow_time shadow;
103 unsigned version;
104 cycle_t ret, offset;
105
106 do {
107 version = pvclock_get_time_values(&shadow, src);
108 barrier();
109 offset = pvclock_get_nsec_offset(&shadow);
110 ret = shadow.system_timestamp + offset;
111 barrier();
112 } while (version != src->version);
113
114 return ret;
115}
116
117void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
118 struct pvclock_vcpu_time_info *vcpu_time,
119 struct timespec *ts)
120{
121 u32 version;
122 u64 delta;
123 struct timespec now;
124
125 /* get wallclock at system boot */
126 do {
127 version = wall_clock->version;
128 rmb(); /* fetch version before time */
129 now.tv_sec = wall_clock->sec;
130 now.tv_nsec = wall_clock->nsec;
131 rmb(); /* fetch time before checking version */
132 } while ((wall_clock->version & 1) || (version != wall_clock->version));
133
134 delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
135 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
136
137 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
138 now.tv_sec = delta;
139
140 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
141}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d89a648fe710..79bdcd11c66e 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -65,6 +65,7 @@ static enum {
65 ICH_FORCE_HPET_RESUME, 65 ICH_FORCE_HPET_RESUME,
66 VT8237_FORCE_HPET_RESUME, 66 VT8237_FORCE_HPET_RESUME,
67 NVIDIA_FORCE_HPET_RESUME, 67 NVIDIA_FORCE_HPET_RESUME,
68 ATI_FORCE_HPET_RESUME,
68} force_hpet_resume_type; 69} force_hpet_resume_type;
69 70
70static void __iomem *rcba_base; 71static void __iomem *rcba_base;
@@ -158,6 +159,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
158 159
159DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, 160DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
160 ich_force_enable_hpet); 161 ich_force_enable_hpet);
162DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0,
163 ich_force_enable_hpet);
161DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, 164DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
162 ich_force_enable_hpet); 165 ich_force_enable_hpet);
163DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, 166DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
@@ -174,6 +177,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
174 177
175static struct pci_dev *cached_dev; 178static struct pci_dev *cached_dev;
176 179
180static void hpet_print_force_info(void)
181{
182 printk(KERN_INFO "HPET not enabled in BIOS. "
183 "You might try hpet=force boot option\n");
184}
185
177static void old_ich_force_hpet_resume(void) 186static void old_ich_force_hpet_resume(void)
178{ 187{
179 u32 val; 188 u32 val;
@@ -253,6 +262,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
253{ 262{
254 if (hpet_force_user) 263 if (hpet_force_user)
255 old_ich_force_enable_hpet(dev); 264 old_ich_force_enable_hpet(dev);
265 else
266 hpet_print_force_info();
256} 267}
257 268
258DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, 269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
@@ -290,8 +301,13 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev)
290{ 301{
291 u32 uninitialized_var(val); 302 u32 uninitialized_var(val);
292 303
293 if (!hpet_force_user || hpet_address || force_hpet_address) 304 if (hpet_address || force_hpet_address)
305 return;
306
307 if (!hpet_force_user) {
308 hpet_print_force_info();
294 return; 309 return;
310 }
295 311
296 pci_read_config_dword(dev, 0x68, &val); 312 pci_read_config_dword(dev, 0x68, &val);
297 /* 313 /*
@@ -330,6 +346,36 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
330DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, 346DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
331 vt8237_force_enable_hpet); 347 vt8237_force_enable_hpet);
332 348
349static void ati_force_hpet_resume(void)
350{
351 pci_write_config_dword(cached_dev, 0x14, 0xfed00000);
352 printk(KERN_DEBUG "Force enabled HPET at resume\n");
353}
354
355static void ati_force_enable_hpet(struct pci_dev *dev)
356{
357 u32 uninitialized_var(val);
358
359 if (hpet_address || force_hpet_address)
360 return;
361
362 if (!hpet_force_user) {
363 hpet_print_force_info();
364 return;
365 }
366
367 pci_write_config_dword(dev, 0x14, 0xfed00000);
368 pci_read_config_dword(dev, 0x14, &val);
369 force_hpet_address = val;
370 force_hpet_resume_type = ATI_FORCE_HPET_RESUME;
371 dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
372 force_hpet_address);
373 cached_dev = dev;
374 return;
375}
376DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
377 ati_force_enable_hpet);
378
333/* 379/*
334 * Undocumented chipset feature taken from LinuxBIOS. 380 * Undocumented chipset feature taken from LinuxBIOS.
335 */ 381 */
@@ -343,8 +389,13 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev)
343{ 389{
344 u32 uninitialized_var(val); 390 u32 uninitialized_var(val);
345 391
346 if (!hpet_force_user || hpet_address || force_hpet_address) 392 if (hpet_address || force_hpet_address)
393 return;
394
395 if (!hpet_force_user) {
396 hpet_print_force_info();
347 return; 397 return;
398 }
348 399
349 pci_write_config_dword(dev, 0x44, 0xfed00001); 400 pci_write_config_dword(dev, 0x44, 0xfed00001);
350 pci_read_config_dword(dev, 0x44, &val); 401 pci_read_config_dword(dev, 0x44, &val);
@@ -397,6 +448,9 @@ void force_hpet_resume(void)
397 case NVIDIA_FORCE_HPET_RESUME: 448 case NVIDIA_FORCE_HPET_RESUME:
398 nvidia_force_hpet_resume(); 449 nvidia_force_hpet_resume();
399 return; 450 return;
451 case ATI_FORCE_HPET_RESUME:
452 ati_force_hpet_resume();
453 return;
400 default: 454 default:
401 break; 455 break;
402 } 456 }
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f6be7d5f82f8..f8a62160e151 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -27,7 +27,7 @@
27void (*pm_power_off)(void); 27void (*pm_power_off)(void);
28EXPORT_SYMBOL(pm_power_off); 28EXPORT_SYMBOL(pm_power_off);
29 29
30static long no_idt[3]; 30static const struct desc_ptr no_idt = {};
31static int reboot_mode; 31static int reboot_mode;
32enum reboot_type reboot_type = BOOT_KBD; 32enum reboot_type reboot_type = BOOT_KBD;
33int reboot_force; 33int reboot_force;
@@ -201,15 +201,15 @@ core_initcall(reboot_init);
201 controller to pulse the CPU reset line, which is more thorough, but 201 controller to pulse the CPU reset line, which is more thorough, but
202 doesn't work with at least one type of 486 motherboard. It is easy 202 doesn't work with at least one type of 486 motherboard. It is easy
203 to stop this code working; hence the copious comments. */ 203 to stop this code working; hence the copious comments. */
204static unsigned long long 204static const unsigned long long
205real_mode_gdt_entries [3] = 205real_mode_gdt_entries [3] =
206{ 206{
207 0x0000000000000000ULL, /* Null descriptor */ 207 0x0000000000000000ULL, /* Null descriptor */
208 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ 208 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */
209 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ 209 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
210}; 210};
211 211
212static struct desc_ptr 212static const struct desc_ptr
213real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, 213real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
214real_mode_idt = { 0x3ff, 0 }; 214real_mode_idt = { 0x3ff, 0 };
215 215
@@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 };
231 231
232 More could be done here to set up the registers as if a CPU reset had 232 More could be done here to set up the registers as if a CPU reset had
233 occurred; hopefully real BIOSs don't assume much. */ 233 occurred; hopefully real BIOSs don't assume much. */
234static unsigned char real_mode_switch [] = 234static const unsigned char real_mode_switch [] =
235{ 235{
236 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 236 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */
237 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ 237 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */
@@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] =
245 0x24, 0x10, /* f: andb $0x10,al */ 245 0x24, 0x10, /* f: andb $0x10,al */
246 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ 246 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */
247}; 247};
248static unsigned char jump_to_bios [] = 248static const unsigned char jump_to_bios [] =
249{ 249{
250 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ 250 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */
251}; 251};
@@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] =
255 * specified by the code and length parameters. 255 * specified by the code and length parameters.
256 * We assume that length will aways be less that 100! 256 * We assume that length will aways be less that 100!
257 */ 257 */
258void machine_real_restart(unsigned char *code, int length) 258void machine_real_restart(const unsigned char *code, int length)
259{ 259{
260 local_irq_disable(); 260 local_irq_disable();
261 261
@@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void)
368 } 368 }
369 369
370 case BOOT_TRIPLE: 370 case BOOT_TRIPLE:
371 load_idt((const struct desc_ptr *)&no_idt); 371 load_idt(&no_idt);
372 __asm__ __volatile__("int3"); 372 __asm__ __volatile__("int3");
373 373
374 reboot_type = BOOT_KBD; 374 reboot_type = BOOT_KBD;
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index dec0b5ec25c2..61a837743fe5 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -49,7 +49,7 @@ struct device_fixup {
49 void (*reboot_fixup)(struct pci_dev *); 49 void (*reboot_fixup)(struct pci_dev *);
50}; 50};
51 51
52static struct device_fixup fixups_table[] = { 52static const struct device_fixup fixups_table[] = {
53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, 53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, 54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, 55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
@@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = {
64 */ 64 */
65void mach_reboot_fixups(void) 65void mach_reboot_fixups(void)
66{ 66{
67 struct device_fixup *cur; 67 const struct device_fixup *cur;
68 struct pci_dev *dev; 68 struct pci_dev *dev;
69 int i; 69 int i;
70 70
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2c5f8b213e86..5a2f8e063887 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -532,10 +532,16 @@ static void __init reserve_crashkernel(void)
532 (unsigned long)(crash_size >> 20), 532 (unsigned long)(crash_size >> 20),
533 (unsigned long)(crash_base >> 20), 533 (unsigned long)(crash_base >> 20),
534 (unsigned long)(total_mem >> 20)); 534 (unsigned long)(total_mem >> 20));
535
536 if (reserve_bootmem(crash_base, crash_size,
537 BOOTMEM_EXCLUSIVE) < 0) {
538 printk(KERN_INFO "crashkernel reservation "
539 "failed - memory is in use\n");
540 return;
541 }
542
535 crashk_res.start = crash_base; 543 crashk_res.start = crash_base;
536 crashk_res.end = crash_base + crash_size - 1; 544 crashk_res.end = crash_base + crash_size - 1;
537 reserve_bootmem(crash_base, crash_size,
538 BOOTMEM_DEFAULT);
539 } else 545 } else
540 printk(KERN_INFO "crashkernel reservation failed - " 546 printk(KERN_INFO "crashkernel reservation failed - "
541 "you have to specify a base address\n"); 547 "you have to specify a base address\n");
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6dff1286ad8a..545440e471b2 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -71,6 +71,7 @@
71#include <asm/topology.h> 71#include <asm/topology.h>
72#include <asm/trampoline.h> 72#include <asm/trampoline.h>
73#include <asm/pat.h> 73#include <asm/pat.h>
74#include <asm/mmconfig.h>
74 75
75#include <mach_apic.h> 76#include <mach_apic.h>
76#ifdef CONFIG_PARAVIRT 77#ifdef CONFIG_PARAVIRT
@@ -79,6 +80,8 @@
79#define ARCH_SETUP 80#define ARCH_SETUP
80#endif 81#endif
81 82
83#include "cpu/cpu.h"
84
82/* 85/*
83 * Machine setup.. 86 * Machine setup..
84 */ 87 */
@@ -95,8 +98,6 @@ int bootloader_type;
95 98
96unsigned long saved_video_mode; 99unsigned long saved_video_mode;
97 100
98int force_mwait __cpuinitdata;
99
100/* 101/*
101 * Early DMI memory 102 * Early DMI memory
102 */ 103 */
@@ -118,7 +119,7 @@ EXPORT_SYMBOL_GPL(edid_info);
118 119
119extern int root_mountflags; 120extern int root_mountflags;
120 121
121char __initdata command_line[COMMAND_LINE_SIZE]; 122static char __initdata command_line[COMMAND_LINE_SIZE];
122 123
123static struct resource standard_io_resources[] = { 124static struct resource standard_io_resources[] = {
124 { .name = "dma1", .start = 0x00, .end = 0x1f, 125 { .name = "dma1", .start = 0x00, .end = 0x1f,
@@ -164,6 +165,7 @@ static struct resource bss_resource = {
164 .flags = IORESOURCE_RAM, 165 .flags = IORESOURCE_RAM,
165}; 166};
166 167
168static void __init early_cpu_init(void);
167static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); 169static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
168 170
169#ifdef CONFIG_PROC_VMCORE 171#ifdef CONFIG_PROC_VMCORE
@@ -293,18 +295,6 @@ static void __init parse_setup_data(void)
293 } 295 }
294} 296}
295 297
296#ifdef CONFIG_PCI_MMCONFIG
297extern void __cpuinit fam10h_check_enable_mmcfg(void);
298extern void __init check_enable_amd_mmconf_dmi(void);
299#else
300void __cpuinit fam10h_check_enable_mmcfg(void)
301{
302}
303void __init check_enable_amd_mmconf_dmi(void)
304{
305}
306#endif
307
308/* 298/*
309 * setup_arch - architecture-specific boot-time initializations 299 * setup_arch - architecture-specific boot-time initializations
310 * 300 *
@@ -352,6 +342,7 @@ void __init setup_arch(char **cmdline_p)
352 bss_resource.start = virt_to_phys(&__bss_start); 342 bss_resource.start = virt_to_phys(&__bss_start);
353 bss_resource.end = virt_to_phys(&__bss_stop)-1; 343 bss_resource.end = virt_to_phys(&__bss_stop)-1;
354 344
345 early_cpu_init();
355 early_identify_cpu(&boot_cpu_data); 346 early_identify_cpu(&boot_cpu_data);
356 347
357 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); 348 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
@@ -537,7 +528,20 @@ void __init setup_arch(char **cmdline_p)
537 check_enable_amd_mmconf_dmi(); 528 check_enable_amd_mmconf_dmi();
538} 529}
539 530
540static int __cpuinit get_model_name(struct cpuinfo_x86 *c) 531struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
532
533static void __cpuinit default_init(struct cpuinfo_x86 *c)
534{
535 display_cacheinfo(c);
536}
537
538static struct cpu_dev __cpuinitdata default_cpu = {
539 .c_init = default_init,
540 .c_vendor = "Unknown",
541};
542static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
543
544int __cpuinit get_model_name(struct cpuinfo_x86 *c)
541{ 545{
542 unsigned int *v; 546 unsigned int *v;
543 547
@@ -553,7 +557,7 @@ static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
553} 557}
554 558
555 559
556static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) 560void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
557{ 561{
558 unsigned int n, dummy, eax, ebx, ecx, edx; 562 unsigned int n, dummy, eax, ebx, ecx, edx;
559 563
@@ -585,228 +589,6 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
585 } 589 }
586} 590}
587 591
588#ifdef CONFIG_NUMA
589static int __cpuinit nearby_node(int apicid)
590{
591 int i, node;
592
593 for (i = apicid - 1; i >= 0; i--) {
594 node = apicid_to_node[i];
595 if (node != NUMA_NO_NODE && node_online(node))
596 return node;
597 }
598 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
599 node = apicid_to_node[i];
600 if (node != NUMA_NO_NODE && node_online(node))
601 return node;
602 }
603 return first_node(node_online_map); /* Shouldn't happen */
604}
605#endif
606
607/*
608 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
609 * Assumes number of cores is a power of two.
610 */
611static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
612{
613#ifdef CONFIG_SMP
614 unsigned bits;
615#ifdef CONFIG_NUMA
616 int cpu = smp_processor_id();
617 int node = 0;
618 unsigned apicid = hard_smp_processor_id();
619#endif
620 bits = c->x86_coreid_bits;
621
622 /* Low order bits define the core id (index of core in socket) */
623 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
624 /* Convert the initial APIC ID into the socket ID */
625 c->phys_proc_id = c->initial_apicid >> bits;
626
627#ifdef CONFIG_NUMA
628 node = c->phys_proc_id;
629 if (apicid_to_node[apicid] != NUMA_NO_NODE)
630 node = apicid_to_node[apicid];
631 if (!node_online(node)) {
632 /* Two possibilities here:
633 - The CPU is missing memory and no node was created.
634 In that case try picking one from a nearby CPU
635 - The APIC IDs differ from the HyperTransport node IDs
636 which the K8 northbridge parsing fills in.
637 Assume they are all increased by a constant offset,
638 but in the same order as the HT nodeids.
639 If that doesn't result in a usable node fall back to the
640 path for the previous case. */
641
642 int ht_nodeid = c->initial_apicid;
643
644 if (ht_nodeid >= 0 &&
645 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
646 node = apicid_to_node[ht_nodeid];
647 /* Pick a nearby node */
648 if (!node_online(node))
649 node = nearby_node(apicid);
650 }
651 numa_set_node(cpu, node);
652
653 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
654#endif
655#endif
656}
657
658static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
659{
660#ifdef CONFIG_SMP
661 unsigned bits, ecx;
662
663 /* Multi core CPU? */
664 if (c->extended_cpuid_level < 0x80000008)
665 return;
666
667 ecx = cpuid_ecx(0x80000008);
668
669 c->x86_max_cores = (ecx & 0xff) + 1;
670
671 /* CPU telling us the core id bits shift? */
672 bits = (ecx >> 12) & 0xF;
673
674 /* Otherwise recompute */
675 if (bits == 0) {
676 while ((1 << bits) < c->x86_max_cores)
677 bits++;
678 }
679
680 c->x86_coreid_bits = bits;
681
682#endif
683}
684
685#define ENABLE_C1E_MASK 0x18000000
686#define CPUID_PROCESSOR_SIGNATURE 1
687#define CPUID_XFAM 0x0ff00000
688#define CPUID_XFAM_K8 0x00000000
689#define CPUID_XFAM_10H 0x00100000
690#define CPUID_XFAM_11H 0x00200000
691#define CPUID_XMOD 0x000f0000
692#define CPUID_XMOD_REV_F 0x00040000
693
694/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
695static __cpuinit int amd_apic_timer_broken(void)
696{
697 u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
698
699 switch (eax & CPUID_XFAM) {
700 case CPUID_XFAM_K8:
701 if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
702 break;
703 case CPUID_XFAM_10H:
704 case CPUID_XFAM_11H:
705 rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
706 if (lo & ENABLE_C1E_MASK)
707 return 1;
708 break;
709 default:
710 /* err on the side of caution */
711 return 1;
712 }
713 return 0;
714}
715
716static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
717{
718 early_init_amd_mc(c);
719
720 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
721 if (c->x86_power & (1<<8))
722 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
723}
724
725static void __cpuinit init_amd(struct cpuinfo_x86 *c)
726{
727 unsigned level;
728
729#ifdef CONFIG_SMP
730 unsigned long value;
731
732 /*
733 * Disable TLB flush filter by setting HWCR.FFDIS on K8
734 * bit 6 of msr C001_0015
735 *
736 * Errata 63 for SH-B3 steppings
737 * Errata 122 for all steppings (F+ have it disabled by default)
738 */
739 if (c->x86 == 15) {
740 rdmsrl(MSR_K8_HWCR, value);
741 value |= 1 << 6;
742 wrmsrl(MSR_K8_HWCR, value);
743 }
744#endif
745
746 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
747 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
748 clear_cpu_cap(c, 0*32+31);
749
750 /* On C+ stepping K8 rep microcode works well for copy/memset */
751 level = cpuid_eax(1);
752 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
753 level >= 0x0f58))
754 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
755 if (c->x86 == 0x10 || c->x86 == 0x11)
756 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
757
758 /* Enable workaround for FXSAVE leak */
759 if (c->x86 >= 6)
760 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
761
762 level = get_model_name(c);
763 if (!level) {
764 switch (c->x86) {
765 case 15:
766 /* Should distinguish Models here, but this is only
767 a fallback anyways. */
768 strcpy(c->x86_model_id, "Hammer");
769 break;
770 }
771 }
772 display_cacheinfo(c);
773
774 /* Multi core CPU? */
775 if (c->extended_cpuid_level >= 0x80000008)
776 amd_detect_cmp(c);
777
778 if (c->extended_cpuid_level >= 0x80000006 &&
779 (cpuid_edx(0x80000006) & 0xf000))
780 num_cache_leaves = 4;
781 else
782 num_cache_leaves = 3;
783
784 if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
785 set_cpu_cap(c, X86_FEATURE_K8);
786
787 /* MFENCE stops RDTSC speculation */
788 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
789
790 if (c->x86 == 0x10)
791 fam10h_check_enable_mmcfg();
792
793 if (amd_apic_timer_broken())
794 disable_apic_timer = 1;
795
796 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
797 unsigned long long tseg;
798
799 /*
800 * Split up direct mapping around the TSEG SMM area.
801 * Don't do it for gbpages because there seems very little
802 * benefit in doing so.
803 */
804 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
805 (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
806 set_memory_4k((unsigned long)__va(tseg), 1);
807 }
808}
809
810void __cpuinit detect_ht(struct cpuinfo_x86 *c) 592void __cpuinit detect_ht(struct cpuinfo_x86 *c)
811{ 593{
812#ifdef CONFIG_SMP 594#ifdef CONFIG_SMP
@@ -857,135 +639,59 @@ out:
857#endif 639#endif
858} 640}
859 641
860/* 642static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
861 * find out the number of processor cores on the die
862 */
863static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
864{
865 unsigned int eax, t;
866
867 if (c->cpuid_level < 4)
868 return 1;
869
870 cpuid_count(4, 0, &eax, &t, &t, &t);
871
872 if (eax & 0x1f)
873 return ((eax >> 26) + 1);
874 else
875 return 1;
876}
877
878static void __cpuinit srat_detect_node(void)
879{
880#ifdef CONFIG_NUMA
881 unsigned node;
882 int cpu = smp_processor_id();
883 int apicid = hard_smp_processor_id();
884
885 /* Don't do the funky fallback heuristics the AMD version employs
886 for now. */
887 node = apicid_to_node[apicid];
888 if (node == NUMA_NO_NODE || !node_online(node))
889 node = first_node(node_online_map);
890 numa_set_node(cpu, node);
891
892 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
893#endif
894}
895
896static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
897{
898 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
899 (c->x86 == 0x6 && c->x86_model >= 0x0e))
900 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
901}
902
903static void __cpuinit init_intel(struct cpuinfo_x86 *c)
904{ 643{
905 /* Cache sizes */ 644 char *v = c->x86_vendor_id;
906 unsigned n; 645 int i;
907 646 static int printed;
908 init_intel_cacheinfo(c); 647
909 if (c->cpuid_level > 9) { 648 for (i = 0; i < X86_VENDOR_NUM; i++) {
910 unsigned eax = cpuid_eax(10); 649 if (cpu_devs[i]) {
911 /* Check for version and the number of counters */ 650 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
912 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) 651 (cpu_devs[i]->c_ident[1] &&
913 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); 652 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
653 c->x86_vendor = i;
654 this_cpu = cpu_devs[i];
655 return;
656 }
657 }
914 } 658 }
915 659 if (!printed) {
916 if (cpu_has_ds) { 660 printed++;
917 unsigned int l1, l2; 661 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
918 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); 662 printk(KERN_ERR "CPU: Your system may be unstable.\n");
919 if (!(l1 & (1<<11)))
920 set_cpu_cap(c, X86_FEATURE_BTS);
921 if (!(l1 & (1<<12)))
922 set_cpu_cap(c, X86_FEATURE_PEBS);
923 } 663 }
924 664 c->x86_vendor = X86_VENDOR_UNKNOWN;
925
926 if (cpu_has_bts)
927 ds_init_intel(c);
928
929 n = c->extended_cpuid_level;
930 if (n >= 0x80000008) {
931 unsigned eax = cpuid_eax(0x80000008);
932 c->x86_virt_bits = (eax >> 8) & 0xff;
933 c->x86_phys_bits = eax & 0xff;
934 /* CPUID workaround for Intel 0F34 CPU */
935 if (c->x86_vendor == X86_VENDOR_INTEL &&
936 c->x86 == 0xF && c->x86_model == 0x3 &&
937 c->x86_mask == 0x4)
938 c->x86_phys_bits = 36;
939 }
940
941 if (c->x86 == 15)
942 c->x86_cache_alignment = c->x86_clflush_size * 2;
943 if (c->x86 == 6)
944 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
945 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
946 c->x86_max_cores = intel_num_cpu_cores(c);
947
948 srat_detect_node();
949}
950
951static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
952{
953 if (c->x86 == 0x6 && c->x86_model >= 0xf)
954 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
955} 665}
956 666
957static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 667static void __init early_cpu_support_print(void)
958{ 668{
959 /* Cache sizes */ 669 int i,j;
960 unsigned n; 670 struct cpu_dev *cpu_devx;
961 671
962 n = c->extended_cpuid_level; 672 printk("KERNEL supported cpus:\n");
963 if (n >= 0x80000008) { 673 for (i = 0; i < X86_VENDOR_NUM; i++) {
964 unsigned eax = cpuid_eax(0x80000008); 674 cpu_devx = cpu_devs[i];
965 c->x86_virt_bits = (eax >> 8) & 0xff; 675 if (!cpu_devx)
966 c->x86_phys_bits = eax & 0xff; 676 continue;
967 } 677 for (j = 0; j < 2; j++) {
968 678 if (!cpu_devx->c_ident[j])
969 if (c->x86 == 0x6 && c->x86_model >= 0xf) { 679 continue;
970 c->x86_cache_alignment = c->x86_clflush_size * 2; 680 printk(" %s %s\n", cpu_devx->c_vendor,
971 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 681 cpu_devx->c_ident[j]);
972 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 682 }
973 } 683 }
974 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
975} 684}
976 685
977static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 686static void __init early_cpu_init(void)
978{ 687{
979 char *v = c->x86_vendor_id; 688 struct cpu_vendor_dev *cvdev;
980 689
981 if (!strcmp(v, "AuthenticAMD")) 690 for (cvdev = __x86cpuvendor_start ;
982 c->x86_vendor = X86_VENDOR_AMD; 691 cvdev < __x86cpuvendor_end ;
983 else if (!strcmp(v, "GenuineIntel")) 692 cvdev++)
984 c->x86_vendor = X86_VENDOR_INTEL; 693 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
985 else if (!strcmp(v, "CentaurHauls")) 694 early_cpu_support_print();
986 c->x86_vendor = X86_VENDOR_CENTAUR;
987 else
988 c->x86_vendor = X86_VENDOR_UNKNOWN;
989} 695}
990 696
991/* Do some early cpuid on the boot CPU to get some parameter that are 697/* Do some early cpuid on the boot CPU to get some parameter that are
@@ -1066,17 +772,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1066 if (c->extended_cpuid_level >= 0x80000007) 772 if (c->extended_cpuid_level >= 0x80000007)
1067 c->x86_power = cpuid_edx(0x80000007); 773 c->x86_power = cpuid_edx(0x80000007);
1068 774
1069 switch (c->x86_vendor) { 775 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
1070 case X86_VENDOR_AMD: 776 cpu_devs[c->x86_vendor]->c_early_init)
1071 early_init_amd(c); 777 cpu_devs[c->x86_vendor]->c_early_init(c);
1072 break;
1073 case X86_VENDOR_INTEL:
1074 early_init_intel(c);
1075 break;
1076 case X86_VENDOR_CENTAUR:
1077 early_init_centaur(c);
1078 break;
1079 }
1080 778
1081 validate_pat_support(c); 779 validate_pat_support(c);
1082} 780}
@@ -1104,24 +802,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1104 * At the end of this section, c->x86_capability better 802 * At the end of this section, c->x86_capability better
1105 * indicate the features this CPU genuinely supports! 803 * indicate the features this CPU genuinely supports!
1106 */ 804 */
1107 switch (c->x86_vendor) { 805 if (this_cpu->c_init)
1108 case X86_VENDOR_AMD: 806 this_cpu->c_init(c);
1109 init_amd(c);
1110 break;
1111
1112 case X86_VENDOR_INTEL:
1113 init_intel(c);
1114 break;
1115
1116 case X86_VENDOR_CENTAUR:
1117 init_centaur(c);
1118 break;
1119
1120 case X86_VENDOR_UNKNOWN:
1121 default:
1122 display_cacheinfo(c);
1123 break;
1124 }
1125 807
1126 detect_ht(c); 808 detect_ht(c);
1127 809
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 56078d61c793..f2b666756299 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -59,7 +59,6 @@
59#include <asm/pgtable.h> 59#include <asm/pgtable.h>
60#include <asm/tlbflush.h> 60#include <asm/tlbflush.h>
61#include <asm/mtrr.h> 61#include <asm/mtrr.h>
62#include <asm/nmi.h>
63#include <asm/vmi.h> 62#include <asm/vmi.h>
64#include <asm/genapic.h> 63#include <asm/genapic.h>
65#include <linux/mc146818rtc.h> 64#include <linux/mc146818rtc.h>
@@ -996,7 +995,6 @@ do_rest:
996#endif 995#endif
997 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ 996 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
998 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ 997 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
999 cpu_clear(cpu, cpu_possible_map);
1000 cpu_clear(cpu, cpu_present_map); 998 cpu_clear(cpu, cpu_present_map);
1001 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; 999 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
1002 } 1000 }
@@ -1158,9 +1156,11 @@ static int __init smp_sanity_check(unsigned max_cpus)
1158 * If SMP should be disabled, then really disable it! 1156 * If SMP should be disabled, then really disable it!
1159 */ 1157 */
1160 if (!max_cpus) { 1158 if (!max_cpus) {
1161 printk(KERN_INFO "SMP mode deactivated," 1159 printk(KERN_INFO "SMP mode deactivated.\n");
1162 "forcing use of dummy APIC emulation.\n");
1163 smpboot_clear_io_apic(); 1160 smpboot_clear_io_apic();
1161
1162 localise_nmi_watchdog();
1163
1164#ifdef CONFIG_X86_32 1164#ifdef CONFIG_X86_32
1165 connect_bsp_APIC(); 1165 connect_bsp_APIC();
1166#endif 1166#endif
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index d2ab52cc1d6b..7066cb855a60 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -19,8 +19,8 @@
19#include <linux/utsname.h> 19#include <linux/utsname.h>
20#include <linux/ipc.h> 20#include <linux/ipc.h>
21 21
22#include <asm/uaccess.h> 22#include <linux/uaccess.h>
23#include <asm/unistd.h> 23#include <linux/unistd.h>
24 24
25asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, 25asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
26 unsigned long prot, unsigned long flags, 26 unsigned long prot, unsigned long flags,
@@ -103,7 +103,7 @@ asmlinkage int old_select(struct sel_arg_struct __user *arg)
103 * 103 *
104 * This is really horribly ugly. 104 * This is really horribly ugly.
105 */ 105 */
106asmlinkage int sys_ipc (uint call, int first, int second, 106asmlinkage int sys_ipc(uint call, int first, int second,
107 int third, void __user *ptr, long fifth) 107 int third, void __user *ptr, long fifth)
108{ 108{
109 int version, ret; 109 int version, ret;
@@ -113,24 +113,24 @@ asmlinkage int sys_ipc (uint call, int first, int second,
113 113
114 switch (call) { 114 switch (call) {
115 case SEMOP: 115 case SEMOP:
116 return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); 116 return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL);
117 case SEMTIMEDOP: 117 case SEMTIMEDOP:
118 return sys_semtimedop(first, (struct sembuf __user *)ptr, second, 118 return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
119 (const struct timespec __user *)fifth); 119 (const struct timespec __user *)fifth);
120 120
121 case SEMGET: 121 case SEMGET:
122 return sys_semget (first, second, third); 122 return sys_semget(first, second, third);
123 case SEMCTL: { 123 case SEMCTL: {
124 union semun fourth; 124 union semun fourth;
125 if (!ptr) 125 if (!ptr)
126 return -EINVAL; 126 return -EINVAL;
127 if (get_user(fourth.__pad, (void __user * __user *) ptr)) 127 if (get_user(fourth.__pad, (void __user * __user *) ptr))
128 return -EFAULT; 128 return -EFAULT;
129 return sys_semctl (first, second, third, fourth); 129 return sys_semctl(first, second, third, fourth);
130 } 130 }
131 131
132 case MSGSND: 132 case MSGSND:
133 return sys_msgsnd (first, (struct msgbuf __user *) ptr, 133 return sys_msgsnd(first, (struct msgbuf __user *) ptr,
134 second, third); 134 second, third);
135 case MSGRCV: 135 case MSGRCV:
136 switch (version) { 136 switch (version) {
@@ -138,45 +138,45 @@ asmlinkage int sys_ipc (uint call, int first, int second,
138 struct ipc_kludge tmp; 138 struct ipc_kludge tmp;
139 if (!ptr) 139 if (!ptr)
140 return -EINVAL; 140 return -EINVAL;
141 141
142 if (copy_from_user(&tmp, 142 if (copy_from_user(&tmp,
143 (struct ipc_kludge __user *) ptr, 143 (struct ipc_kludge __user *) ptr,
144 sizeof (tmp))) 144 sizeof(tmp)))
145 return -EFAULT; 145 return -EFAULT;
146 return sys_msgrcv (first, tmp.msgp, second, 146 return sys_msgrcv(first, tmp.msgp, second,
147 tmp.msgtyp, third); 147 tmp.msgtyp, third);
148 } 148 }
149 default: 149 default:
150 return sys_msgrcv (first, 150 return sys_msgrcv(first,
151 (struct msgbuf __user *) ptr, 151 (struct msgbuf __user *) ptr,
152 second, fifth, third); 152 second, fifth, third);
153 } 153 }
154 case MSGGET: 154 case MSGGET:
155 return sys_msgget ((key_t) first, second); 155 return sys_msgget((key_t) first, second);
156 case MSGCTL: 156 case MSGCTL:
157 return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); 157 return sys_msgctl(first, second, (struct msqid_ds __user *) ptr);
158 158
159 case SHMAT: 159 case SHMAT:
160 switch (version) { 160 switch (version) {
161 default: { 161 default: {
162 ulong raddr; 162 ulong raddr;
163 ret = do_shmat (first, (char __user *) ptr, second, &raddr); 163 ret = do_shmat(first, (char __user *) ptr, second, &raddr);
164 if (ret) 164 if (ret)
165 return ret; 165 return ret;
166 return put_user (raddr, (ulong __user *) third); 166 return put_user(raddr, (ulong __user *) third);
167 } 167 }
168 case 1: /* iBCS2 emulator entry point */ 168 case 1: /* iBCS2 emulator entry point */
169 if (!segment_eq(get_fs(), get_ds())) 169 if (!segment_eq(get_fs(), get_ds()))
170 return -EINVAL; 170 return -EINVAL;
171 /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ 171 /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
172 return do_shmat (first, (char __user *) ptr, second, (ulong *) third); 172 return do_shmat(first, (char __user *) ptr, second, (ulong *) third);
173 } 173 }
174 case SHMDT: 174 case SHMDT:
175 return sys_shmdt ((char __user *)ptr); 175 return sys_shmdt((char __user *)ptr);
176 case SHMGET: 176 case SHMGET:
177 return sys_shmget (first, second, third); 177 return sys_shmget(first, second, third);
178 case SHMCTL: 178 case SHMCTL:
179 return sys_shmctl (first, second, 179 return sys_shmctl(first, second,
180 (struct shmid_ds __user *) ptr); 180 (struct shmid_ds __user *) ptr);
181 default: 181 default:
182 return -ENOSYS; 182 return -ENOSYS;
@@ -186,28 +186,28 @@ asmlinkage int sys_ipc (uint call, int first, int second,
186/* 186/*
187 * Old cruft 187 * Old cruft
188 */ 188 */
189asmlinkage int sys_uname(struct old_utsname __user * name) 189asmlinkage int sys_uname(struct old_utsname __user *name)
190{ 190{
191 int err; 191 int err;
192 if (!name) 192 if (!name)
193 return -EFAULT; 193 return -EFAULT;
194 down_read(&uts_sem); 194 down_read(&uts_sem);
195 err = copy_to_user(name, utsname(), sizeof (*name)); 195 err = copy_to_user(name, utsname(), sizeof(*name));
196 up_read(&uts_sem); 196 up_read(&uts_sem);
197 return err?-EFAULT:0; 197 return err? -EFAULT:0;
198} 198}
199 199
200asmlinkage int sys_olduname(struct oldold_utsname __user * name) 200asmlinkage int sys_olduname(struct oldold_utsname __user *name)
201{ 201{
202 int error; 202 int error;
203 203
204 if (!name) 204 if (!name)
205 return -EFAULT; 205 return -EFAULT;
206 if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) 206 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
207 return -EFAULT; 207 return -EFAULT;
208 208
209 down_read(&uts_sem); 209 down_read(&uts_sem);
210 210
211 error = __copy_to_user(&name->sysname, &utsname()->sysname, 211 error = __copy_to_user(&name->sysname, &utsname()->sysname,
212 __OLD_UTS_LEN); 212 __OLD_UTS_LEN);
213 error |= __put_user(0, name->sysname + __OLD_UTS_LEN); 213 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
@@ -223,9 +223,9 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
223 error |= __copy_to_user(&name->machine, &utsname()->machine, 223 error |= __copy_to_user(&name->machine, &utsname()->machine,
224 __OLD_UTS_LEN); 224 __OLD_UTS_LEN);
225 error |= __put_user(0, name->machine + __OLD_UTS_LEN); 225 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
226 226
227 up_read(&uts_sem); 227 up_read(&uts_sem);
228 228
229 error = error ? -EFAULT : 0; 229 error = error ? -EFAULT : 0;
230 230
231 return error; 231 return error;
@@ -241,6 +241,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[])
241 long __res; 241 long __res;
242 asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" 242 asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx"
243 : "=a" (__res) 243 : "=a" (__res)
244 : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); 244 : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory");
245 return __res; 245 return __res;
246} 246}
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 2ff21f398934..5f29f12da50c 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -84,8 +84,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
84 if (timer_ack) { 84 if (timer_ack) {
85 /* 85 /*
86 * Subtle, when I/O APICs are used we have to ack timer IRQ 86 * Subtle, when I/O APICs are used we have to ack timer IRQ
87 * manually to reset the IRR bit for do_slow_gettimeoffset(). 87 * manually to deassert NMI lines for the watchdog if run
88 * This will also deassert NMI lines for the watchdog if run
89 * on an 82489DX-based system. 88 * on an 82489DX-based system.
90 */ 89 */
91 spin_lock(&i8259A_lock); 90 spin_lock(&i8259A_lock);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index adff76ea97c4..ec6d3b2130c4 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -71,7 +71,6 @@ asmlinkage void general_protection(void);
71asmlinkage void page_fault(void); 71asmlinkage void page_fault(void);
72asmlinkage void coprocessor_error(void); 72asmlinkage void coprocessor_error(void);
73asmlinkage void simd_coprocessor_error(void); 73asmlinkage void simd_coprocessor_error(void);
74asmlinkage void reserved(void);
75asmlinkage void alignment_check(void); 74asmlinkage void alignment_check(void);
76asmlinkage void machine_check(void); 75asmlinkage void machine_check(void);
77asmlinkage void spurious_interrupt_bug(void); 76asmlinkage void spurious_interrupt_bug(void);
@@ -702,12 +701,10 @@ DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
702DO_ERROR( 4, SIGSEGV, "overflow", overflow) 701DO_ERROR( 4, SIGSEGV, "overflow", overflow)
703DO_ERROR( 5, SIGSEGV, "bounds", bounds) 702DO_ERROR( 5, SIGSEGV, "bounds", bounds)
704DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) 703DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
705DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
706DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 704DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
707DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 705DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
708DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 706DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
709DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 707DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
710DO_ERROR(18, SIGSEGV, "reserved", reserved)
711 708
712/* Runs on IST stack */ 709/* Runs on IST stack */
713asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) 710asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 068759db63dd..774a5a83c296 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -14,7 +14,10 @@
14 14
15#include "mach_timer.h" 15#include "mach_timer.h"
16 16
17static int tsc_disabled; 17/* native_sched_clock() is called before tsc_init(), so
18 we must start with the TSC soft disabled to prevent
19 erroneous rdtsc usage on !cpu_has_tsc processors */
20static int tsc_disabled = -1;
18 21
19/* 22/*
20 * On some systems the TSC frequency does not 23 * On some systems the TSC frequency does not
@@ -283,7 +286,6 @@ core_initcall(cpufreq_tsc);
283 286
284/* clock source code */ 287/* clock source code */
285 288
286static unsigned long current_tsc_khz;
287static struct clocksource clocksource_tsc; 289static struct clocksource clocksource_tsc;
288 290
289/* 291/*
@@ -402,25 +404,20 @@ void __init tsc_init(void)
402{ 404{
403 int cpu; 405 int cpu;
404 406
405 if (!cpu_has_tsc || tsc_disabled) { 407 if (!cpu_has_tsc || tsc_disabled > 0)
406 /* Disable the TSC in case of !cpu_has_tsc */
407 tsc_disabled = 1;
408 return; 408 return;
409 }
410 409
411 cpu_khz = calculate_cpu_khz(); 410 cpu_khz = calculate_cpu_khz();
412 tsc_khz = cpu_khz; 411 tsc_khz = cpu_khz;
413 412
414 if (!cpu_khz) { 413 if (!cpu_khz) {
415 mark_tsc_unstable("could not calculate TSC khz"); 414 mark_tsc_unstable("could not calculate TSC khz");
416 /*
417 * We need to disable the TSC completely in this case
418 * to prevent sched_clock() from using it.
419 */
420 tsc_disabled = 1;
421 return; 415 return;
422 } 416 }
423 417
418 /* now allow native_sched_clock() to use rdtsc */
419 tsc_disabled = 0;
420
424 printk("Detected %lu.%03lu MHz processor.\n", 421 printk("Detected %lu.%03lu MHz processor.\n",
425 (unsigned long)cpu_khz / 1000, 422 (unsigned long)cpu_khz / 1000,
426 (unsigned long)cpu_khz % 1000); 423 (unsigned long)cpu_khz % 1000);
@@ -441,9 +438,8 @@ void __init tsc_init(void)
441 438
442 unsynchronized_tsc(); 439 unsynchronized_tsc();
443 check_geode_tsc_reliable(); 440 check_geode_tsc_reliable();
444 current_tsc_khz = tsc_khz; 441 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
445 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, 442 clocksource_tsc.shift);
446 clocksource_tsc.shift);
447 /* lower the rating if we already know its unstable: */ 443 /* lower the rating if we already know its unstable: */
448 if (check_tsc_unstable()) { 444 if (check_tsc_unstable()) {
449 clocksource_tsc.rating = 0; 445 clocksource_tsc.rating = 0;
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 1784b8077a12..9898fb01edfd 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -242,7 +242,7 @@ void __init tsc_calibrate(void)
242 if (hpet) { 242 if (hpet) {
243 printk(KERN_INFO "TSC calibrated against HPET\n"); 243 printk(KERN_INFO "TSC calibrated against HPET\n");
244 if (hpet2 < hpet1) 244 if (hpet2 < hpet1)
245 hpet2 += 0x100000000; 245 hpet2 += 0x100000000UL;
246 hpet2 -= hpet1; 246 hpet2 -= hpet1;
247 tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; 247 tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000;
248 } else { 248 } else {
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index ce5ed083a1e9..2674f5796275 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -60,13 +60,6 @@ SECTIONS
60 60
61 BUG_TABLE :text 61 BUG_TABLE :text
62 62
63 . = ALIGN(4);
64 .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
65 __tracedata_start = .;
66 *(.tracedata)
67 __tracedata_end = .;
68 }
69
70 RODATA 63 RODATA
71 64
72 /* writeable */ 65 /* writeable */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fad3674b06a5..fd246e22fe6b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -53,13 +53,6 @@ SECTIONS
53 53
54 RODATA 54 RODATA
55 55
56 . = ALIGN(4);
57 .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
58 __tracedata_start = .;
59 *(.tracedata)
60 __tracedata_end = .;
61 }
62
63 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ 56 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
64 /* Data */ 57 /* Data */
65 .data : AT(ADDR(.data) - LOAD_OFFSET) { 58 .data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -177,6 +170,7 @@ SECTIONS
177 *(.con_initcall.init) 170 *(.con_initcall.init)
178 } 171 }
179 __con_initcall_end = .; 172 __con_initcall_end = .;
173 . = ALIGN(16);
180 __x86cpuvendor_start = .; 174 __x86cpuvendor_start = .;
181 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 175 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) {
182 *(.x86cpuvendor.init) 176 *(.x86cpuvendor.init)
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index ba8c0b75ab0a..0c029e8959c7 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -15,9 +15,12 @@
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/pci_ids.h> 16#include <linux/pci_ids.h>
17#include <linux/pci_regs.h> 17#include <linux/pci_regs.h>
18
19#include <asm/apic.h>
18#include <asm/pci-direct.h> 20#include <asm/pci-direct.h>
19#include <asm/io.h> 21#include <asm/io.h>
20#include <asm/paravirt.h> 22#include <asm/paravirt.h>
23#include <asm/setup.h>
21 24
22#if defined CONFIG_PCI && defined CONFIG_PARAVIRT 25#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
23/* 26/*