aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-05 17:55:20 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-05 17:55:20 -0400
commitea62ccd00fd0b6720b033adfc9984f31130ce195 (patch)
tree9837b797b2466fffcb0af96c388b06eae9c3df18 /arch/x86_64/kernel
parent886a0768affe9a32f18c45f8e1393bca9ece5392 (diff)
parent35060b6a9a4e1c89bc6fbea61090e302dbc61847 (diff)
Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6
* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (231 commits) [PATCH] i386: Don't delete cpu_devs data to identify different x86 types in late_initcall [PATCH] i386: type may be unused [PATCH] i386: Some additional chipset register values validation. [PATCH] i386: Add missing !X86_PAE dependincy to the 2G/2G split. [PATCH] x86-64: Don't exclude asm-offsets.c in Documentation/dontdiff [PATCH] i386: avoid redundant preempt_disable in __unlazy_fpu [PATCH] i386: white space fixes in i387.h [PATCH] i386: Drop noisy e820 debugging printks [PATCH] x86-64: Fix allnoconfig error in genapic_flat.c [PATCH] x86-64: Shut up warnings for vfat compat ioctls on other file systems [PATCH] x86-64: Share identical video.S between i386 and x86-64 [PATCH] x86-64: Remove CONFIG_REORDER [PATCH] x86-64: Print type and size correctly for unknown compat ioctls [PATCH] i386: Remove copy_*_user BUG_ONs for (size < 0) [PATCH] i386: Little cleanups in smpboot.c [PATCH] x86-64: Don't enable NUMA for a single node in K8 NUMA scanning [PATCH] x86: Use RDTSCP for synchronous get_cycles if possible [PATCH] i386: Add X86_FEATURE_RDTSCP [PATCH] i386: Implement X86_FEATURE_SYNC_RDTSC on i386 [PATCH] i386: Implement alternative_io for i386 ... Fix up trivial conflict in include/linux/highmem.h manually. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--arch/x86_64/kernel/Makefile7
-rw-r--r--arch/x86_64/kernel/acpi/sleep.c24
-rw-r--r--arch/x86_64/kernel/acpi/wakeup.S286
-rw-r--r--arch/x86_64/kernel/aperture.c5
-rw-r--r--arch/x86_64/kernel/apic.c35
-rw-r--r--arch/x86_64/kernel/asm-offsets.c10
-rw-r--r--arch/x86_64/kernel/bugs.c21
-rw-r--r--arch/x86_64/kernel/e820.c5
-rw-r--r--arch/x86_64/kernel/early-quirks.c13
-rw-r--r--arch/x86_64/kernel/early_printk.c5
-rw-r--r--arch/x86_64/kernel/entry.S5
-rw-r--r--arch/x86_64/kernel/functionlist1284
-rw-r--r--arch/x86_64/kernel/genapic.c104
-rw-r--r--arch/x86_64/kernel/genapic_cluster.c137
-rw-r--r--arch/x86_64/kernel/genapic_flat.c25
-rw-r--r--arch/x86_64/kernel/head.S340
-rw-r--r--arch/x86_64/kernel/head64.c41
-rw-r--r--arch/x86_64/kernel/io_apic.c31
-rw-r--r--arch/x86_64/kernel/ioport.c1
-rw-r--r--arch/x86_64/kernel/machine_kexec.c14
-rw-r--r--arch/x86_64/kernel/mce.c32
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c678
-rw-r--r--arch/x86_64/kernel/pci-calgary.c2
-rw-r--r--arch/x86_64/kernel/pci-gart.c2
-rw-r--r--arch/x86_64/kernel/pci-nommu.c2
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c12
-rw-r--r--arch/x86_64/kernel/setup.c35
-rw-r--r--arch/x86_64/kernel/setup64.c5
-rw-r--r--arch/x86_64/kernel/signal.c6
-rw-r--r--arch/x86_64/kernel/smp.c30
-rw-r--r--arch/x86_64/kernel/smpboot.c47
-rw-r--r--arch/x86_64/kernel/suspend.c19
-rw-r--r--arch/x86_64/kernel/suspend_asm.S7
-rw-r--r--arch/x86_64/kernel/syscall.c1
-rw-r--r--arch/x86_64/kernel/time.c71
-rw-r--r--arch/x86_64/kernel/trampoline.S123
-rw-r--r--arch/x86_64/kernel/traps.c34
-rw-r--r--arch/x86_64/kernel/tsc.c17
-rw-r--r--arch/x86_64/kernel/tsc_sync.c4
-rw-r--r--arch/x86_64/kernel/verify_cpu.S119
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S20
-rw-r--r--arch/x86_64/kernel/vsyscall.c68
44 files changed, 934 insertions, 2797 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bb47e86f3d02..4d94c51803d8 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,8 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o 11 pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
12 perfctr-watchdog.o
12 13
13obj-$(CONFIG_STACKTRACE) += stacktrace.o 14obj-$(CONFIG_STACKTRACE) += stacktrace.o
14obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o 15obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
@@ -21,8 +22,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
21obj-$(CONFIG_X86_CPUID) += cpuid.o 22obj-$(CONFIG_X86_CPUID) += cpuid.o
22obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o 23obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
23obj-y += apic.o nmi.o 24obj-y += apic.o nmi.o
24obj-y += io_apic.o mpparse.o \ 25obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
25 genapic.o genapic_cluster.o genapic_flat.o
26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
28obj-$(CONFIG_PM) += suspend.o 28obj-$(CONFIG_PM) += suspend.o
@@ -58,3 +58,4 @@ i8237-y += ../../i386/kernel/i8237.o
58msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 58msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
59alternative-y += ../../i386/kernel/alternative.o 59alternative-y += ../../i386/kernel/alternative.o
60pcspeaker-y += ../../i386/kernel/pcspeaker.o 60pcspeaker-y += ../../i386/kernel/pcspeaker.o
61perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index e1548fbe95ae..195b7034a148 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -60,19 +60,6 @@ extern char wakeup_start, wakeup_end;
60 60
61extern unsigned long acpi_copy_wakeup_routine(unsigned long); 61extern unsigned long acpi_copy_wakeup_routine(unsigned long);
62 62
63static pgd_t low_ptr;
64
65static void init_low_mapping(void)
66{
67 pgd_t *slot0 = pgd_offset(current->mm, 0UL);
68 low_ptr = *slot0;
69 /* FIXME: We're playing with the current task's page tables here, which
70 * is potentially dangerous on SMP systems.
71 */
72 set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
73 local_flush_tlb();
74}
75
76/** 63/**
77 * acpi_save_state_mem - save kernel state 64 * acpi_save_state_mem - save kernel state
78 * 65 *
@@ -81,8 +68,6 @@ static void init_low_mapping(void)
81 */ 68 */
82int acpi_save_state_mem(void) 69int acpi_save_state_mem(void)
83{ 70{
84 init_low_mapping();
85
86 memcpy((void *)acpi_wakeup_address, &wakeup_start, 71 memcpy((void *)acpi_wakeup_address, &wakeup_start,
87 &wakeup_end - &wakeup_start); 72 &wakeup_end - &wakeup_start);
88 acpi_copy_wakeup_routine(acpi_wakeup_address); 73 acpi_copy_wakeup_routine(acpi_wakeup_address);
@@ -95,8 +80,6 @@ int acpi_save_state_mem(void)
95 */ 80 */
96void acpi_restore_state_mem(void) 81void acpi_restore_state_mem(void)
97{ 82{
98 set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
99 local_flush_tlb();
100} 83}
101 84
102/** 85/**
@@ -109,10 +92,11 @@ void acpi_restore_state_mem(void)
109 */ 92 */
110void __init acpi_reserve_bootmem(void) 93void __init acpi_reserve_bootmem(void)
111{ 94{
112 acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); 95 acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
113 if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) 96 if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
114 printk(KERN_CRIT 97 printk(KERN_CRIT
115 "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); 98 "ACPI: Wakeup code way too big, will crash on attempt"
99 " to suspend\n");
116} 100}
117 101
118static int __init acpi_sleep_setup(char *str) 102static int __init acpi_sleep_setup(char *str)
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S
index 185faa911db5..8550a6ffa275 100644
--- a/arch/x86_64/kernel/acpi/wakeup.S
+++ b/arch/x86_64/kernel/acpi/wakeup.S
@@ -1,6 +1,7 @@
1.text 1.text
2#include <linux/linkage.h> 2#include <linux/linkage.h>
3#include <asm/segment.h> 3#include <asm/segment.h>
4#include <asm/pgtable.h>
4#include <asm/page.h> 5#include <asm/page.h>
5#include <asm/msr.h> 6#include <asm/msr.h>
6 7
@@ -30,22 +31,28 @@ wakeup_code:
30 cld 31 cld
31 # setup data segment 32 # setup data segment
32 movw %cs, %ax 33 movw %cs, %ax
33 movw %ax, %ds # Make ds:0 point to wakeup_start 34 movw %ax, %ds # Make ds:0 point to wakeup_start
34 movw %ax, %ss 35 movw %ax, %ss
35 mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board 36 # Private stack is needed for ASUS board
37 mov $(wakeup_stack - wakeup_code), %sp
36 38
37 pushl $0 # Kill any dangerous flags 39 pushl $0 # Kill any dangerous flags
38 popfl 40 popfl
39 41
40 movl real_magic - wakeup_code, %eax 42 movl real_magic - wakeup_code, %eax
41 cmpl $0x12345678, %eax 43 cmpl $0x12345678, %eax
42 jne bogus_real_magic 44 jne bogus_real_magic
43 45
46 call verify_cpu # Verify the cpu supports long
47 # mode
48 testl %eax, %eax
49 jnz no_longmode
50
44 testl $1, video_flags - wakeup_code 51 testl $1, video_flags - wakeup_code
45 jz 1f 52 jz 1f
46 lcall $0xc000,$3 53 lcall $0xc000,$3
47 movw %cs, %ax 54 movw %cs, %ax
48 movw %ax, %ds # Bios might have played with that 55 movw %ax, %ds # Bios might have played with that
49 movw %ax, %ss 56 movw %ax, %ss
501: 571:
51 58
@@ -61,12 +68,15 @@ wakeup_code:
61 68
62 movb $0xa2, %al ; outb %al, $0x80 69 movb $0xa2, %al ; outb %al, $0x80
63 70
64 lidt %ds:idt_48a - wakeup_code 71 mov %ds, %ax # Find 32bit wakeup_code addr
65 xorl %eax, %eax 72 movzx %ax, %esi # (Convert %ds:gdt to a linear ptr)
66 movw %ds, %ax # (Convert %ds:gdt to a linear ptr) 73 shll $4, %esi
67 shll $4, %eax 74 # Fix up the vectors
68 addl $(gdta - wakeup_code), %eax 75 addl %esi, wakeup_32_vector - wakeup_code
69 movl %eax, gdt_48a +2 - wakeup_code 76 addl %esi, wakeup_long64_vector - wakeup_code
77 addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
78
79 lidtl %ds:idt_48a - wakeup_code
70 lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is 80 lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is
71 # appropriate 81 # appropriate
72 82
@@ -75,86 +85,63 @@ wakeup_code:
75 jmp 1f 85 jmp 1f
761: 861:
77 87
78 .byte 0x66, 0xea # prefix + jmpi-opcode 88 ljmpl *(wakeup_32_vector - wakeup_code)
79 .long wakeup_32 - __START_KERNEL_map 89
80 .word __KERNEL_CS 90 .balign 4
91wakeup_32_vector:
92 .long wakeup_32 - wakeup_code
93 .word __KERNEL32_CS, 0
81 94
82 .code32 95 .code32
83wakeup_32: 96wakeup_32:
84# Running in this code, but at low address; paging is not yet turned on. 97# Running in this code, but at low address; paging is not yet turned on.
85 movb $0xa5, %al ; outb %al, $0x80 98 movb $0xa5, %al ; outb %al, $0x80
86 99
87 /* Check if extended functions are implemented */ 100 movl $__KERNEL_DS, %eax
88 movl $0x80000000, %eax 101 movl %eax, %ds
89 cpuid
90 cmpl $0x80000000, %eax
91 jbe bogus_cpu
92 wbinvd
93 mov $0x80000001, %eax
94 cpuid
95 btl $29, %edx
96 jnc bogus_cpu
97 movl %edx,%edi
98
99 movw $__KERNEL_DS, %ax
100 movw %ax, %ds
101 movw %ax, %es
102 movw %ax, %fs
103 movw %ax, %gs
104
105 movw $__KERNEL_DS, %ax
106 movw %ax, %ss
107 102
108 mov $(wakeup_stack - __START_KERNEL_map), %esp 103 movw $0x0e00 + 'i', %ds:(0xb8012)
109 movl saved_magic - __START_KERNEL_map, %eax 104 movb $0xa8, %al ; outb %al, $0x80;
110 cmpl $0x9abcdef0, %eax
111 jne bogus_32_magic
112 105
113 /* 106 /*
114 * Prepare for entering 64bits mode 107 * Prepare for entering 64bits mode
115 */ 108 */
116 109
117 /* Enable PAE mode and PGE */ 110 /* Enable PAE */
118 xorl %eax, %eax 111 xorl %eax, %eax
119 btsl $5, %eax 112 btsl $5, %eax
120 btsl $7, %eax
121 movl %eax, %cr4 113 movl %eax, %cr4
122 114
123 /* Setup early boot stage 4 level pagetables */ 115 /* Setup early boot stage 4 level pagetables */
124 movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax 116 leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax
125 movl %eax, %cr3 117 movl %eax, %cr3
126 118
127 /* Setup EFER (Extended Feature Enable Register) */ 119 /* Check if nx is implemented */
128 movl $MSR_EFER, %ecx 120 movl $0x80000001, %eax
129 rdmsr 121 cpuid
130 /* Fool rdmsr and reset %eax to avoid dependences */ 122 movl %edx,%edi
131 xorl %eax, %eax 123
132 /* Enable Long Mode */ 124 /* Enable Long Mode */
125 xorl %eax, %eax
133 btsl $_EFER_LME, %eax 126 btsl $_EFER_LME, %eax
134 /* Enable System Call */
135 btsl $_EFER_SCE, %eax
136 127
137 /* No Execute supported? */ 128 /* No Execute supported? */
138 btl $20,%edi 129 btl $20,%edi
139 jnc 1f 130 jnc 1f
140 btsl $_EFER_NX, %eax 131 btsl $_EFER_NX, %eax
1411:
142 132
143 /* Make changes effective */ 133 /* Make changes effective */
1341: movl $MSR_EFER, %ecx
135 xorl %edx, %edx
144 wrmsr 136 wrmsr
145 wbinvd
146 137
147 xorl %eax, %eax 138 xorl %eax, %eax
148 btsl $31, %eax /* Enable paging and in turn activate Long Mode */ 139 btsl $31, %eax /* Enable paging and in turn activate Long Mode */
149 btsl $0, %eax /* Enable protected mode */ 140 btsl $0, %eax /* Enable protected mode */
150 btsl $1, %eax /* Enable MP */
151 btsl $4, %eax /* Enable ET */
152 btsl $5, %eax /* Enable NE */
153 btsl $16, %eax /* Enable WP */
154 btsl $18, %eax /* Enable AM */
155 141
156 /* Make changes effective */ 142 /* Make changes effective */
157 movl %eax, %cr0 143 movl %eax, %cr0
144
158 /* At this point: 145 /* At this point:
159 CR4.PAE must be 1 146 CR4.PAE must be 1
160 CS.L must be 0 147 CS.L must be 0
@@ -162,11 +149,6 @@ wakeup_32:
162 Next instruction must be a branch 149 Next instruction must be a branch
163 This must be on identity-mapped page 150 This must be on identity-mapped page
164 */ 151 */
165 jmp reach_compatibility_mode
166reach_compatibility_mode:
167 movw $0x0e00 + 'i', %ds:(0xb8012)
168 movb $0xa8, %al ; outb %al, $0x80;
169
170 /* 152 /*
171 * At this point we're in long mode but in 32bit compatibility mode 153 * At this point we're in long mode but in 32bit compatibility mode
172 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn 154 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
@@ -174,24 +156,19 @@ reach_compatibility_mode:
174 * the new gdt/idt that has __KERNEL_CS with CS.L = 1. 156 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
175 */ 157 */
176 158
177 movw $0x0e00 + 'n', %ds:(0xb8014)
178 movb $0xa9, %al ; outb %al, $0x80
179
180 /* Load new GDT with the 64bit segment using 32bit descriptor */
181 movl $(pGDT32 - __START_KERNEL_map), %eax
182 lgdt (%eax)
183
184 movl $(wakeup_jumpvector - __START_KERNEL_map), %eax
185 /* Finally jump in 64bit mode */ 159 /* Finally jump in 64bit mode */
186 ljmp *(%eax) 160 ljmp *(wakeup_long64_vector - wakeup_code)(%esi)
187 161
188wakeup_jumpvector: 162 .balign 4
189 .long wakeup_long64 - __START_KERNEL_map 163wakeup_long64_vector:
190 .word __KERNEL_CS 164 .long wakeup_long64 - wakeup_code
165 .word __KERNEL_CS, 0
191 166
192.code64 167.code64
193 168
194 /* Hooray, we are in Long 64-bit mode (but still running in low memory) */ 169 /* Hooray, we are in Long 64-bit mode (but still running in
170 * low memory)
171 */
195wakeup_long64: 172wakeup_long64:
196 /* 173 /*
197 * We must switch to a new descriptor in kernel space for the GDT 174 * We must switch to a new descriptor in kernel space for the GDT
@@ -199,7 +176,15 @@ wakeup_long64:
199 * addresses where we're currently running on. We have to do that here 176 * addresses where we're currently running on. We have to do that here
200 * because in 32bit we couldn't load a 64bit linear address. 177 * because in 32bit we couldn't load a 64bit linear address.
201 */ 178 */
202 lgdt cpu_gdt_descr - __START_KERNEL_map 179 lgdt cpu_gdt_descr
180
181 movw $0x0e00 + 'n', %ds:(0xb8014)
182 movb $0xa9, %al ; outb %al, $0x80
183
184 movq saved_magic, %rax
185 movq $0x123456789abcdef0, %rdx
186 cmpq %rdx, %rax
187 jne bogus_64_magic
203 188
204 movw $0x0e00 + 'u', %ds:(0xb8016) 189 movw $0x0e00 + 'u', %ds:(0xb8016)
205 190
@@ -211,75 +196,58 @@ wakeup_long64:
211 movw %ax, %es 196 movw %ax, %es
212 movw %ax, %fs 197 movw %ax, %fs
213 movw %ax, %gs 198 movw %ax, %gs
214 movq saved_esp, %rsp 199 movq saved_rsp, %rsp
215 200
216 movw $0x0e00 + 'x', %ds:(0xb8018) 201 movw $0x0e00 + 'x', %ds:(0xb8018)
217 movq saved_ebx, %rbx 202 movq saved_rbx, %rbx
218 movq saved_edi, %rdi 203 movq saved_rdi, %rdi
219 movq saved_esi, %rsi 204 movq saved_rsi, %rsi
220 movq saved_ebp, %rbp 205 movq saved_rbp, %rbp
221 206
222 movw $0x0e00 + '!', %ds:(0xb801a) 207 movw $0x0e00 + '!', %ds:(0xb801a)
223 movq saved_eip, %rax 208 movq saved_rip, %rax
224 jmp *%rax 209 jmp *%rax
225 210
226.code32 211.code32
227 212
228 .align 64 213 .align 64
229gdta: 214gdta:
215 /* It's good to keep the gdt in sync with the one in trampoline.S */
230 .word 0, 0, 0, 0 # dummy 216 .word 0, 0, 0, 0 # dummy
231 217 /* ??? Why do I need the accessed bit set in order for this to work? */
232 .word 0, 0, 0, 0 # unused 218 .quad 0x00cf9b000000ffff # __KERNEL32_CS
233 219 .quad 0x00af9b000000ffff # __KERNEL_CS
234 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) 220 .quad 0x00cf93000000ffff # __KERNEL_DS
235 .word 0 # base address = 0
236 .word 0x9B00 # code read/exec. ??? Why I need 0x9B00 (as opposed to 0x9A00 in order for this to work?)
237 .word 0x00CF # granularity = 4096, 386
238 # (+5th nibble of limit)
239
240 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
241 .word 0 # base address = 0
242 .word 0x9200 # data read/write
243 .word 0x00CF # granularity = 4096, 386
244 # (+5th nibble of limit)
245# this is 64bit descriptor for code
246 .word 0xFFFF
247 .word 0
248 .word 0x9A00 # code read/exec
249 .word 0x00AF # as above, but it is long mode and with D=0
250 221
251idt_48a: 222idt_48a:
252 .word 0 # idt limit = 0 223 .word 0 # idt limit = 0
253 .word 0, 0 # idt base = 0L 224 .word 0, 0 # idt base = 0L
254 225
255gdt_48a: 226gdt_48a:
256 .word 0x8000 # gdt limit=2048, 227 .word 0x800 # gdt limit=2048,
257 # 256 GDT entries 228 # 256 GDT entries
258 .word 0, 0 # gdt base (filled in later) 229 .long gdta - wakeup_code # gdt base (relocated later)
259
260 230
261real_save_gdt: .word 0
262 .quad 0
263real_magic: .quad 0 231real_magic: .quad 0
264video_mode: .quad 0 232video_mode: .quad 0
265video_flags: .quad 0 233video_flags: .quad 0
266 234
235.code16
267bogus_real_magic: 236bogus_real_magic:
268 movb $0xba,%al ; outb %al,$0x80 237 movb $0xba,%al ; outb %al,$0x80
269 jmp bogus_real_magic 238 jmp bogus_real_magic
270 239
271bogus_32_magic: 240.code64
241bogus_64_magic:
272 movb $0xb3,%al ; outb %al,$0x80 242 movb $0xb3,%al ; outb %al,$0x80
273 jmp bogus_32_magic 243 jmp bogus_64_magic
274 244
275bogus_31_magic: 245.code16
276 movb $0xb1,%al ; outb %al,$0x80 246no_longmode:
277 jmp bogus_31_magic 247 movb $0xbc,%al ; outb %al,$0x80
278 248 jmp no_longmode
279bogus_cpu:
280 movb $0xbc,%al ; outb %al,$0x80
281 jmp bogus_cpu
282 249
250#include "../verify_cpu.S"
283 251
284/* This code uses an extended set of video mode numbers. These include: 252/* This code uses an extended set of video mode numbers. These include:
285 * Aliases for standard modes 253 * Aliases for standard modes
@@ -301,6 +269,7 @@ bogus_cpu:
301#define VIDEO_FIRST_V7 0x0900 269#define VIDEO_FIRST_V7 0x0900
302 270
303# Setting of user mode (AX=mode ID) => CF=success 271# Setting of user mode (AX=mode ID) => CF=success
272.code16
304mode_seta: 273mode_seta:
305 movw %ax, %bx 274 movw %ax, %bx
306#if 0 275#if 0
@@ -346,21 +315,18 @@ check_vesaa:
346 315
347_setbada: jmp setbada 316_setbada: jmp setbada
348 317
349 .code64
350bogus_magic:
351 movw $0x0e00 + 'B', %ds:(0xb8018)
352 jmp bogus_magic
353
354bogus_magic2:
355 movw $0x0e00 + '2', %ds:(0xb8018)
356 jmp bogus_magic2
357
358
359wakeup_stack_begin: # Stack grows down 318wakeup_stack_begin: # Stack grows down
360 319
361.org 0xff0 320.org 0xff0
362wakeup_stack: # Just below end of page 321wakeup_stack: # Just below end of page
363 322
323.org 0x1000
324ENTRY(wakeup_level4_pgt)
325 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
326 .fill 510,8,0
327 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
328 .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
329
364ENTRY(wakeup_end) 330ENTRY(wakeup_end)
365 331
366## 332##
@@ -373,28 +339,11 @@ ENTRY(wakeup_end)
373# 339#
374# Returned address is location of code in low memory (past data and stack) 340# Returned address is location of code in low memory (past data and stack)
375# 341#
342 .code64
376ENTRY(acpi_copy_wakeup_routine) 343ENTRY(acpi_copy_wakeup_routine)
377 pushq %rax 344 pushq %rax
378 pushq %rcx
379 pushq %rdx 345 pushq %rdx
380 346
381 sgdt saved_gdt
382 sidt saved_idt
383 sldt saved_ldt
384 str saved_tss
385
386 movq %cr3, %rdx
387 movq %rdx, saved_cr3
388 movq %cr4, %rdx
389 movq %rdx, saved_cr4
390 movq %cr0, %rdx
391 movq %rdx, saved_cr0
392 sgdt real_save_gdt - wakeup_start (,%rdi)
393 movl $MSR_EFER, %ecx
394 rdmsr
395 movl %eax, saved_efer
396 movl %edx, saved_efer2
397
398 movl saved_video_mode, %edx 347 movl saved_video_mode, %edx
399 movl %edx, video_mode - wakeup_start (,%rdi) 348 movl %edx, video_mode - wakeup_start (,%rdi)
400 movl acpi_video_flags, %edx 349 movl acpi_video_flags, %edx
@@ -403,21 +352,13 @@ ENTRY(acpi_copy_wakeup_routine)
403 movq $0x123456789abcdef0, %rdx 352 movq $0x123456789abcdef0, %rdx
404 movq %rdx, saved_magic 353 movq %rdx, saved_magic
405 354
406 movl saved_magic - __START_KERNEL_map, %eax 355 movq saved_magic, %rax
407 cmpl $0x9abcdef0, %eax 356 movq $0x123456789abcdef0, %rdx
408 jne bogus_32_magic 357 cmpq %rdx, %rax
409 358 jne bogus_64_magic
410 # make sure %cr4 is set correctly (features, etc)
411 movl saved_cr4 - __START_KERNEL_map, %eax
412 movq %rax, %cr4
413 359
414 movl saved_cr0 - __START_KERNEL_map, %eax
415 movq %rax, %cr0
416 jmp 1f # Flush pipelines
4171:
418 # restore the regs we used 360 # restore the regs we used
419 popq %rdx 361 popq %rdx
420 popq %rcx
421 popq %rax 362 popq %rax
422ENTRY(do_suspend_lowlevel_s4bios) 363ENTRY(do_suspend_lowlevel_s4bios)
423 ret 364 ret
@@ -450,13 +391,13 @@ do_suspend_lowlevel:
450 movq %r15, saved_context_r15(%rip) 391 movq %r15, saved_context_r15(%rip)
451 pushfq ; popq saved_context_eflags(%rip) 392 pushfq ; popq saved_context_eflags(%rip)
452 393
453 movq $.L97, saved_eip(%rip) 394 movq $.L97, saved_rip(%rip)
454 395
455 movq %rsp,saved_esp 396 movq %rsp,saved_rsp
456 movq %rbp,saved_ebp 397 movq %rbp,saved_rbp
457 movq %rbx,saved_ebx 398 movq %rbx,saved_rbx
458 movq %rdi,saved_edi 399 movq %rdi,saved_rdi
459 movq %rsi,saved_esi 400 movq %rsi,saved_rsi
460 401
461 addq $8, %rsp 402 addq $8, %rsp
462 movl $3, %edi 403 movl $3, %edi
@@ -503,25 +444,12 @@ do_suspend_lowlevel:
503 444
504.data 445.data
505ALIGN 446ALIGN
506ENTRY(saved_ebp) .quad 0 447ENTRY(saved_rbp) .quad 0
507ENTRY(saved_esi) .quad 0 448ENTRY(saved_rsi) .quad 0
508ENTRY(saved_edi) .quad 0 449ENTRY(saved_rdi) .quad 0
509ENTRY(saved_ebx) .quad 0 450ENTRY(saved_rbx) .quad 0
510 451
511ENTRY(saved_eip) .quad 0 452ENTRY(saved_rip) .quad 0
512ENTRY(saved_esp) .quad 0 453ENTRY(saved_rsp) .quad 0
513 454
514ENTRY(saved_magic) .quad 0 455ENTRY(saved_magic) .quad 0
515
516ALIGN
517# saved registers
518saved_gdt: .quad 0,0
519saved_idt: .quad 0,0
520saved_ldt: .quad 0
521saved_tss: .quad 0
522
523saved_cr0: .quad 0
524saved_cr3: .quad 0
525saved_cr4: .quad 0
526saved_efer: .quad 0
527saved_efer2: .quad 0
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index b487396c4c5b..a52af5820592 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -51,7 +51,6 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
51 51
52static u32 __init allocate_aperture(void) 52static u32 __init allocate_aperture(void)
53{ 53{
54 pg_data_t *nd0 = NODE_DATA(0);
55 u32 aper_size; 54 u32 aper_size;
56 void *p; 55 void *p;
57 56
@@ -65,12 +64,12 @@ static u32 __init allocate_aperture(void)
65 * Unfortunately we cannot move it up because that would make the 64 * Unfortunately we cannot move it up because that would make the
66 * IOMMU useless. 65 * IOMMU useless.
67 */ 66 */
68 p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0); 67 p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
69 if (!p || __pa(p)+aper_size > 0xffffffff) { 68 if (!p || __pa(p)+aper_size > 0xffffffff) {
70 printk("Cannot allocate aperture memory hole (%p,%uK)\n", 69 printk("Cannot allocate aperture memory hole (%p,%uK)\n",
71 p, aper_size>>10); 70 p, aper_size>>10);
72 if (p) 71 if (p)
73 free_bootmem_node(nd0, __pa(p), aper_size); 72 free_bootmem(__pa(p), aper_size);
74 return 0; 73 return 0;
75 } 74 }
76 printk("Mapping aperture over %d KB of RAM @ %lx\n", 75 printk("Mapping aperture over %d KB of RAM @ %lx\n",
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index bd3e45d47c37..d198f7d82e5a 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -68,6 +68,28 @@ int using_apic_timer __read_mostly = 0;
68 68
69static void apic_pm_activate(void); 69static void apic_pm_activate(void);
70 70
71void apic_wait_icr_idle(void)
72{
73 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
74 cpu_relax();
75}
76
77unsigned int safe_apic_wait_icr_idle(void)
78{
79 unsigned int send_status;
80 int timeout;
81
82 timeout = 0;
83 do {
84 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
85 if (!send_status)
86 break;
87 udelay(100);
88 } while (timeout++ < 1000);
89
90 return send_status;
91}
92
71void enable_NMI_through_LVT0 (void * dummy) 93void enable_NMI_through_LVT0 (void * dummy)
72{ 94{
73 unsigned int v; 95 unsigned int v;
@@ -817,14 +839,15 @@ static void setup_APIC_timer(unsigned int clocks)
817 839
818static int __init calibrate_APIC_clock(void) 840static int __init calibrate_APIC_clock(void)
819{ 841{
820 int apic, apic_start, tsc, tsc_start; 842 unsigned apic, apic_start;
843 unsigned long tsc, tsc_start;
821 int result; 844 int result;
822 /* 845 /*
823 * Put whatever arbitrary (but long enough) timeout 846 * Put whatever arbitrary (but long enough) timeout
824 * value into the APIC clock, we just want to get the 847 * value into the APIC clock, we just want to get the
825 * counter running for calibration. 848 * counter running for calibration.
826 */ 849 */
827 __setup_APIC_LVTT(1000000000); 850 __setup_APIC_LVTT(4000000000);
828 851
829 apic_start = apic_read(APIC_TMCCT); 852 apic_start = apic_read(APIC_TMCCT);
830#ifdef CONFIG_X86_PM_TIMER 853#ifdef CONFIG_X86_PM_TIMER
@@ -835,15 +858,15 @@ static int __init calibrate_APIC_clock(void)
835 } else 858 } else
836#endif 859#endif
837 { 860 {
838 rdtscl(tsc_start); 861 rdtscll(tsc_start);
839 862
840 do { 863 do {
841 apic = apic_read(APIC_TMCCT); 864 apic = apic_read(APIC_TMCCT);
842 rdtscl(tsc); 865 rdtscll(tsc);
843 } while ((tsc - tsc_start) < TICK_COUNT && 866 } while ((tsc - tsc_start) < TICK_COUNT &&
844 (apic - apic_start) < TICK_COUNT); 867 (apic_start - apic) < TICK_COUNT);
845 868
846 result = (apic_start - apic) * 1000L * cpu_khz / 869 result = (apic_start - apic) * 1000L * tsc_khz /
847 (tsc - tsc_start); 870 (tsc - tsc_start);
848 } 871 }
849 printk("result %d\n", result); 872 printk("result %d\n", result);
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2beb2c..778953bc636c 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -21,6 +21,14 @@
21 21
22#define BLANK() asm volatile("\n->" : : ) 22#define BLANK() asm volatile("\n->" : : )
23 23
24#define __NO_STUBS 1
25#undef __SYSCALL
26#undef _ASM_X86_64_UNISTD_H_
27#define __SYSCALL(nr, sym) [nr] = 1,
28static char syscalls[] = {
29#include <asm/unistd.h>
30};
31
24int main(void) 32int main(void)
25{ 33{
26#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) 34#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
@@ -71,5 +79,7 @@ int main(void)
71 DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); 79 DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
72 BLANK(); 80 BLANK();
73 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); 81 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
82 BLANK();
83 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
74 return 0; 84 return 0;
75} 85}
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
new file mode 100644
index 000000000000..12b585b5345d
--- /dev/null
+++ b/arch/x86_64/kernel/bugs.c
@@ -0,0 +1,21 @@
1/*
2 * arch/x86_64/kernel/bugs.c
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 * Copyright (C) 2000 SuSE
6 */
7
8#include <linux/kernel.h>
9#include <linux/init.h>
10#include <asm/alternative.h>
11#include <asm/processor.h>
12
13void __init check_bugs(void)
14{
15 identify_cpu(&boot_cpu_data);
16#if !defined(CONFIG_SMP)
17 printk("CPU: ");
18 print_cpu_info(&boot_cpu_data);
19#endif
20 alternative_instructions();
21}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a490fabfcf47..be8965427a93 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -25,7 +25,7 @@
25#include <asm/bootsetup.h> 25#include <asm/bootsetup.h>
26#include <asm/sections.h> 26#include <asm/sections.h>
27 27
28struct e820map e820 __initdata; 28struct e820map e820;
29 29
30/* 30/*
31 * PFN of last memory page. 31 * PFN of last memory page.
@@ -98,7 +98,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
98 * This function checks if any part of the range <start,end> is mapped 98 * This function checks if any part of the range <start,end> is mapped
99 * with type. 99 * with type.
100 */ 100 */
101int __meminit 101int
102e820_any_mapped(unsigned long start, unsigned long end, unsigned type) 102e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
103{ 103{
104 int i; 104 int i;
@@ -112,6 +112,7 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
112 } 112 }
113 return 0; 113 return 0;
114} 114}
115EXPORT_SYMBOL_GPL(e820_any_mapped);
115 116
116/* 117/*
117 * This function checks if the entire range <start,end> is mapped with type. 118 * This function checks if the entire range <start,end> is mapped with type.
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index fede55a53995..990d9c218a5d 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -71,18 +71,6 @@ static void __init ati_bugs(void)
71 } 71 }
72} 72}
73 73
74static void intel_bugs(void)
75{
76 u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
77
78#ifdef CONFIG_SMP
79 if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
80 device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
81 device == PCI_DEVICE_ID_INTEL_E7525_MCH)
82 quirk_intel_irqbalance();
83#endif
84}
85
86struct chipset { 74struct chipset {
87 u16 vendor; 75 u16 vendor;
88 void (*f)(void); 76 void (*f)(void);
@@ -92,7 +80,6 @@ static struct chipset early_qrk[] __initdata = {
92 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, 80 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
93 { PCI_VENDOR_ID_VIA, via_bugs }, 81 { PCI_VENDOR_ID_VIA, via_bugs },
94 { PCI_VENDOR_ID_ATI, ati_bugs }, 82 { PCI_VENDOR_ID_ATI, ati_bugs },
95 { PCI_VENDOR_ID_INTEL, intel_bugs},
96 {} 83 {}
97}; 84};
98 85
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 47b6d90349da..92213d2b7c11 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -11,11 +11,10 @@
11 11
12#ifdef __i386__ 12#ifdef __i386__
13#include <asm/setup.h> 13#include <asm/setup.h>
14#define VGABASE (__ISA_IO_base + 0xb8000)
15#else 14#else
16#include <asm/bootsetup.h> 15#include <asm/bootsetup.h>
17#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
18#endif 16#endif
17#define VGABASE (__ISA_IO_base + 0xb8000)
19 18
20static int max_ypos = 25, max_xpos = 80; 19static int max_ypos = 25, max_xpos = 80;
21static int current_ypos = 25, current_xpos = 0; 20static int current_ypos = 25, current_xpos = 0;
@@ -176,7 +175,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
176 return ret; 175 return ret;
177} 176}
178 177
179void __init simnow_init(char *str) 178static void __init simnow_init(char *str)
180{ 179{
181 char *fn = "klog"; 180 char *fn = "klog";
182 if (*str == '=') 181 if (*str == '=')
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed4350ced3d0..fa984b53e7e6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -701,6 +701,7 @@ END(spurious_interrupt)
701 CFI_ADJUST_CFA_OFFSET 8 701 CFI_ADJUST_CFA_OFFSET 8
702 pushq %rax /* push real oldrax to the rdi slot */ 702 pushq %rax /* push real oldrax to the rdi slot */
703 CFI_ADJUST_CFA_OFFSET 8 703 CFI_ADJUST_CFA_OFFSET 8
704 CFI_REL_OFFSET rax,0
704 leaq \sym(%rip),%rax 705 leaq \sym(%rip),%rax
705 jmp error_entry 706 jmp error_entry
706 CFI_ENDPROC 707 CFI_ENDPROC
@@ -710,6 +711,7 @@ END(spurious_interrupt)
710 XCPT_FRAME 711 XCPT_FRAME
711 pushq %rax 712 pushq %rax
712 CFI_ADJUST_CFA_OFFSET 8 713 CFI_ADJUST_CFA_OFFSET 8
714 CFI_REL_OFFSET rax,0
713 leaq \sym(%rip),%rax 715 leaq \sym(%rip),%rax
714 jmp error_entry 716 jmp error_entry
715 CFI_ENDPROC 717 CFI_ENDPROC
@@ -817,6 +819,7 @@ paranoid_schedule\trace:
817 */ 819 */
818KPROBE_ENTRY(error_entry) 820KPROBE_ENTRY(error_entry)
819 _frame RDI 821 _frame RDI
822 CFI_REL_OFFSET rax,0
820 /* rdi slot contains rax, oldrax contains error code */ 823 /* rdi slot contains rax, oldrax contains error code */
821 cld 824 cld
822 subq $14*8,%rsp 825 subq $14*8,%rsp
@@ -824,6 +827,7 @@ KPROBE_ENTRY(error_entry)
824 movq %rsi,13*8(%rsp) 827 movq %rsi,13*8(%rsp)
825 CFI_REL_OFFSET rsi,RSI 828 CFI_REL_OFFSET rsi,RSI
826 movq 14*8(%rsp),%rsi /* load rax from rdi slot */ 829 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
830 CFI_REGISTER rax,rsi
827 movq %rdx,12*8(%rsp) 831 movq %rdx,12*8(%rsp)
828 CFI_REL_OFFSET rdx,RDX 832 CFI_REL_OFFSET rdx,RDX
829 movq %rcx,11*8(%rsp) 833 movq %rcx,11*8(%rsp)
@@ -857,6 +861,7 @@ error_swapgs:
857 swapgs 861 swapgs
858error_sti: 862error_sti:
859 movq %rdi,RDI(%rsp) 863 movq %rdi,RDI(%rsp)
864 CFI_REL_OFFSET rdi,RDI
860 movq %rsp,%rdi 865 movq %rsp,%rdi
861 movq ORIG_RAX(%rsp),%rsi /* get error code */ 866 movq ORIG_RAX(%rsp),%rsi /* get error code */
862 movq $-1,ORIG_RAX(%rsp) 867 movq $-1,ORIG_RAX(%rsp)
diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist
deleted file mode 100644
index 7ae18ec12454..000000000000
--- a/arch/x86_64/kernel/functionlist
+++ /dev/null
@@ -1,1284 +0,0 @@
1*(.text.flush_thread)
2*(.text.check_poison_obj)
3*(.text.copy_page)
4*(.text.__set_personality)
5*(.text.gart_map_sg)
6*(.text.kmem_cache_free)
7*(.text.find_get_page)
8*(.text._raw_spin_lock)
9*(.text.ide_outb)
10*(.text.unmap_vmas)
11*(.text.copy_page_range)
12*(.text.kprobe_handler)
13*(.text.__handle_mm_fault)
14*(.text.__d_lookup)
15*(.text.copy_user_generic)
16*(.text.__link_path_walk)
17*(.text.get_page_from_freelist)
18*(.text.kmem_cache_alloc)
19*(.text.drive_cmd_intr)
20*(.text.ia32_setup_sigcontext)
21*(.text.huge_pte_offset)
22*(.text.do_page_fault)
23*(.text.page_remove_rmap)
24*(.text.release_pages)
25*(.text.ide_end_request)
26*(.text.__mutex_lock_slowpath)
27*(.text.__find_get_block)
28*(.text.kfree)
29*(.text.vfs_read)
30*(.text._raw_spin_unlock)
31*(.text.free_hot_cold_page)
32*(.text.fget_light)
33*(.text.schedule)
34*(.text.memcmp)
35*(.text.touch_atime)
36*(.text.__might_sleep)
37*(.text.__down_read_trylock)
38*(.text.arch_pick_mmap_layout)
39*(.text.find_vma)
40*(.text.__make_request)
41*(.text.do_generic_mapping_read)
42*(.text.mutex_lock_interruptible)
43*(.text.__generic_file_aio_read)
44*(.text._atomic_dec_and_lock)
45*(.text.__wake_up_bit)
46*(.text.add_to_page_cache)
47*(.text.cache_alloc_debugcheck_after)
48*(.text.vm_normal_page)
49*(.text.mutex_debug_check_no_locks_freed)
50*(.text.net_rx_action)
51*(.text.__find_first_zero_bit)
52*(.text.put_page)
53*(.text._raw_read_lock)
54*(.text.__delay)
55*(.text.dnotify_parent)
56*(.text.do_path_lookup)
57*(.text.do_sync_read)
58*(.text.do_lookup)
59*(.text.bit_waitqueue)
60*(.text.file_read_actor)
61*(.text.strncpy_from_user)
62*(.text.__pagevec_lru_add_active)
63*(.text.fget)
64*(.text.dput)
65*(.text.__strnlen_user)
66*(.text.inotify_inode_queue_event)
67*(.text.rw_verify_area)
68*(.text.ide_intr)
69*(.text.inotify_dentry_parent_queue_event)
70*(.text.permission)
71*(.text.memscan)
72*(.text.hpet_rtc_interrupt)
73*(.text.do_mmap_pgoff)
74*(.text.current_fs_time)
75*(.text.vfs_getattr)
76*(.text.kmem_flagcheck)
77*(.text.mark_page_accessed)
78*(.text.free_pages_and_swap_cache)
79*(.text.generic_fillattr)
80*(.text.__block_prepare_write)
81*(.text.__set_page_dirty_nobuffers)
82*(.text.link_path_walk)
83*(.text.find_get_pages_tag)
84*(.text.ide_do_request)
85*(.text.__alloc_pages)
86*(.text.generic_permission)
87*(.text.mod_page_state_offset)
88*(.text.free_pgd_range)
89*(.text.generic_file_buffered_write)
90*(.text.number)
91*(.text.ide_do_rw_disk)
92*(.text.__brelse)
93*(.text.__mod_page_state_offset)
94*(.text.rotate_reclaimable_page)
95*(.text.find_vma_prepare)
96*(.text.find_vma_prev)
97*(.text.lru_cache_add_active)
98*(.text.__kmalloc_track_caller)
99*(.text.smp_invalidate_interrupt)
100*(.text.handle_IRQ_event)
101*(.text.__find_get_block_slow)
102*(.text.do_wp_page)
103*(.text.do_select)
104*(.text.set_user_nice)
105*(.text.sys_read)
106*(.text.do_munmap)
107*(.text.csum_partial)
108*(.text.__do_softirq)
109*(.text.may_open)
110*(.text.getname)
111*(.text.get_empty_filp)
112*(.text.__fput)
113*(.text.remove_mapping)
114*(.text.filp_ctor)
115*(.text.poison_obj)
116*(.text.unmap_region)
117*(.text.test_set_page_writeback)
118*(.text.__do_page_cache_readahead)
119*(.text.sock_def_readable)
120*(.text.ide_outl)
121*(.text.shrink_zone)
122*(.text.rb_insert_color)
123*(.text.get_request)
124*(.text.sys_pread64)
125*(.text.spin_bug)
126*(.text.ide_outsl)
127*(.text.mask_and_ack_8259A)
128*(.text.filemap_nopage)
129*(.text.page_add_file_rmap)
130*(.text.find_lock_page)
131*(.text.tcp_poll)
132*(.text.__mark_inode_dirty)
133*(.text.file_ra_state_init)
134*(.text.generic_file_llseek)
135*(.text.__pagevec_lru_add)
136*(.text.page_cache_readahead)
137*(.text.n_tty_receive_buf)
138*(.text.zonelist_policy)
139*(.text.vma_adjust)
140*(.text.test_clear_page_dirty)
141*(.text.sync_buffer)
142*(.text.do_exit)
143*(.text.__bitmap_weight)
144*(.text.alloc_pages_current)
145*(.text.get_unused_fd)
146*(.text.zone_watermark_ok)
147*(.text.cpuset_update_task_memory_state)
148*(.text.__bitmap_empty)
149*(.text.sys_munmap)
150*(.text.__inode_dir_notify)
151*(.text.__generic_file_aio_write_nolock)
152*(.text.__pte_alloc)
153*(.text.sys_select)
154*(.text.vm_acct_memory)
155*(.text.vfs_write)
156*(.text.__lru_add_drain)
157*(.text.prio_tree_insert)
158*(.text.generic_file_aio_read)
159*(.text.vma_merge)
160*(.text.block_write_full_page)
161*(.text.__page_set_anon_rmap)
162*(.text.apic_timer_interrupt)
163*(.text.release_console_sem)
164*(.text.sys_write)
165*(.text.sys_brk)
166*(.text.dup_mm)
167*(.text.read_current_timer)
168*(.text.ll_rw_block)
169*(.text.blk_rq_map_sg)
170*(.text.dbg_userword)
171*(.text.__block_commit_write)
172*(.text.cache_grow)
173*(.text.copy_strings)
174*(.text.release_task)
175*(.text.do_sync_write)
176*(.text.unlock_page)
177*(.text.load_elf_binary)
178*(.text.__follow_mount)
179*(.text.__getblk)
180*(.text.do_sys_open)
181*(.text.current_kernel_time)
182*(.text.call_rcu)
183*(.text.write_chan)
184*(.text.vsnprintf)
185*(.text.dummy_inode_setsecurity)
186*(.text.submit_bh)
187*(.text.poll_freewait)
188*(.text.bio_alloc_bioset)
189*(.text.skb_clone)
190*(.text.page_waitqueue)
191*(.text.__mutex_lock_interruptible_slowpath)
192*(.text.get_index)
193*(.text.csum_partial_copy_generic)
194*(.text.bad_range)
195*(.text.remove_vma)
196*(.text.cp_new_stat)
197*(.text.alloc_arraycache)
198*(.text.test_clear_page_writeback)
199*(.text.strsep)
200*(.text.open_namei)
201*(.text._raw_read_unlock)
202*(.text.get_vma_policy)
203*(.text.__down_write_trylock)
204*(.text.find_get_pages)
205*(.text.tcp_rcv_established)
206*(.text.generic_make_request)
207*(.text.__block_write_full_page)
208*(.text.cfq_set_request)
209*(.text.sys_inotify_init)
210*(.text.split_vma)
211*(.text.__mod_timer)
212*(.text.get_options)
213*(.text.vma_link)
214*(.text.mpage_writepages)
215*(.text.truncate_complete_page)
216*(.text.tcp_recvmsg)
217*(.text.sigprocmask)
218*(.text.filemap_populate)
219*(.text.sys_close)
220*(.text.inotify_dev_queue_event)
221*(.text.do_task_stat)
222*(.text.__dentry_open)
223*(.text.unlink_file_vma)
224*(.text.__pollwait)
225*(.text.packet_rcv_spkt)
226*(.text.drop_buffers)
227*(.text.free_pgtables)
228*(.text.generic_file_direct_write)
229*(.text.copy_process)
230*(.text.netif_receive_skb)
231*(.text.dnotify_flush)
232*(.text.print_bad_pte)
233*(.text.anon_vma_unlink)
234*(.text.sys_mprotect)
235*(.text.sync_sb_inodes)
236*(.text.find_inode_fast)
237*(.text.dummy_inode_readlink)
238*(.text.putname)
239*(.text.init_smp_flush)
240*(.text.dbg_redzone2)
241*(.text.sk_run_filter)
242*(.text.may_expand_vm)
243*(.text.generic_file_aio_write)
244*(.text.find_next_zero_bit)
245*(.text.file_kill)
246*(.text.audit_getname)
247*(.text.arch_unmap_area_topdown)
248*(.text.alloc_page_vma)
249*(.text.tcp_transmit_skb)
250*(.text.rb_next)
251*(.text.dbg_redzone1)
252*(.text.generic_file_mmap)
253*(.text.vfs_fstat)
254*(.text.sys_time)
255*(.text.page_lock_anon_vma)
256*(.text.get_unmapped_area)
257*(.text.remote_llseek)
258*(.text.__up_read)
259*(.text.fd_install)
260*(.text.eventpoll_init_file)
261*(.text.dma_alloc_coherent)
262*(.text.create_empty_buffers)
263*(.text.__mutex_unlock_slowpath)
264*(.text.dup_fd)
265*(.text.d_alloc)
266*(.text.tty_ldisc_try)
267*(.text.sys_stime)
268*(.text.__rb_rotate_right)
269*(.text.d_validate)
270*(.text.rb_erase)
271*(.text.path_release)
272*(.text.memmove)
273*(.text.invalidate_complete_page)
274*(.text.clear_inode)
275*(.text.cache_estimate)
276*(.text.alloc_buffer_head)
277*(.text.smp_call_function_interrupt)
278*(.text.flush_tlb_others)
279*(.text.file_move)
280*(.text.balance_dirty_pages_ratelimited)
281*(.text.vma_prio_tree_add)
282*(.text.timespec_trunc)
283*(.text.mempool_alloc)
284*(.text.iget_locked)
285*(.text.d_alloc_root)
286*(.text.cpuset_populate_dir)
287*(.text.anon_vma_prepare)
288*(.text.sys_newstat)
289*(.text.alloc_page_interleave)
290*(.text.__path_lookup_intent_open)
291*(.text.__pagevec_free)
292*(.text.inode_init_once)
293*(.text.free_vfsmnt)
294*(.text.__user_walk_fd)
295*(.text.cfq_idle_slice_timer)
296*(.text.sys_mmap)
297*(.text.sys_llseek)
298*(.text.prio_tree_remove)
299*(.text.filp_close)
300*(.text.file_permission)
301*(.text.vma_prio_tree_remove)
302*(.text.tcp_ack)
303*(.text.nameidata_to_filp)
304*(.text.sys_lseek)
305*(.text.percpu_counter_mod)
306*(.text.igrab)
307*(.text.__bread)
308*(.text.alloc_inode)
309*(.text.filldir)
310*(.text.__rb_rotate_left)
311*(.text.irq_affinity_write_proc)
312*(.text.init_request_from_bio)
313*(.text.find_or_create_page)
314*(.text.tty_poll)
315*(.text.tcp_sendmsg)
316*(.text.ide_wait_stat)
317*(.text.free_buffer_head)
318*(.text.flush_signal_handlers)
319*(.text.tcp_v4_rcv)
320*(.text.nr_blockdev_pages)
321*(.text.locks_remove_flock)
322*(.text.__iowrite32_copy)
323*(.text.do_filp_open)
324*(.text.try_to_release_page)
325*(.text.page_add_new_anon_rmap)
326*(.text.kmem_cache_size)
327*(.text.eth_type_trans)
328*(.text.try_to_free_buffers)
329*(.text.schedule_tail)
330*(.text.proc_lookup)
331*(.text.no_llseek)
332*(.text.kfree_skbmem)
333*(.text.do_wait)
334*(.text.do_mpage_readpage)
335*(.text.vfs_stat_fd)
336*(.text.tty_write)
337*(.text.705)
338*(.text.sync_page)
339*(.text.__remove_shared_vm_struct)
340*(.text.__kfree_skb)
341*(.text.sock_poll)
342*(.text.get_request_wait)
343*(.text.do_sigaction)
344*(.text.do_brk)
345*(.text.tcp_event_data_recv)
346*(.text.read_chan)
347*(.text.pipe_writev)
348*(.text.__emul_lookup_dentry)
349*(.text.rtc_get_rtc_time)
350*(.text.print_objinfo)
351*(.text.file_update_time)
352*(.text.do_signal)
353*(.text.disable_8259A_irq)
354*(.text.blk_queue_bounce)
355*(.text.__anon_vma_link)
356*(.text.__vma_link)
357*(.text.vfs_rename)
358*(.text.sys_newlstat)
359*(.text.sys_newfstat)
360*(.text.sys_mknod)
361*(.text.__show_regs)
362*(.text.iput)
363*(.text.get_signal_to_deliver)
364*(.text.flush_tlb_page)
365*(.text.debug_mutex_wake_waiter)
366*(.text.copy_thread)
367*(.text.clear_page_dirty_for_io)
368*(.text.buffer_io_error)
369*(.text.vfs_permission)
370*(.text.truncate_inode_pages_range)
371*(.text.sys_recvfrom)
372*(.text.remove_suid)
373*(.text.mark_buffer_dirty)
374*(.text.local_bh_enable)
375*(.text.get_zeroed_page)
376*(.text.get_vmalloc_info)
377*(.text.flush_old_exec)
378*(.text.dummy_inode_permission)
379*(.text.__bio_add_page)
380*(.text.prio_tree_replace)
381*(.text.notify_change)
382*(.text.mntput_no_expire)
383*(.text.fput)
384*(.text.__end_that_request_first)
385*(.text.wake_up_bit)
386*(.text.unuse_mm)
387*(.text.shrink_icache_memory)
388*(.text.sched_balance_self)
389*(.text.__pmd_alloc)
390*(.text.pipe_poll)
391*(.text.normal_poll)
392*(.text.__free_pages)
393*(.text.follow_mount)
394*(.text.cdrom_start_packet_command)
395*(.text.blk_recount_segments)
396*(.text.bio_put)
397*(.text.__alloc_skb)
398*(.text.__wake_up)
399*(.text.vm_stat_account)
400*(.text.sys_fcntl)
401*(.text.sys_fadvise64)
402*(.text._raw_write_unlock)
403*(.text.__pud_alloc)
404*(.text.alloc_page_buffers)
405*(.text.vfs_llseek)
406*(.text.sockfd_lookup)
407*(.text._raw_write_lock)
408*(.text.put_compound_page)
409*(.text.prune_dcache)
410*(.text.pipe_readv)
411*(.text.mempool_free)
412*(.text.make_ahead_window)
413*(.text.lru_add_drain)
414*(.text.constant_test_bit)
415*(.text.__clear_user)
416*(.text.arch_unmap_area)
417*(.text.anon_vma_link)
418*(.text.sys_chroot)
419*(.text.setup_arg_pages)
420*(.text.radix_tree_preload)
421*(.text.init_rwsem)
422*(.text.generic_osync_inode)
423*(.text.generic_delete_inode)
424*(.text.do_sys_poll)
425*(.text.dev_queue_xmit)
426*(.text.default_llseek)
427*(.text.__writeback_single_inode)
428*(.text.vfs_ioctl)
429*(.text.__up_write)
430*(.text.unix_poll)
431*(.text.sys_rt_sigprocmask)
432*(.text.sock_recvmsg)
433*(.text.recalc_bh_state)
434*(.text.__put_unused_fd)
435*(.text.process_backlog)
436*(.text.locks_remove_posix)
437*(.text.lease_modify)
438*(.text.expand_files)
439*(.text.end_buffer_read_nobh)
440*(.text.d_splice_alias)
441*(.text.debug_mutex_init_waiter)
442*(.text.copy_from_user)
443*(.text.cap_vm_enough_memory)
444*(.text.show_vfsmnt)
445*(.text.release_sock)
446*(.text.pfifo_fast_enqueue)
447*(.text.half_md4_transform)
448*(.text.fs_may_remount_ro)
449*(.text.do_fork)
450*(.text.copy_hugetlb_page_range)
451*(.text.cache_free_debugcheck)
452*(.text.__tcp_select_window)
453*(.text.task_handoff_register)
454*(.text.sys_open)
455*(.text.strlcpy)
456*(.text.skb_copy_datagram_iovec)
457*(.text.set_up_list3s)
458*(.text.release_open_intent)
459*(.text.qdisc_restart)
460*(.text.n_tty_chars_in_buffer)
461*(.text.inode_change_ok)
462*(.text.__downgrade_write)
463*(.text.debug_mutex_unlock)
464*(.text.add_timer_randomness)
465*(.text.sock_common_recvmsg)
466*(.text.set_bh_page)
467*(.text.printk_lock)
468*(.text.path_release_on_umount)
469*(.text.ip_output)
470*(.text.ide_build_dmatable)
471*(.text.__get_user_8)
472*(.text.end_buffer_read_sync)
473*(.text.__d_path)
474*(.text.d_move)
475*(.text.del_timer)
476*(.text.constant_test_bit)
477*(.text.blockable_page_cache_readahead)
478*(.text.tty_read)
479*(.text.sys_readlink)
480*(.text.sys_faccessat)
481*(.text.read_swap_cache_async)
482*(.text.pty_write_room)
483*(.text.page_address_in_vma)
484*(.text.kthread)
485*(.text.cfq_exit_io_context)
486*(.text.__tcp_push_pending_frames)
487*(.text.sys_pipe)
488*(.text.submit_bio)
489*(.text.pid_revalidate)
490*(.text.page_referenced_file)
491*(.text.lock_sock)
492*(.text.get_page_state_node)
493*(.text.generic_block_bmap)
494*(.text.do_setitimer)
495*(.text.dev_queue_xmit_nit)
496*(.text.copy_from_read_buf)
497*(.text.__const_udelay)
498*(.text.console_conditional_schedule)
499*(.text.wake_up_new_task)
500*(.text.wait_for_completion_interruptible)
501*(.text.tcp_rcv_rtt_update)
502*(.text.sys_mlockall)
503*(.text.set_fs_altroot)
504*(.text.schedule_timeout)
505*(.text.nr_free_pagecache_pages)
506*(.text.nf_iterate)
507*(.text.mapping_tagged)
508*(.text.ip_queue_xmit)
509*(.text.ip_local_deliver)
510*(.text.follow_page)
511*(.text.elf_map)
512*(.text.dummy_file_permission)
513*(.text.dispose_list)
514*(.text.dentry_open)
515*(.text.dentry_iput)
516*(.text.bio_alloc)
517*(.text.wait_on_page_bit)
518*(.text.vfs_readdir)
519*(.text.vfs_lstat)
520*(.text.seq_escape)
521*(.text.__posix_lock_file)
522*(.text.mm_release)
523*(.text.kref_put)
524*(.text.ip_rcv)
525*(.text.__iget)
526*(.text.free_pages)
527*(.text.find_mergeable_anon_vma)
528*(.text.find_extend_vma)
529*(.text.dummy_inode_listsecurity)
530*(.text.bio_add_page)
531*(.text.__vm_enough_memory)
532*(.text.vfs_stat)
533*(.text.tty_paranoia_check)
534*(.text.tcp_read_sock)
535*(.text.tcp_data_queue)
536*(.text.sys_uname)
537*(.text.sys_renameat)
538*(.text.__strncpy_from_user)
539*(.text.__mutex_init)
540*(.text.__lookup_hash)
541*(.text.kref_get)
542*(.text.ip_route_input)
543*(.text.__insert_inode_hash)
544*(.text.do_sock_write)
545*(.text.blk_done_softirq)
546*(.text.__wake_up_sync)
547*(.text.__vma_link_rb)
548*(.text.tty_ioctl)
549*(.text.tracesys)
550*(.text.sys_getdents)
551*(.text.sys_dup)
552*(.text.stub_execve)
553*(.text.sha_transform)
554*(.text.radix_tree_tag_clear)
555*(.text.put_unused_fd)
556*(.text.put_files_struct)
557*(.text.mpage_readpages)
558*(.text.may_delete)
559*(.text.kmem_cache_create)
560*(.text.ip_mc_output)
561*(.text.interleave_nodes)
562*(.text.groups_search)
563*(.text.generic_drop_inode)
564*(.text.generic_commit_write)
565*(.text.fcntl_setlk)
566*(.text.exit_mmap)
567*(.text.end_page_writeback)
568*(.text.__d_rehash)
569*(.text.debug_mutex_free_waiter)
570*(.text.csum_ipv6_magic)
571*(.text.count)
572*(.text.cleanup_rbuf)
573*(.text.check_spinlock_acquired_node)
574*(.text.can_vma_merge_after)
575*(.text.bio_endio)
576*(.text.alloc_pidmap)
577*(.text.write_ldt)
578*(.text.vmtruncate_range)
579*(.text.vfs_create)
580*(.text.__user_walk)
581*(.text.update_send_head)
582*(.text.unmap_underlying_metadata)
583*(.text.tty_ldisc_deref)
584*(.text.tcp_setsockopt)
585*(.text.tcp_send_ack)
586*(.text.sys_pause)
587*(.text.sys_gettimeofday)
588*(.text.sync_dirty_buffer)
589*(.text.strncmp)
590*(.text.release_posix_timer)
591*(.text.proc_file_read)
592*(.text.prepare_to_wait)
593*(.text.locks_mandatory_locked)
594*(.text.interruptible_sleep_on_timeout)
595*(.text.inode_sub_bytes)
596*(.text.in_group_p)
597*(.text.hrtimer_try_to_cancel)
598*(.text.filldir64)
599*(.text.fasync_helper)
600*(.text.dummy_sb_pivotroot)
601*(.text.d_lookup)
602*(.text.d_instantiate)
603*(.text.__d_find_alias)
604*(.text.cpu_idle_wait)
605*(.text.cond_resched_lock)
606*(.text.chown_common)
607*(.text.blk_congestion_wait)
608*(.text.activate_page)
609*(.text.unlock_buffer)
610*(.text.tty_wakeup)
611*(.text.tcp_v4_do_rcv)
612*(.text.tcp_current_mss)
613*(.text.sys_openat)
614*(.text.sys_fchdir)
615*(.text.strnlen_user)
616*(.text.strnlen)
617*(.text.strchr)
618*(.text.sock_common_getsockopt)
619*(.text.skb_checksum)
620*(.text.remove_wait_queue)
621*(.text.rb_replace_node)
622*(.text.radix_tree_node_ctor)
623*(.text.pty_chars_in_buffer)
624*(.text.profile_hit)
625*(.text.prio_tree_left)
626*(.text.pgd_clear_bad)
627*(.text.pfifo_fast_dequeue)
628*(.text.page_referenced)
629*(.text.open_exec)
630*(.text.mmput)
631*(.text.mm_init)
632*(.text.__ide_dma_off_quietly)
633*(.text.ide_dma_intr)
634*(.text.hrtimer_start)
635*(.text.get_io_context)
636*(.text.__get_free_pages)
637*(.text.find_first_zero_bit)
638*(.text.file_free_rcu)
639*(.text.dummy_socket_sendmsg)
640*(.text.do_unlinkat)
641*(.text.do_arch_prctl)
642*(.text.destroy_inode)
643*(.text.can_vma_merge_before)
644*(.text.block_sync_page)
645*(.text.block_prepare_write)
646*(.text.bio_init)
647*(.text.arch_ptrace)
648*(.text.wake_up_inode)
649*(.text.wait_on_retry_sync_kiocb)
650*(.text.vma_prio_tree_next)
651*(.text.tcp_rcv_space_adjust)
652*(.text.__tcp_ack_snd_check)
653*(.text.sys_utime)
654*(.text.sys_recvmsg)
655*(.text.sys_mremap)
656*(.text.sys_bdflush)
657*(.text.sleep_on)
658*(.text.set_page_dirty_lock)
659*(.text.seq_path)
660*(.text.schedule_timeout_interruptible)
661*(.text.sched_fork)
662*(.text.rt_run_flush)
663*(.text.profile_munmap)
664*(.text.prepare_binprm)
665*(.text.__pagevec_release_nonlru)
666*(.text.m_show)
667*(.text.lookup_mnt)
668*(.text.__lookup_mnt)
669*(.text.lock_timer_base)
670*(.text.is_subdir)
671*(.text.invalidate_bh_lru)
672*(.text.init_buffer_head)
673*(.text.ifind_fast)
674*(.text.ide_dma_start)
675*(.text.__get_page_state)
676*(.text.flock_to_posix_lock)
677*(.text.__find_symbol)
678*(.text.do_futex)
679*(.text.do_execve)
680*(.text.dirty_writeback_centisecs_handler)
681*(.text.dev_watchdog)
682*(.text.can_share_swap_page)
683*(.text.blkdev_put)
684*(.text.bio_get_nr_vecs)
685*(.text.xfrm_compile_policy)
686*(.text.vma_prio_tree_insert)
687*(.text.vfs_lstat_fd)
688*(.text.__user_path_lookup_open)
689*(.text.thread_return)
690*(.text.tcp_send_delayed_ack)
691*(.text.sock_def_error_report)
692*(.text.shrink_slab)
693*(.text.serial_out)
694*(.text.seq_read)
695*(.text.secure_ip_id)
696*(.text.search_binary_handler)
697*(.text.proc_pid_unhash)
698*(.text.pagevec_lookup)
699*(.text.new_inode)
700*(.text.memcpy_toiovec)
701*(.text.locks_free_lock)
702*(.text.__lock_page)
703*(.text.__lock_buffer)
704*(.text.load_module)
705*(.text.is_bad_inode)
706*(.text.invalidate_inode_buffers)
707*(.text.insert_vm_struct)
708*(.text.inode_setattr)
709*(.text.inode_add_bytes)
710*(.text.ide_read_24)
711*(.text.ide_get_error_location)
712*(.text.ide_do_drive_cmd)
713*(.text.get_locked_pte)
714*(.text.get_filesystem_list)
715*(.text.generic_file_open)
716*(.text.follow_down)
717*(.text.find_next_bit)
718*(.text.__find_first_bit)
719*(.text.exit_mm)
720*(.text.exec_keys)
721*(.text.end_buffer_write_sync)
722*(.text.end_bio_bh_io_sync)
723*(.text.dummy_socket_shutdown)
724*(.text.d_rehash)
725*(.text.d_path)
726*(.text.do_ioctl)
727*(.text.dget_locked)
728*(.text.copy_thread_group_keys)
729*(.text.cdrom_end_request)
730*(.text.cap_bprm_apply_creds)
731*(.text.blk_rq_bio_prep)
732*(.text.__bitmap_intersects)
733*(.text.bio_phys_segments)
734*(.text.bio_free)
735*(.text.arch_get_unmapped_area_topdown)
736*(.text.writeback_in_progress)
737*(.text.vfs_follow_link)
738*(.text.tcp_rcv_state_process)
739*(.text.tcp_check_space)
740*(.text.sys_stat)
741*(.text.sys_rt_sigreturn)
742*(.text.sys_rt_sigaction)
743*(.text.sys_remap_file_pages)
744*(.text.sys_pwrite64)
745*(.text.sys_fchownat)
746*(.text.sys_fchmodat)
747*(.text.strncat)
748*(.text.strlcat)
749*(.text.strcmp)
750*(.text.steal_locks)
751*(.text.sock_create)
752*(.text.sk_stream_rfree)
753*(.text.sk_stream_mem_schedule)
754*(.text.skip_atoi)
755*(.text.sk_alloc)
756*(.text.show_stat)
757*(.text.set_fs_pwd)
758*(.text.set_binfmt)
759*(.text.pty_unthrottle)
760*(.text.proc_symlink)
761*(.text.pipe_release)
762*(.text.pageout)
763*(.text.n_tty_write_wakeup)
764*(.text.n_tty_ioctl)
765*(.text.nr_free_zone_pages)
766*(.text.migration_thread)
767*(.text.mempool_free_slab)
768*(.text.meminfo_read_proc)
769*(.text.max_sane_readahead)
770*(.text.lru_cache_add)
771*(.text.kill_fasync)
772*(.text.kernel_read)
773*(.text.invalidate_mapping_pages)
774*(.text.inode_has_buffers)
775*(.text.init_once)
776*(.text.inet_sendmsg)
777*(.text.idedisk_issue_flush)
778*(.text.generic_file_write)
779*(.text.free_more_memory)
780*(.text.__free_fdtable)
781*(.text.filp_dtor)
782*(.text.exit_sem)
783*(.text.exit_itimers)
784*(.text.error_interrupt)
785*(.text.end_buffer_async_write)
786*(.text.eligible_child)
787*(.text.elf_map)
788*(.text.dump_task_regs)
789*(.text.dummy_task_setscheduler)
790*(.text.dummy_socket_accept)
791*(.text.dummy_file_free_security)
792*(.text.__down_read)
793*(.text.do_sock_read)
794*(.text.do_sigaltstack)
795*(.text.do_mremap)
796*(.text.current_io_context)
797*(.text.cpu_swap_callback)
798*(.text.copy_vma)
799*(.text.cap_bprm_set_security)
800*(.text.blk_insert_request)
801*(.text.bio_map_kern_endio)
802*(.text.bio_hw_segments)
803*(.text.bictcp_cong_avoid)
804*(.text.add_interrupt_randomness)
805*(.text.wait_for_completion)
806*(.text.version_read_proc)
807*(.text.unix_write_space)
808*(.text.tty_ldisc_ref_wait)
809*(.text.tty_ldisc_put)
810*(.text.try_to_wake_up)
811*(.text.tcp_v4_tw_remember_stamp)
812*(.text.tcp_try_undo_dsack)
813*(.text.tcp_may_send_now)
814*(.text.sys_waitid)
815*(.text.sys_sched_getparam)
816*(.text.sys_getppid)
817*(.text.sys_getcwd)
818*(.text.sys_dup2)
819*(.text.sys_chmod)
820*(.text.sys_chdir)
821*(.text.sprintf)
822*(.text.sock_wfree)
823*(.text.sock_aio_write)
824*(.text.skb_drop_fraglist)
825*(.text.skb_dequeue)
826*(.text.set_close_on_exec)
827*(.text.set_brk)
828*(.text.seq_puts)
829*(.text.SELECT_DRIVE)
830*(.text.sched_exec)
831*(.text.return_EIO)
832*(.text.remove_from_page_cache)
833*(.text.rcu_start_batch)
834*(.text.__put_task_struct)
835*(.text.proc_pid_readdir)
836*(.text.proc_get_inode)
837*(.text.prepare_to_wait_exclusive)
838*(.text.pipe_wait)
839*(.text.pipe_new)
840*(.text.pdflush_operation)
841*(.text.__pagevec_release)
842*(.text.pagevec_lookup_tag)
843*(.text.packet_rcv)
844*(.text.n_tty_set_room)
845*(.text.nr_free_pages)
846*(.text.__net_timestamp)
847*(.text.mpage_end_io_read)
848*(.text.mod_timer)
849*(.text.__memcpy)
850*(.text.mb_cache_shrink_fn)
851*(.text.lock_rename)
852*(.text.kstrdup)
853*(.text.is_ignored)
854*(.text.int_very_careful)
855*(.text.inotify_inode_is_dead)
856*(.text.inotify_get_cookie)
857*(.text.inode_get_bytes)
858*(.text.init_timer)
859*(.text.init_dev)
860*(.text.inet_getname)
861*(.text.ide_map_sg)
862*(.text.__ide_dma_end)
863*(.text.hrtimer_get_remaining)
864*(.text.get_task_mm)
865*(.text.get_random_int)
866*(.text.free_pipe_info)
867*(.text.filemap_write_and_wait_range)
868*(.text.exit_thread)
869*(.text.enter_idle)
870*(.text.end_that_request_first)
871*(.text.end_8259A_irq)
872*(.text.dummy_file_alloc_security)
873*(.text.do_group_exit)
874*(.text.debug_mutex_init)
875*(.text.cpuset_exit)
876*(.text.cpu_idle)
877*(.text.copy_semundo)
878*(.text.copy_files)
879*(.text.chrdev_open)
880*(.text.cdrom_transfer_packet_command)
881*(.text.cdrom_mode_sense)
882*(.text.blk_phys_contig_segment)
883*(.text.blk_get_queue)
884*(.text.bio_split)
885*(.text.audit_alloc)
886*(.text.anon_pipe_buf_release)
887*(.text.add_wait_queue_exclusive)
888*(.text.add_wait_queue)
889*(.text.acct_process)
890*(.text.account)
891*(.text.zeromap_page_range)
892*(.text.yield)
893*(.text.writeback_acquire)
894*(.text.worker_thread)
895*(.text.wait_on_page_writeback_range)
896*(.text.__wait_on_buffer)
897*(.text.vscnprintf)
898*(.text.vmalloc_to_pfn)
899*(.text.vgacon_save_screen)
900*(.text.vfs_unlink)
901*(.text.vfs_rmdir)
902*(.text.unregister_md_personality)
903*(.text.unlock_new_inode)
904*(.text.unix_stream_sendmsg)
905*(.text.unix_stream_recvmsg)
906*(.text.unhash_process)
907*(.text.udp_v4_lookup_longway)
908*(.text.tty_ldisc_flush)
909*(.text.tty_ldisc_enable)
910*(.text.tty_hung_up_p)
911*(.text.tty_buffer_free_all)
912*(.text.tso_fragment)
913*(.text.try_to_del_timer_sync)
914*(.text.tcp_v4_err)
915*(.text.tcp_unhash)
916*(.text.tcp_seq_next)
917*(.text.tcp_select_initial_window)
918*(.text.tcp_sacktag_write_queue)
919*(.text.tcp_cwnd_validate)
920*(.text.sys_vhangup)
921*(.text.sys_uselib)
922*(.text.sys_symlink)
923*(.text.sys_signal)
924*(.text.sys_poll)
925*(.text.sys_mount)
926*(.text.sys_kill)
927*(.text.sys_ioctl)
928*(.text.sys_inotify_add_watch)
929*(.text.sys_getuid)
930*(.text.sys_getrlimit)
931*(.text.sys_getitimer)
932*(.text.sys_getgroups)
933*(.text.sys_ftruncate)
934*(.text.sysfs_lookup)
935*(.text.sys_exit_group)
936*(.text.stub_fork)
937*(.text.sscanf)
938*(.text.sock_map_fd)
939*(.text.sock_get_timestamp)
940*(.text.__sock_create)
941*(.text.smp_call_function_single)
942*(.text.sk_stop_timer)
943*(.text.skb_copy_and_csum_datagram)
944*(.text.__skb_checksum_complete)
945*(.text.single_next)
946*(.text.sigqueue_alloc)
947*(.text.shrink_dcache_parent)
948*(.text.select_idle_routine)
949*(.text.run_workqueue)
950*(.text.run_local_timers)
951*(.text.remove_inode_hash)
952*(.text.remove_dquot_ref)
953*(.text.register_binfmt)
954*(.text.read_cache_pages)
955*(.text.rb_last)
956*(.text.pty_open)
957*(.text.proc_root_readdir)
958*(.text.proc_pid_flush)
959*(.text.proc_pident_lookup)
960*(.text.proc_fill_super)
961*(.text.proc_exe_link)
962*(.text.posix_locks_deadlock)
963*(.text.pipe_iov_copy_from_user)
964*(.text.opost)
965*(.text.nf_register_hook)
966*(.text.netif_rx_ni)
967*(.text.m_start)
968*(.text.mpage_writepage)
969*(.text.mm_alloc)
970*(.text.memory_open)
971*(.text.mark_buffer_async_write)
972*(.text.lru_add_drain_all)
973*(.text.locks_init_lock)
974*(.text.locks_delete_lock)
975*(.text.lock_hrtimer_base)
976*(.text.load_script)
977*(.text.__kill_fasync)
978*(.text.ip_mc_sf_allow)
979*(.text.__ioremap)
980*(.text.int_with_check)
981*(.text.int_sqrt)
982*(.text.install_thread_keyring)
983*(.text.init_page_buffers)
984*(.text.inet_sock_destruct)
985*(.text.idle_notifier_register)
986*(.text.ide_execute_command)
987*(.text.ide_end_drive_cmd)
988*(.text.__ide_dma_host_on)
989*(.text.hrtimer_run_queues)
990*(.text.hpet_mask_rtc_irq_bit)
991*(.text.__get_zone_counts)
992*(.text.get_zone_counts)
993*(.text.get_write_access)
994*(.text.get_fs_struct)
995*(.text.get_dirty_limits)
996*(.text.generic_readlink)
997*(.text.free_hot_page)
998*(.text.finish_wait)
999*(.text.find_inode)
1000*(.text.find_first_bit)
1001*(.text.__filemap_fdatawrite_range)
1002*(.text.__filemap_copy_from_user_iovec)
1003*(.text.exit_aio)
1004*(.text.elv_set_request)
1005*(.text.elv_former_request)
1006*(.text.dup_namespace)
1007*(.text.dupfd)
1008*(.text.dummy_socket_getsockopt)
1009*(.text.dummy_sb_post_mountroot)
1010*(.text.dummy_quotactl)
1011*(.text.dummy_inode_rename)
1012*(.text.__do_SAK)
1013*(.text.do_pipe)
1014*(.text.do_fsync)
1015*(.text.d_instantiate_unique)
1016*(.text.d_find_alias)
1017*(.text.deny_write_access)
1018*(.text.dentry_unhash)
1019*(.text.d_delete)
1020*(.text.datagram_poll)
1021*(.text.cpuset_fork)
1022*(.text.cpuid_read)
1023*(.text.copy_namespace)
1024*(.text.cond_resched)
1025*(.text.check_version)
1026*(.text.__change_page_attr)
1027*(.text.cfq_slab_kill)
1028*(.text.cfq_completed_request)
1029*(.text.cdrom_pc_intr)
1030*(.text.cdrom_decode_status)
1031*(.text.cap_capset_check)
1032*(.text.blk_put_request)
1033*(.text.bio_fs_destructor)
1034*(.text.bictcp_min_cwnd)
1035*(.text.alloc_chrdev_region)
1036*(.text.add_element)
1037*(.text.acct_update_integrals)
1038*(.text.write_boundary_block)
1039*(.text.writeback_release)
1040*(.text.writeback_inodes)
1041*(.text.wake_up_state)
1042*(.text.__wake_up_locked)
1043*(.text.wake_futex)
1044*(.text.wait_task_inactive)
1045*(.text.__wait_on_freeing_inode)
1046*(.text.wait_noreap_copyout)
1047*(.text.vmstat_start)
1048*(.text.vgacon_do_font_op)
1049*(.text.vfs_readv)
1050*(.text.vfs_quota_sync)
1051*(.text.update_queue)
1052*(.text.unshare_files)
1053*(.text.unmap_vm_area)
1054*(.text.unix_socketpair)
1055*(.text.unix_release_sock)
1056*(.text.unix_detach_fds)
1057*(.text.unix_create1)
1058*(.text.unix_bind)
1059*(.text.udp_sendmsg)
1060*(.text.udp_rcv)
1061*(.text.udp_queue_rcv_skb)
1062*(.text.uart_write)
1063*(.text.uart_startup)
1064*(.text.uart_open)
1065*(.text.tty_vhangup)
1066*(.text.tty_termios_baud_rate)
1067*(.text.tty_release)
1068*(.text.tty_ldisc_ref)
1069*(.text.throttle_vm_writeout)
1070*(.text.058)
1071*(.text.tcp_xmit_probe_skb)
1072*(.text.tcp_v4_send_check)
1073*(.text.tcp_v4_destroy_sock)
1074*(.text.tcp_sync_mss)
1075*(.text.tcp_snd_test)
1076*(.text.tcp_slow_start)
1077*(.text.tcp_send_fin)
1078*(.text.tcp_rtt_estimator)
1079*(.text.tcp_parse_options)
1080*(.text.tcp_ioctl)
1081*(.text.tcp_init_tso_segs)
1082*(.text.tcp_init_cwnd)
1083*(.text.tcp_getsockopt)
1084*(.text.tcp_fin)
1085*(.text.tcp_connect)
1086*(.text.tcp_cong_avoid)
1087*(.text.__tcp_checksum_complete_user)
1088*(.text.task_dumpable)
1089*(.text.sys_wait4)
1090*(.text.sys_utimes)
1091*(.text.sys_symlinkat)
1092*(.text.sys_socketpair)
1093*(.text.sys_rmdir)
1094*(.text.sys_readahead)
1095*(.text.sys_nanosleep)
1096*(.text.sys_linkat)
1097*(.text.sys_fstat)
1098*(.text.sysfs_readdir)
1099*(.text.sys_execve)
1100*(.text.sysenter_tracesys)
1101*(.text.sys_chown)
1102*(.text.stub_clone)
1103*(.text.strrchr)
1104*(.text.strncpy)
1105*(.text.stopmachine_set_state)
1106*(.text.sock_sendmsg)
1107*(.text.sock_release)
1108*(.text.sock_fasync)
1109*(.text.sock_close)
1110*(.text.sk_stream_write_space)
1111*(.text.sk_reset_timer)
1112*(.text.skb_split)
1113*(.text.skb_recv_datagram)
1114*(.text.skb_queue_tail)
1115*(.text.sk_attach_filter)
1116*(.text.si_swapinfo)
1117*(.text.simple_strtoll)
1118*(.text.set_termios)
1119*(.text.set_task_comm)
1120*(.text.set_shrinker)
1121*(.text.set_normalized_timespec)
1122*(.text.set_brk)
1123*(.text.serial_in)
1124*(.text.seq_printf)
1125*(.text.secure_dccp_sequence_number)
1126*(.text.rwlock_bug)
1127*(.text.rt_hash_code)
1128*(.text.__rta_fill)
1129*(.text.__request_resource)
1130*(.text.relocate_new_kernel)
1131*(.text.release_thread)
1132*(.text.release_mem)
1133*(.text.rb_prev)
1134*(.text.rb_first)
1135*(.text.random_poll)
1136*(.text.__put_super_and_need_restart)
1137*(.text.pty_write)
1138*(.text.ptrace_stop)
1139*(.text.proc_self_readlink)
1140*(.text.proc_root_lookup)
1141*(.text.proc_root_link)
1142*(.text.proc_pid_make_inode)
1143*(.text.proc_pid_attr_write)
1144*(.text.proc_lookupfd)
1145*(.text.proc_delete_inode)
1146*(.text.posix_same_owner)
1147*(.text.posix_block_lock)
1148*(.text.poll_initwait)
1149*(.text.pipe_write)
1150*(.text.pipe_read_fasync)
1151*(.text.pipe_ioctl)
1152*(.text.pdflush)
1153*(.text.pci_user_read_config_dword)
1154*(.text.page_readlink)
1155*(.text.null_lseek)
1156*(.text.nf_hook_slow)
1157*(.text.netlink_sock_destruct)
1158*(.text.netlink_broadcast)
1159*(.text.neigh_resolve_output)
1160*(.text.name_to_int)
1161*(.text.mwait_idle)
1162*(.text.mutex_trylock)
1163*(.text.mutex_debug_check_no_locks_held)
1164*(.text.m_stop)
1165*(.text.mpage_end_io_write)
1166*(.text.mpage_alloc)
1167*(.text.move_page_tables)
1168*(.text.mounts_open)
1169*(.text.__memset)
1170*(.text.memcpy_fromiovec)
1171*(.text.make_8259A_irq)
1172*(.text.lookup_user_key_possessed)
1173*(.text.lookup_create)
1174*(.text.locks_insert_lock)
1175*(.text.locks_alloc_lock)
1176*(.text.kthread_should_stop)
1177*(.text.kswapd)
1178*(.text.kobject_uevent)
1179*(.text.kobject_get_path)
1180*(.text.kobject_get)
1181*(.text.klist_children_put)
1182*(.text.__ip_route_output_key)
1183*(.text.ip_flush_pending_frames)
1184*(.text.ip_compute_csum)
1185*(.text.ip_append_data)
1186*(.text.ioc_set_batching)
1187*(.text.invalidate_inode_pages)
1188*(.text.__invalidate_device)
1189*(.text.install_arg_page)
1190*(.text.in_sched_functions)
1191*(.text.inotify_unmount_inodes)
1192*(.text.init_once)
1193*(.text.init_cdrom_command)
1194*(.text.inet_stream_connect)
1195*(.text.inet_sk_rebuild_header)
1196*(.text.inet_csk_addr2sockaddr)
1197*(.text.inet_create)
1198*(.text.ifind)
1199*(.text.ide_setup_dma)
1200*(.text.ide_outsw)
1201*(.text.ide_fixstring)
1202*(.text.ide_dma_setup)
1203*(.text.ide_cdrom_packet)
1204*(.text.ide_cd_put)
1205*(.text.ide_build_sglist)
1206*(.text.i8259A_shutdown)
1207*(.text.hung_up_tty_ioctl)
1208*(.text.hrtimer_nanosleep)
1209*(.text.hrtimer_init)
1210*(.text.hrtimer_cancel)
1211*(.text.hash_futex)
1212*(.text.group_send_sig_info)
1213*(.text.grab_cache_page_nowait)
1214*(.text.get_wchan)
1215*(.text.get_stack)
1216*(.text.get_page_state)
1217*(.text.getnstimeofday)
1218*(.text.get_node)
1219*(.text.get_kprobe)
1220*(.text.generic_unplug_device)
1221*(.text.free_task)
1222*(.text.frag_show)
1223*(.text.find_next_zero_string)
1224*(.text.filp_open)
1225*(.text.fillonedir)
1226*(.text.exit_io_context)
1227*(.text.exit_idle)
1228*(.text.exact_lock)
1229*(.text.eth_header)
1230*(.text.dummy_unregister_security)
1231*(.text.dummy_socket_post_create)
1232*(.text.dummy_socket_listen)
1233*(.text.dummy_quota_on)
1234*(.text.dummy_inode_follow_link)
1235*(.text.dummy_file_receive)
1236*(.text.dummy_file_mprotect)
1237*(.text.dummy_file_lock)
1238*(.text.dummy_file_ioctl)
1239*(.text.dummy_bprm_post_apply_creds)
1240*(.text.do_writepages)
1241*(.text.__down_interruptible)
1242*(.text.do_notify_resume)
1243*(.text.do_acct_process)
1244*(.text.del_timer_sync)
1245*(.text.default_rebuild_header)
1246*(.text.d_callback)
1247*(.text.dcache_readdir)
1248*(.text.ctrl_dumpfamily)
1249*(.text.cpuset_rmdir)
1250*(.text.copy_strings_kernel)
1251*(.text.con_write_room)
1252*(.text.complete_all)
1253*(.text.collect_sigign_sigcatch)
1254*(.text.clear_user)
1255*(.text.check_unthrottle)
1256*(.text.cdrom_release)
1257*(.text.cdrom_newpc_intr)
1258*(.text.cdrom_ioctl)
1259*(.text.cdrom_check_status)
1260*(.text.cdev_put)
1261*(.text.cdev_add)
1262*(.text.cap_ptrace)
1263*(.text.cap_bprm_secureexec)
1264*(.text.cache_alloc_refill)
1265*(.text.bmap)
1266*(.text.blk_run_queue)
1267*(.text.blk_queue_dma_alignment)
1268*(.text.blk_ordered_req_seq)
1269*(.text.blk_backing_dev_unplug)
1270*(.text.__bitmap_subset)
1271*(.text.__bitmap_and)
1272*(.text.bio_unmap_user)
1273*(.text.__bforget)
1274*(.text.bd_forget)
1275*(.text.bad_pipe_w)
1276*(.text.bad_get_user)
1277*(.text.audit_free)
1278*(.text.anon_vma_ctor)
1279*(.text.anon_pipe_buf_map)
1280*(.text.alloc_sock_iocb)
1281*(.text.alloc_fdset)
1282*(.text.aio_kick_handler)
1283*(.text.__add_entropy_words)
1284*(.text.add_disk_randomness)
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 0b3603adf56d..47496a40e84f 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -11,120 +11,54 @@
11#include <linux/threads.h> 11#include <linux/threads.h>
12#include <linux/cpumask.h> 12#include <linux/cpumask.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/module.h>
14#include <linux/kernel.h> 15#include <linux/kernel.h>
15#include <linux/ctype.h> 16#include <linux/ctype.h>
16#include <linux/init.h> 17#include <linux/init.h>
17#include <linux/module.h>
18 18
19#include <asm/smp.h> 19#include <asm/smp.h>
20#include <asm/ipi.h> 20#include <asm/ipi.h>
21#include <asm/genapic.h>
21 22
22#if defined(CONFIG_ACPI) 23#ifdef CONFIG_ACPI
23#include <acpi/acpi_bus.h> 24#include <acpi/acpi_bus.h>
24#endif 25#endif
25 26
26/* which logical CPU number maps to which CPU (physical APIC ID) */ 27/* which logical CPU number maps to which CPU (physical APIC ID) */
27u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 28u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
29 = { [0 ... NR_CPUS-1] = BAD_APICID };
28EXPORT_SYMBOL(x86_cpu_to_apicid); 30EXPORT_SYMBOL(x86_cpu_to_apicid);
29u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
30 31
31extern struct genapic apic_cluster; 32u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
32extern struct genapic apic_flat;
33extern struct genapic apic_physflat;
34 33
35struct genapic *genapic = &apic_flat; 34struct genapic __read_mostly *genapic = &apic_flat;
36struct genapic *genapic_force;
37 35
38/* 36/*
39 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 37 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
40 */ 38 */
41void __init clustered_apic_check(void) 39void __init setup_apic_routing(void)
42{ 40{
43 long i; 41#ifdef CONFIG_ACPI
44 u8 clusters, max_cluster;
45 u8 id;
46 u8 cluster_cnt[NUM_APIC_CLUSTERS];
47 int max_apic = 0;
48
49 /* genapic selection can be forced because of certain quirks.
50 */
51 if (genapic_force) {
52 genapic = genapic_force;
53 goto print;
54 }
55
56#if defined(CONFIG_ACPI)
57 /* 42 /*
58 * Some x86_64 machines use physical APIC mode regardless of how many 43 * Quirk: some x86_64 machines can only use physical APIC mode
59 * procs/clusters are present (x86_64 ES7000 is an example). 44 * regardless of how many processors are present (x86_64 ES7000
45 * is an example).
60 */ 46 */
61 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) 47 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
62 if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { 48 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
63 genapic = &apic_cluster;
64 goto print;
65 }
66#endif
67
68 memset(cluster_cnt, 0, sizeof(cluster_cnt));
69 for (i = 0; i < NR_CPUS; i++) {
70 id = bios_cpu_apicid[i];
71 if (id == BAD_APICID)
72 continue;
73 if (id > max_apic)
74 max_apic = id;
75 cluster_cnt[APIC_CLUSTERID(id)]++;
76 }
77
78 /* Don't use clustered mode on AMD platforms. */
79 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
80 genapic = &apic_physflat; 49 genapic = &apic_physflat;
81#ifndef CONFIG_HOTPLUG_CPU 50 else
82 /* In the CPU hotplug case we cannot use broadcast mode
83 because that opens a race when a CPU is removed.
84 Stay at physflat mode in this case.
85 It is bad to do this unconditionally though. Once
86 we have ACPI platform support for CPU hotplug
87 we should detect hotplug capablity from ACPI tables and
88 only do this when really needed. -AK */
89 if (max_apic <= 8)
90 genapic = &apic_flat;
91#endif 51#endif
92 goto print;
93 }
94 52
95 clusters = 0; 53 if (cpus_weight(cpu_possible_map) <= 8)
96 max_cluster = 0;
97
98 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
99 if (cluster_cnt[i] > 0) {
100 ++clusters;
101 if (cluster_cnt[i] > max_cluster)
102 max_cluster = cluster_cnt[i];
103 }
104 }
105
106 /*
107 * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
108 * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
109 * else physical mode.
110 * (We don't use lowest priority delivery + HW APIC IRQ steering, so
111 * can ignore the clustered logical case and go straight to physical.)
112 */
113 if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
114#ifdef CONFIG_HOTPLUG_CPU
115 /* Don't use APIC shortcuts in CPU hotplug to avoid races */
116 genapic = &apic_physflat;
117#else
118 genapic = &apic_flat; 54 genapic = &apic_flat;
119#endif 55 else
120 } else 56 genapic = &apic_physflat;
121 genapic = &apic_cluster;
122 57
123print:
124 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 58 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
125} 59}
126 60
127/* Same for both flat and clustered. */ 61/* Same for both flat and physical. */
128 62
129void send_IPI_self(int vector) 63void send_IPI_self(int vector)
130{ 64{
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
deleted file mode 100644
index 73d76308b955..000000000000
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ /dev/null
@@ -1,137 +0,0 @@
1/*
2 * Copyright 2004 James Cleverdon, IBM.
3 * Subject to the GNU Public License, v.2
4 *
5 * Clustered APIC subarch code. Up to 255 CPUs, physical delivery.
6 * (A more realistic maximum is around 230 CPUs.)
7 *
8 * Hacked for x86-64 by James Cleverdon from i386 architecture code by
9 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
10 * James Cleverdon.
11 */
12#include <linux/threads.h>
13#include <linux/cpumask.h>
14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h>
17#include <linux/init.h>
18#include <asm/smp.h>
19#include <asm/ipi.h>
20
21
22/*
23 * Set up the logical destination ID.
24 *
25 * Intel recommends to set DFR, LDR and TPR before enabling
26 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
27 * document number 292116). So here it goes...
28 */
29static void cluster_init_apic_ldr(void)
30{
31 unsigned long val, id;
32 long i, count;
33 u8 lid;
34 u8 my_id = hard_smp_processor_id();
35 u8 my_cluster = APIC_CLUSTER(my_id);
36
37 /* Create logical APIC IDs by counting CPUs already in cluster. */
38 for (count = 0, i = NR_CPUS; --i >= 0; ) {
39 lid = x86_cpu_to_log_apicid[i];
40 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
41 ++count;
42 }
43 /*
44 * We only have a 4 wide bitmap in cluster mode. There's no way
45 * to get above 60 CPUs and still give each one it's own bit.
46 * But, we're using physical IRQ delivery, so we don't care.
47 * Use bit 3 for the 4th through Nth CPU in each cluster.
48 */
49 if (count >= XAPIC_DEST_CPUS_SHIFT)
50 count = 3;
51 id = my_cluster | (1UL << count);
52 x86_cpu_to_log_apicid[smp_processor_id()] = id;
53 apic_write(APIC_DFR, APIC_DFR_CLUSTER);
54 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
55 val |= SET_APIC_LOGICAL_ID(id);
56 apic_write(APIC_LDR, val);
57}
58
59/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
60
61static cpumask_t cluster_target_cpus(void)
62{
63 return cpumask_of_cpu(0);
64}
65
66static cpumask_t cluster_vector_allocation_domain(int cpu)
67{
68 cpumask_t domain = CPU_MASK_NONE;
69 cpu_set(cpu, domain);
70 return domain;
71}
72
73static void cluster_send_IPI_mask(cpumask_t mask, int vector)
74{
75 send_IPI_mask_sequence(mask, vector);
76}
77
78static void cluster_send_IPI_allbutself(int vector)
79{
80 cpumask_t mask = cpu_online_map;
81
82 cpu_clear(smp_processor_id(), mask);
83
84 if (!cpus_empty(mask))
85 cluster_send_IPI_mask(mask, vector);
86}
87
88static void cluster_send_IPI_all(int vector)
89{
90 cluster_send_IPI_mask(cpu_online_map, vector);
91}
92
93static int cluster_apic_id_registered(void)
94{
95 return 1;
96}
97
98static unsigned int cluster_cpu_mask_to_apicid(cpumask_t cpumask)
99{
100 int cpu;
101
102 /*
103 * We're using fixed IRQ delivery, can only return one phys APIC ID.
104 * May as well be the first.
105 */
106 cpu = first_cpu(cpumask);
107 if ((unsigned)cpu < NR_CPUS)
108 return x86_cpu_to_apicid[cpu];
109 else
110 return BAD_APICID;
111}
112
113/* cpuid returns the value latched in the HW at reset, not the APIC ID
114 * register's value. For any box whose BIOS changes APIC IDs, like
115 * clustered APIC systems, we must use hard_smp_processor_id.
116 *
117 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
118 */
119static unsigned int phys_pkg_id(int index_msb)
120{
121 return hard_smp_processor_id() >> index_msb;
122}
123
124struct genapic apic_cluster = {
125 .name = "clustered",
126 .int_delivery_mode = dest_Fixed,
127 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
128 .target_cpus = cluster_target_cpus,
129 .vector_allocation_domain = cluster_vector_allocation_domain,
130 .apic_id_registered = cluster_apic_id_registered,
131 .init_apic_ldr = cluster_init_apic_ldr,
132 .send_IPI_all = cluster_send_IPI_all,
133 .send_IPI_allbutself = cluster_send_IPI_allbutself,
134 .send_IPI_mask = cluster_send_IPI_mask,
135 .cpu_mask_to_apicid = cluster_cpu_mask_to_apicid,
136 .phys_pkg_id = phys_pkg_id,
137};
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 7c01db8fa9d1..ecb01eefdd27 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -8,6 +8,7 @@
8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and 8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
9 * James Cleverdon. 9 * James Cleverdon.
10 */ 10 */
11#include <linux/errno.h>
11#include <linux/threads.h> 12#include <linux/threads.h>
12#include <linux/cpumask.h> 13#include <linux/cpumask.h>
13#include <linux/string.h> 14#include <linux/string.h>
@@ -16,6 +17,7 @@
16#include <linux/init.h> 17#include <linux/init.h>
17#include <asm/smp.h> 18#include <asm/smp.h>
18#include <asm/ipi.h> 19#include <asm/ipi.h>
20#include <asm/genapic.h>
19 21
20static cpumask_t flat_target_cpus(void) 22static cpumask_t flat_target_cpus(void)
21{ 23{
@@ -60,31 +62,10 @@ static void flat_init_apic_ldr(void)
60static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 62static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
61{ 63{
62 unsigned long mask = cpus_addr(cpumask)[0]; 64 unsigned long mask = cpus_addr(cpumask)[0];
63 unsigned long cfg;
64 unsigned long flags; 65 unsigned long flags;
65 66
66 local_irq_save(flags); 67 local_irq_save(flags);
67 68 __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
68 /*
69 * Wait for idle.
70 */
71 apic_wait_icr_idle();
72
73 /*
74 * prepare target chip field
75 */
76 cfg = __prepare_ICR2(mask);
77 apic_write(APIC_ICR2, cfg);
78
79 /*
80 * program the ICR
81 */
82 cfg = __prepare_ICR(0, vector, APIC_DEST_LOGICAL);
83
84 /*
85 * Send the IPI. The write to APIC_ICR fires this off.
86 */
87 apic_write(APIC_ICR, cfg);
88 local_irq_restore(flags); 69 local_irq_restore(flags);
89} 70}
90 71
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 598a4d0351fc..1fab487dee86 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> 6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> 7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
8 * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
8 */ 9 */
9 10
10 11
@@ -13,97 +14,131 @@
13#include <linux/init.h> 14#include <linux/init.h>
14#include <asm/desc.h> 15#include <asm/desc.h>
15#include <asm/segment.h> 16#include <asm/segment.h>
17#include <asm/pgtable.h>
16#include <asm/page.h> 18#include <asm/page.h>
17#include <asm/msr.h> 19#include <asm/msr.h>
18#include <asm/cache.h> 20#include <asm/cache.h>
19 21
20/* we are not able to switch in one step to the final KERNEL ADRESS SPACE 22/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
21 * because we need identity-mapped pages on setup so define __START_KERNEL to 23 * because we need identity-mapped pages.
22 * 0x100000 for this stage 24 *
23 *
24 */ 25 */
25 26
26 .text 27 .text
27 .section .bootstrap.text 28 .section .bootstrap.text
28 .code32 29 .code64
29 .globl startup_32 30 .globl startup_64
30/* %bx: 1 if coming from smp trampoline on secondary cpu */ 31startup_64:
31startup_32: 32
32
33 /* 33 /*
34 * At this point the CPU runs in 32bit protected mode (CS.D = 1) with 34 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
35 * paging disabled and the point of this file is to switch to 64bit 35 * and someone has loaded an identity mapped page table
36 * long mode with a kernel mapping for kerneland to jump into the 36 * for us. These identity mapped page tables map all of the
37 * kernel virtual addresses. 37 * kernel pages and possibly all of memory.
38 * There is no stack until we set one up. 38 *
39 * %esi holds a physical pointer to real_mode_data.
40 *
41 * We come here either directly from a 64bit bootloader, or from
42 * arch/x86_64/boot/compressed/head.S.
43 *
44 * We only come here initially at boot nothing else comes here.
45 *
46 * Since we may be loaded at an address different from what we were
47 * compiled to run at we first fixup the physical addresses in our page
48 * tables and then reload them.
39 */ 49 */
40 50
41 /* Initialize the %ds segment register */ 51 /* Compute the delta between the address I am compiled to run at and the
42 movl $__KERNEL_DS,%eax 52 * address I am actually running at.
43 movl %eax,%ds
44
45 /* Load new GDT with the 64bit segments using 32bit descriptor */
46 lgdt pGDT32 - __START_KERNEL_map
47
48 /* If the CPU doesn't support CPUID this will double fault.
49 * Unfortunately it is hard to check for CPUID without a stack.
50 */ 53 */
51 54 leaq _text(%rip), %rbp
52 /* Check if extended functions are implemented */ 55 subq $_text - __START_KERNEL_map, %rbp
53 movl $0x80000000, %eax 56
54 cpuid 57 /* Is the address not 2M aligned? */
55 cmpl $0x80000000, %eax 58 movq %rbp, %rax
56 jbe no_long_mode 59 andl $~LARGE_PAGE_MASK, %eax
57 /* Check if long mode is implemented */ 60 testl %eax, %eax
58 mov $0x80000001, %eax 61 jnz bad_address
59 cpuid 62
60 btl $29, %edx 63 /* Is the address too large? */
61 jnc no_long_mode 64 leaq _text(%rip), %rdx
62 65 movq $PGDIR_SIZE, %rax
63 /* 66 cmpq %rax, %rdx
64 * Prepare for entering 64bits mode 67 jae bad_address
68
69 /* Fixup the physical addresses in the page table
65 */ 70 */
71 addq %rbp, init_level4_pgt + 0(%rip)
72 addq %rbp, init_level4_pgt + (258*8)(%rip)
73 addq %rbp, init_level4_pgt + (511*8)(%rip)
74
75 addq %rbp, level3_ident_pgt + 0(%rip)
76 addq %rbp, level3_kernel_pgt + (510*8)(%rip)
77
78 /* Add an Identity mapping if I am above 1G */
79 leaq _text(%rip), %rdi
80 andq $LARGE_PAGE_MASK, %rdi
81
82 movq %rdi, %rax
83 shrq $PUD_SHIFT, %rax
84 andq $(PTRS_PER_PUD - 1), %rax
85 jz ident_complete
86
87 leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
88 leaq level3_ident_pgt(%rip), %rbx
89 movq %rdx, 0(%rbx, %rax, 8)
90
91 movq %rdi, %rax
92 shrq $PMD_SHIFT, %rax
93 andq $(PTRS_PER_PMD - 1), %rax
94 leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
95 leaq level2_spare_pgt(%rip), %rbx
96 movq %rdx, 0(%rbx, %rax, 8)
97ident_complete:
98
99 /* Fixup the kernel text+data virtual addresses
100 */
101 leaq level2_kernel_pgt(%rip), %rdi
102 leaq 4096(%rdi), %r8
103 /* See if it is a valid page table entry */
1041: testq $1, 0(%rdi)
105 jz 2f
106 addq %rbp, 0(%rdi)
107 /* Go to the next page */
1082: addq $8, %rdi
109 cmp %r8, %rdi
110 jne 1b
111
112 /* Fixup phys_base */
113 addq %rbp, phys_base(%rip)
66 114
67 /* Enable PAE mode */ 115#ifdef CONFIG_SMP
68 xorl %eax, %eax 116 addq %rbp, trampoline_level4_pgt + 0(%rip)
69 btsl $5, %eax 117 addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
70 movl %eax, %cr4 118#endif
71 119#ifdef CONFIG_ACPI_SLEEP
72 /* Setup early boot stage 4 level pagetables */ 120 addq %rbp, wakeup_level4_pgt + 0(%rip)
73 movl $(boot_level4_pgt - __START_KERNEL_map), %eax 121 addq %rbp, wakeup_level4_pgt + (511*8)(%rip)
74 movl %eax, %cr3 122#endif
75
76 /* Setup EFER (Extended Feature Enable Register) */
77 movl $MSR_EFER, %ecx
78 rdmsr
79
80 /* Enable Long Mode */
81 btsl $_EFER_LME, %eax
82
83 /* Make changes effective */
84 wrmsr
85 123
86 xorl %eax, %eax 124 /* Due to ENTRY(), sometimes the empty space gets filled with
87 btsl $31, %eax /* Enable paging and in turn activate Long Mode */ 125 * zeros. Better take a jmp than relying on empty space being
88 btsl $0, %eax /* Enable protected mode */ 126 * filled with 0x90 (nop)
89 /* Make changes effective */
90 movl %eax, %cr0
91 /*
92 * At this point we're in long mode but in 32bit compatibility mode
93 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
94 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
95 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
96 */ 127 */
97 ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map) 128 jmp secondary_startup_64
98 129ENTRY(secondary_startup_64)
99 .code64 130 /*
100 .org 0x100 131 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
101 .globl startup_64 132 * and someone has loaded a mapped page table.
102startup_64: 133 *
103 /* We come here either from startup_32 134 * %esi holds a physical pointer to real_mode_data.
104 * or directly from a 64bit bootloader. 135 *
105 * Since we may have come directly from a bootloader we 136 * We come here either from startup_64 (using physical addresses)
106 * reload the page tables here. 137 * or from trampoline.S (using virtual addresses).
138 *
139 * Using virtual addresses from trampoline.S removes the need
140 * to have any identity mapped pages in the kernel page table
141 * after the boot processor executes this code.
107 */ 142 */
108 143
109 /* Enable PAE mode and PGE */ 144 /* Enable PAE mode and PGE */
@@ -113,9 +148,15 @@ startup_64:
113 movq %rax, %cr4 148 movq %rax, %cr4
114 149
115 /* Setup early boot stage 4 level pagetables. */ 150 /* Setup early boot stage 4 level pagetables. */
116 movq $(boot_level4_pgt - __START_KERNEL_map), %rax 151 movq $(init_level4_pgt - __START_KERNEL_map), %rax
152 addq phys_base(%rip), %rax
117 movq %rax, %cr3 153 movq %rax, %cr3
118 154
155 /* Ensure I am executing from virtual addresses */
156 movq $1f, %rax
157 jmp *%rax
1581:
159
119 /* Check if nx is implemented */ 160 /* Check if nx is implemented */
120 movl $0x80000001, %eax 161 movl $0x80000001, %eax
121 cpuid 162 cpuid
@@ -124,17 +165,11 @@ startup_64:
124 /* Setup EFER (Extended Feature Enable Register) */ 165 /* Setup EFER (Extended Feature Enable Register) */
125 movl $MSR_EFER, %ecx 166 movl $MSR_EFER, %ecx
126 rdmsr 167 rdmsr
127 168 btsl $_EFER_SCE, %eax /* Enable System Call */
128 /* Enable System Call */ 169 btl $20,%edi /* No Execute supported? */
129 btsl $_EFER_SCE, %eax
130
131 /* No Execute supported? */
132 btl $20,%edi
133 jnc 1f 170 jnc 1f
134 btsl $_EFER_NX, %eax 171 btsl $_EFER_NX, %eax
1351: 1721: wrmsr /* Make changes effective */
136 /* Make changes effective */
137 wrmsr
138 173
139 /* Setup cr0 */ 174 /* Setup cr0 */
140#define CR0_PM 1 /* protected mode */ 175#define CR0_PM 1 /* protected mode */
@@ -161,7 +196,7 @@ startup_64:
161 * addresses where we're currently running on. We have to do that here 196 * addresses where we're currently running on. We have to do that here
162 * because in 32bit we couldn't load a 64bit linear address. 197 * because in 32bit we couldn't load a 64bit linear address.
163 */ 198 */
164 lgdt cpu_gdt_descr 199 lgdt cpu_gdt_descr(%rip)
165 200
166 /* set up data segments. actually 0 would do too */ 201 /* set up data segments. actually 0 would do too */
167 movl $__KERNEL_DS,%eax 202 movl $__KERNEL_DS,%eax
@@ -212,6 +247,9 @@ initial_code:
212init_rsp: 247init_rsp:
213 .quad init_thread_union+THREAD_SIZE-8 248 .quad init_thread_union+THREAD_SIZE-8
214 249
250bad_address:
251 jmp bad_address
252
215ENTRY(early_idt_handler) 253ENTRY(early_idt_handler)
216 cmpl $2,early_recursion_flag(%rip) 254 cmpl $2,early_recursion_flag(%rip)
217 jz 1f 255 jz 1f
@@ -240,110 +278,66 @@ early_idt_msg:
240early_idt_ripmsg: 278early_idt_ripmsg:
241 .asciz "RIP %s\n" 279 .asciz "RIP %s\n"
242 280
243.code32 281.balign PAGE_SIZE
244ENTRY(no_long_mode)
245 /* This isn't an x86-64 CPU so hang */
2461:
247 jmp 1b
248
249.org 0xf00
250 .globl pGDT32
251pGDT32:
252 .word gdt_end-cpu_gdt_table-1
253 .long cpu_gdt_table-__START_KERNEL_map
254
255.org 0xf10
256ljumpvector:
257 .long startup_64-__START_KERNEL_map
258 .word __KERNEL_CS
259 282
260ENTRY(stext)
261ENTRY(_stext)
262
263 $page = 0
264#define NEXT_PAGE(name) \ 283#define NEXT_PAGE(name) \
265 $page = $page + 1; \ 284 .balign PAGE_SIZE; \
266 .org $page * 0x1000; \
267 phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
268ENTRY(name) 285ENTRY(name)
269 286
287/* Automate the creation of 1 to 1 mapping pmd entries */
288#define PMDS(START, PERM, COUNT) \
289 i = 0 ; \
290 .rept (COUNT) ; \
291 .quad (START) + (i << 21) + (PERM) ; \
292 i = i + 1 ; \
293 .endr
294
295 /*
296 * This default setting generates an ident mapping at address 0x100000
297 * and a mapping for the kernel that precisely maps virtual address
298 * 0xffffffff80000000 to physical address 0x000000. (always using
299 * 2Mbyte large pages provided by PAE mode)
300 */
270NEXT_PAGE(init_level4_pgt) 301NEXT_PAGE(init_level4_pgt)
271 /* This gets initialized in x86_64_start_kernel */ 302 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
272 .fill 512,8,0 303 .fill 257,8,0
304 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
305 .fill 252,8,0
306 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
307 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
273 308
274NEXT_PAGE(level3_ident_pgt) 309NEXT_PAGE(level3_ident_pgt)
275 .quad phys_level2_ident_pgt | 0x007 310 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
276 .fill 511,8,0 311 .fill 511,8,0
277 312
278NEXT_PAGE(level3_kernel_pgt) 313NEXT_PAGE(level3_kernel_pgt)
279 .fill 510,8,0 314 .fill 510,8,0
280 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ 315 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
281 .quad phys_level2_kernel_pgt | 0x007 316 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
282 .fill 1,8,0 317 .fill 1,8,0
283 318
284NEXT_PAGE(level2_ident_pgt) 319NEXT_PAGE(level2_ident_pgt)
285 /* 40MB for bootup. */ 320 /* Since I easily can, map the first 1G.
286 i = 0 321 * Don't set NX because code runs from these pages.
287 .rept 20 322 */
288 .quad i << 21 | 0x083 323 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
289 i = i + 1 324
290 .endr
291 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
292 .globl temp_boot_pmds
293temp_boot_pmds:
294 .fill 492,8,0
295
296NEXT_PAGE(level2_kernel_pgt) 325NEXT_PAGE(level2_kernel_pgt)
297 /* 40MB kernel mapping. The kernel code cannot be bigger than that. 326 /* 40MB kernel mapping. The kernel code cannot be bigger than that.
298 When you change this change KERNEL_TEXT_SIZE in page.h too. */ 327 When you change this change KERNEL_TEXT_SIZE in page.h too. */
299 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ 328 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
300 i = 0 329 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
301 .rept 20 330 KERNEL_TEXT_SIZE/PMD_SIZE)
302 .quad i << 21 | 0x183
303 i = i + 1
304 .endr
305 /* Module mapping starts here */ 331 /* Module mapping starts here */
306 .fill 492,8,0 332 .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
307 333
308NEXT_PAGE(level3_physmem_pgt) 334NEXT_PAGE(level2_spare_pgt)
309 .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ 335 .fill 512,8,0
310 .fill 511,8,0
311 336
337#undef PMDS
312#undef NEXT_PAGE 338#undef NEXT_PAGE
313 339
314 .data 340 .data
315
316#ifdef CONFIG_ACPI_SLEEP
317 .align PAGE_SIZE
318ENTRY(wakeup_level4_pgt)
319 .quad phys_level3_ident_pgt | 0x007
320 .fill 255,8,0
321 .quad phys_level3_physmem_pgt | 0x007
322 .fill 254,8,0
323 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
324 .quad phys_level3_kernel_pgt | 0x007
325#endif
326
327#ifndef CONFIG_HOTPLUG_CPU
328 __INITDATA
329#endif
330 /*
331 * This default setting generates an ident mapping at address 0x100000
332 * and a mapping for the kernel that precisely maps virtual address
333 * 0xffffffff80000000 to physical address 0x000000. (always using
334 * 2Mbyte large pages provided by PAE mode)
335 */
336 .align PAGE_SIZE
337ENTRY(boot_level4_pgt)
338 .quad phys_level3_ident_pgt | 0x007
339 .fill 255,8,0
340 .quad phys_level3_physmem_pgt | 0x007
341 .fill 254,8,0
342 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
343 .quad phys_level3_kernel_pgt | 0x007
344
345 .data
346
347 .align 16 341 .align 16
348 .globl cpu_gdt_descr 342 .globl cpu_gdt_descr
349cpu_gdt_descr: 343cpu_gdt_descr:
@@ -357,6 +351,10 @@ gdt:
357 .endr 351 .endr
358#endif 352#endif
359 353
354ENTRY(phys_base)
355 /* This must match the first entry in level2_kernel_pgt */
356 .quad 0x0000000000000000
357
360/* We need valid kernel segments for data and code in long mode too 358/* We need valid kernel segments for data and code in long mode too
361 * IRET will check the segment types kkeil 2000/10/28 359 * IRET will check the segment types kkeil 2000/10/28
362 * Also sysret mandates a special GDT layout 360 * Also sysret mandates a special GDT layout
@@ -370,13 +368,13 @@ gdt:
370 368
371ENTRY(cpu_gdt_table) 369ENTRY(cpu_gdt_table)
372 .quad 0x0000000000000000 /* NULL descriptor */ 370 .quad 0x0000000000000000 /* NULL descriptor */
371 .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
372 .quad 0x00af9b000000ffff /* __KERNEL_CS */
373 .quad 0x00cf93000000ffff /* __KERNEL_DS */
374 .quad 0x00cffb000000ffff /* __USER32_CS */
375 .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
376 .quad 0x00affb000000ffff /* __USER_CS */
373 .quad 0x0 /* unused */ 377 .quad 0x0 /* unused */
374 .quad 0x00af9a000000ffff /* __KERNEL_CS */
375 .quad 0x00cf92000000ffff /* __KERNEL_DS */
376 .quad 0x00cffa000000ffff /* __USER32_CS */
377 .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
378 .quad 0x00affa000000ffff /* __USER_CS */
379 .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
380 .quad 0,0 /* TSS */ 378 .quad 0,0 /* TSS */
381 .quad 0,0 /* LDT */ 379 .quad 0,0 /* LDT */
382 .quad 0,0,0 /* three TLS descriptors */ 380 .quad 0,0,0 /* three TLS descriptors */
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 5f197b0a330a..213d90e04755 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -18,8 +18,16 @@
18#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/pgtable.h> 20#include <asm/pgtable.h>
21#include <asm/tlbflush.h>
21#include <asm/sections.h> 22#include <asm/sections.h>
22 23
24static void __init zap_identity_mappings(void)
25{
26 pgd_t *pgd = pgd_offset_k(0UL);
27 pgd_clear(pgd);
28 __flush_tlb();
29}
30
23/* Don't add a printk in there. printk relies on the PDA which is not initialized 31/* Don't add a printk in there. printk relies on the PDA which is not initialized
24 yet. */ 32 yet. */
25static void __init clear_bss(void) 33static void __init clear_bss(void)
@@ -29,25 +37,24 @@ static void __init clear_bss(void)
29} 37}
30 38
31#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ 39#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
32#define OLD_CL_MAGIC_ADDR 0x90020 40#define OLD_CL_MAGIC_ADDR 0x20
33#define OLD_CL_MAGIC 0xA33F 41#define OLD_CL_MAGIC 0xA33F
34#define OLD_CL_BASE_ADDR 0x90000 42#define OLD_CL_OFFSET 0x22
35#define OLD_CL_OFFSET 0x90022
36 43
37static void __init copy_bootdata(char *real_mode_data) 44static void __init copy_bootdata(char *real_mode_data)
38{ 45{
39 int new_data; 46 unsigned long new_data;
40 char * command_line; 47 char * command_line;
41 48
42 memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); 49 memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
43 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); 50 new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
44 if (!new_data) { 51 if (!new_data) {
45 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { 52 if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
46 return; 53 return;
47 } 54 }
48 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; 55 new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
49 } 56 }
50 command_line = (char *) ((u64)(new_data)); 57 command_line = __va(new_data);
51 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 58 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
52} 59}
53 60
@@ -55,26 +62,30 @@ void __init x86_64_start_kernel(char * real_mode_data)
55{ 62{
56 int i; 63 int i;
57 64
65 /*
66 * Make sure kernel is aligned to 2MB address. Catching it at compile
67 * time is better. Change your config file and compile the kernel
68 * for a 2MB aligned address (CONFIG_PHYSICAL_START)
69 */
70 BUILD_BUG_ON(CONFIG_PHYSICAL_START & (__KERNEL_ALIGN - 1));
71
58 /* clear bss before set_intr_gate with early_idt_handler */ 72 /* clear bss before set_intr_gate with early_idt_handler */
59 clear_bss(); 73 clear_bss();
60 74
75 /* Make NULL pointers segfault */
76 zap_identity_mappings();
77
61 for (i = 0; i < IDT_ENTRIES; i++) 78 for (i = 0; i < IDT_ENTRIES; i++)
62 set_intr_gate(i, early_idt_handler); 79 set_intr_gate(i, early_idt_handler);
63 asm volatile("lidt %0" :: "m" (idt_descr)); 80 asm volatile("lidt %0" :: "m" (idt_descr));
64 81
65 early_printk("Kernel alive\n"); 82 early_printk("Kernel alive\n");
66 83
67 /*
68 * switch to init_level4_pgt from boot_level4_pgt
69 */
70 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
71 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
72
73 for (i = 0; i < NR_CPUS; i++) 84 for (i = 0; i < NR_CPUS; i++)
74 cpu_pda(i) = &boot_cpu_pda[i]; 85 cpu_pda(i) = &boot_cpu_pda[i];
75 86
76 pda_init(0); 87 pda_init(0);
77 copy_bootdata(real_mode_data); 88 copy_bootdata(__va(real_mode_data));
78#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
79 cpu_set(0, cpu_online_map); 90 cpu_set(0, cpu_online_map);
80#endif 91#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b7d2b76b92d4..2a2df14dab7e 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -907,10 +907,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
907 enable_8259A_irq(0); 907 enable_8259A_irq(0);
908} 908}
909 909
910void __init UNEXPECTED_IO_APIC(void)
911{
912}
913
914void __apicdebuginit print_IO_APIC(void) 910void __apicdebuginit print_IO_APIC(void)
915{ 911{
916 int apic, i; 912 int apic, i;
@@ -946,40 +942,16 @@ void __apicdebuginit print_IO_APIC(void)
946 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 942 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
947 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 943 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
948 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 944 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
949 if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
950 UNEXPECTED_IO_APIC();
951 945
952 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 946 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
953 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 947 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
954 if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
955 (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
956 (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
957 (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
958 (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
959 (reg_01.bits.entries != 0x2E) &&
960 (reg_01.bits.entries != 0x3F) &&
961 (reg_01.bits.entries != 0x03)
962 )
963 UNEXPECTED_IO_APIC();
964 948
965 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 949 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
966 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 950 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
967 if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
968 (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
969 (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
970 (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
971 (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
972 (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
973 )
974 UNEXPECTED_IO_APIC();
975 if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
976 UNEXPECTED_IO_APIC();
977 951
978 if (reg_01.bits.version >= 0x10) { 952 if (reg_01.bits.version >= 0x10) {
979 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 953 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
980 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 954 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
981 if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
982 UNEXPECTED_IO_APIC();
983 } 955 }
984 956
985 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 957 printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1407,8 +1379,7 @@ static void irq_complete_move(unsigned int irq)
1407 1379
1408 vector = ~get_irq_regs()->orig_rax; 1380 vector = ~get_irq_regs()->orig_rax;
1409 me = smp_processor_id(); 1381 me = smp_processor_id();
1410 if ((vector == cfg->vector) && 1382 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
1411 cpu_isset(smp_processor_id(), cfg->domain)) {
1412 cpumask_t cleanup_mask; 1383 cpumask_t cleanup_mask;
1413 1384
1414 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 1385 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
index 745b1f0f494e..387d347b0e07 100644
--- a/arch/x86_64/kernel/ioport.c
+++ b/arch/x86_64/kernel/ioport.c
@@ -16,6 +16,7 @@
16#include <linux/stddef.h> 16#include <linux/stddef.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/thread_info.h> 18#include <linux/thread_info.h>
19#include <linux/syscalls.h>
19 20
20/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ 21/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
21static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) 22static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 0497e3bd5bff..a8bb33c1a8f2 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -191,19 +191,19 @@ NORET_TYPE void machine_kexec(struct kimage *image)
191 191
192 page_list[PA_CONTROL_PAGE] = __pa(control_page); 192 page_list[PA_CONTROL_PAGE] = __pa(control_page);
193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; 193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
194 page_list[PA_PGD] = __pa(kexec_pgd); 194 page_list[PA_PGD] = __pa_symbol(&kexec_pgd);
195 page_list[VA_PGD] = (unsigned long)kexec_pgd; 195 page_list[VA_PGD] = (unsigned long)kexec_pgd;
196 page_list[PA_PUD_0] = __pa(kexec_pud0); 196 page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0);
197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0; 197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
198 page_list[PA_PMD_0] = __pa(kexec_pmd0); 198 page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0);
199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; 199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
200 page_list[PA_PTE_0] = __pa(kexec_pte0); 200 page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0);
201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0; 201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
202 page_list[PA_PUD_1] = __pa(kexec_pud1); 202 page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1);
203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1; 203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
204 page_list[PA_PMD_1] = __pa(kexec_pmd1); 204 page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1);
205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; 205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
206 page_list[PA_PTE_1] = __pa(kexec_pte1); 206 page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1);
207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1; 207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
208 208
209 page_list[PA_TABLE_PAGE] = 209 page_list[PA_TABLE_PAGE] =
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 8011a8e1c7d4..fa2672682477 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -323,10 +323,13 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
323#endif /* CONFIG_X86_MCE_INTEL */ 323#endif /* CONFIG_X86_MCE_INTEL */
324 324
325/* 325/*
326 * Periodic polling timer for "silent" machine check errors. 326 * Periodic polling timer for "silent" machine check errors. If the
327 * poller finds an MCE, poll 2x faster. When the poller finds no more
328 * errors, poll 2x slower (up to check_interval seconds).
327 */ 329 */
328 330
329static int check_interval = 5 * 60; /* 5 minutes */ 331static int check_interval = 5 * 60; /* 5 minutes */
332static int next_interval; /* in jiffies */
330static void mcheck_timer(struct work_struct *work); 333static void mcheck_timer(struct work_struct *work);
331static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); 334static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
332 335
@@ -339,7 +342,6 @@ static void mcheck_check_cpu(void *info)
339static void mcheck_timer(struct work_struct *work) 342static void mcheck_timer(struct work_struct *work)
340{ 343{
341 on_each_cpu(mcheck_check_cpu, NULL, 1, 1); 344 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
342 schedule_delayed_work(&mcheck_work, check_interval * HZ);
343 345
344 /* 346 /*
345 * It's ok to read stale data here for notify_user and 347 * It's ok to read stale data here for notify_user and
@@ -349,17 +351,30 @@ static void mcheck_timer(struct work_struct *work)
349 * writes. 351 * writes.
350 */ 352 */
351 if (notify_user && console_logged) { 353 if (notify_user && console_logged) {
354 static unsigned long last_print;
355 unsigned long now = jiffies;
356
357 /* if we logged an MCE, reduce the polling interval */
358 next_interval = max(next_interval/2, HZ/100);
352 notify_user = 0; 359 notify_user = 0;
353 clear_bit(0, &console_logged); 360 clear_bit(0, &console_logged);
354 printk(KERN_INFO "Machine check events logged\n"); 361 if (time_after_eq(now, last_print + (check_interval*HZ))) {
362 last_print = now;
363 printk(KERN_INFO "Machine check events logged\n");
364 }
365 } else {
366 next_interval = min(next_interval*2, check_interval*HZ);
355 } 367 }
368
369 schedule_delayed_work(&mcheck_work, next_interval);
356} 370}
357 371
358 372
359static __init int periodic_mcheck_init(void) 373static __init int periodic_mcheck_init(void)
360{ 374{
361 if (check_interval) 375 next_interval = check_interval * HZ;
362 schedule_delayed_work(&mcheck_work, check_interval*HZ); 376 if (next_interval)
377 schedule_delayed_work(&mcheck_work, next_interval);
363 return 0; 378 return 0;
364} 379}
365__initcall(periodic_mcheck_init); 380__initcall(periodic_mcheck_init);
@@ -597,12 +612,13 @@ static int mce_resume(struct sys_device *dev)
597/* Reinit MCEs after user configuration changes */ 612/* Reinit MCEs after user configuration changes */
598static void mce_restart(void) 613static void mce_restart(void)
599{ 614{
600 if (check_interval) 615 if (next_interval)
601 cancel_delayed_work(&mcheck_work); 616 cancel_delayed_work(&mcheck_work);
602 /* Timer race is harmless here */ 617 /* Timer race is harmless here */
603 on_each_cpu(mce_init, NULL, 1, 1); 618 on_each_cpu(mce_init, NULL, 1, 1);
604 if (check_interval) 619 next_interval = check_interval * HZ;
605 schedule_delayed_work(&mcheck_work, check_interval*HZ); 620 if (next_interval)
621 schedule_delayed_work(&mcheck_work, next_interval);
606} 622}
607 623
608static struct sysdev_class mce_sysclass = { 624static struct sysdev_class mce_sysclass = {
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 455aa0b932f0..d0dc4891599b 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -300,7 +300,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
300 } 300 }
301 } 301 }
302 } 302 }
303 clustered_apic_check(); 303 setup_apic_routing();
304 if (!num_processors) 304 if (!num_processors)
305 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 305 printk(KERN_ERR "MPTABLE: no processors registered!\n");
306 return num_processors; 306 return num_processors;
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index dfab9f167366..6cd2b30e2ffc 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -27,28 +27,11 @@
27#include <asm/proto.h> 27#include <asm/proto.h>
28#include <asm/kdebug.h> 28#include <asm/kdebug.h>
29#include <asm/mce.h> 29#include <asm/mce.h>
30#include <asm/intel_arch_perfmon.h>
31 30
32int unknown_nmi_panic; 31int unknown_nmi_panic;
33int nmi_watchdog_enabled; 32int nmi_watchdog_enabled;
34int panic_on_unrecovered_nmi; 33int panic_on_unrecovered_nmi;
35 34
36/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
37 * evtsel_nmi_owner tracks the ownership of the event selection
38 * - different performance counters/ event selection may be reserved for
39 * different subsystems this reservation system just tries to coordinate
40 * things a little
41 */
42
43/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
44 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
45 */
46#define NMI_MAX_COUNTER_BITS 66
47#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
48
49static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
50static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
51
52static cpumask_t backtrace_mask = CPU_MASK_NONE; 35static cpumask_t backtrace_mask = CPU_MASK_NONE;
53 36
54/* nmi_active: 37/* nmi_active:
@@ -63,191 +46,11 @@ int panic_on_timeout;
63unsigned int nmi_watchdog = NMI_DEFAULT; 46unsigned int nmi_watchdog = NMI_DEFAULT;
64static unsigned int nmi_hz = HZ; 47static unsigned int nmi_hz = HZ;
65 48
66struct nmi_watchdog_ctlblk { 49static DEFINE_PER_CPU(short, wd_enabled);
67 int enabled;
68 u64 check_bit;
69 unsigned int cccr_msr;
70 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
71 unsigned int evntsel_msr; /* the MSR to select the events to handle */
72};
73static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
74 50
75/* local prototypes */ 51/* local prototypes */
76static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); 52static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
77 53
78/* converts an msr to an appropriate reservation bit */
79static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
80{
81 /* returns the bit offset of the performance counter register */
82 switch (boot_cpu_data.x86_vendor) {
83 case X86_VENDOR_AMD:
84 return (msr - MSR_K7_PERFCTR0);
85 case X86_VENDOR_INTEL:
86 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
87 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
88 else
89 return (msr - MSR_P4_BPU_PERFCTR0);
90 }
91 return 0;
92}
93
94/* converts an msr to an appropriate reservation bit */
95static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
96{
97 /* returns the bit offset of the event selection register */
98 switch (boot_cpu_data.x86_vendor) {
99 case X86_VENDOR_AMD:
100 return (msr - MSR_K7_EVNTSEL0);
101 case X86_VENDOR_INTEL:
102 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
103 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
104 else
105 return (msr - MSR_P4_BSU_ESCR0);
106 }
107 return 0;
108}
109
110/* checks for a bit availability (hack for oprofile) */
111int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
112{
113 int cpu;
114 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
115 for_each_possible_cpu (cpu) {
116 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
117 return 0;
118 }
119 return 1;
120}
121
122/* checks the an msr for availability */
123int avail_to_resrv_perfctr_nmi(unsigned int msr)
124{
125 unsigned int counter;
126 int cpu;
127
128 counter = nmi_perfctr_msr_to_bit(msr);
129 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
130
131 for_each_possible_cpu (cpu) {
132 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
133 return 0;
134 }
135 return 1;
136}
137
138static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
139{
140 unsigned int counter;
141 if (cpu < 0)
142 cpu = smp_processor_id();
143
144 counter = nmi_perfctr_msr_to_bit(msr);
145 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
146
147 if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
148 return 1;
149 return 0;
150}
151
152static void __release_perfctr_nmi(int cpu, unsigned int msr)
153{
154 unsigned int counter;
155 if (cpu < 0)
156 cpu = smp_processor_id();
157
158 counter = nmi_perfctr_msr_to_bit(msr);
159 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
160
161 clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
162}
163
164int reserve_perfctr_nmi(unsigned int msr)
165{
166 int cpu, i;
167 for_each_possible_cpu (cpu) {
168 if (!__reserve_perfctr_nmi(cpu, msr)) {
169 for_each_possible_cpu (i) {
170 if (i >= cpu)
171 break;
172 __release_perfctr_nmi(i, msr);
173 }
174 return 0;
175 }
176 }
177 return 1;
178}
179
180void release_perfctr_nmi(unsigned int msr)
181{
182 int cpu;
183 for_each_possible_cpu (cpu)
184 __release_perfctr_nmi(cpu, msr);
185}
186
187int __reserve_evntsel_nmi(int cpu, unsigned int msr)
188{
189 unsigned int counter;
190 if (cpu < 0)
191 cpu = smp_processor_id();
192
193 counter = nmi_evntsel_msr_to_bit(msr);
194 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
195
196 if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
197 return 1;
198 return 0;
199}
200
201static void __release_evntsel_nmi(int cpu, unsigned int msr)
202{
203 unsigned int counter;
204 if (cpu < 0)
205 cpu = smp_processor_id();
206
207 counter = nmi_evntsel_msr_to_bit(msr);
208 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
209
210 clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
211}
212
213int reserve_evntsel_nmi(unsigned int msr)
214{
215 int cpu, i;
216 for_each_possible_cpu (cpu) {
217 if (!__reserve_evntsel_nmi(cpu, msr)) {
218 for_each_possible_cpu (i) {
219 if (i >= cpu)
220 break;
221 __release_evntsel_nmi(i, msr);
222 }
223 return 0;
224 }
225 }
226 return 1;
227}
228
229void release_evntsel_nmi(unsigned int msr)
230{
231 int cpu;
232 for_each_possible_cpu (cpu) {
233 __release_evntsel_nmi(cpu, msr);
234 }
235}
236
237static __cpuinit inline int nmi_known_cpu(void)
238{
239 switch (boot_cpu_data.x86_vendor) {
240 case X86_VENDOR_AMD:
241 return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16;
242 case X86_VENDOR_INTEL:
243 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
244 return 1;
245 else
246 return (boot_cpu_data.x86 == 15);
247 }
248 return 0;
249}
250
251/* Run after command line and cpu_init init, but before all other checks */ 54/* Run after command line and cpu_init init, but before all other checks */
252void nmi_watchdog_default(void) 55void nmi_watchdog_default(void)
253{ 56{
@@ -277,23 +80,6 @@ static __init void nmi_cpu_busy(void *data)
277} 80}
278#endif 81#endif
279 82
280static unsigned int adjust_for_32bit_ctr(unsigned int hz)
281{
282 unsigned int retval = hz;
283
284 /*
285 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
286 * are writable, with higher bits sign extending from bit 31.
287 * So, we can only program the counter with 31 bit values and
288 * 32nd bit should be 1, for 33.. to be 1.
289 * Find the appropriate nmi_hz
290 */
291 if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
292 retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
293 }
294 return retval;
295}
296
297int __init check_nmi_watchdog (void) 83int __init check_nmi_watchdog (void)
298{ 84{
299 int *counts; 85 int *counts;
@@ -322,14 +108,14 @@ int __init check_nmi_watchdog (void)
322 mdelay((20*1000)/nmi_hz); // wait 20 ticks 108 mdelay((20*1000)/nmi_hz); // wait 20 ticks
323 109
324 for_each_online_cpu(cpu) { 110 for_each_online_cpu(cpu) {
325 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) 111 if (!per_cpu(wd_enabled, cpu))
326 continue; 112 continue;
327 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { 113 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
328 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 114 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
329 cpu, 115 cpu,
330 counts[cpu], 116 counts[cpu],
331 cpu_pda(cpu)->__nmi_count); 117 cpu_pda(cpu)->__nmi_count);
332 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; 118 per_cpu(wd_enabled, cpu) = 0;
333 atomic_dec(&nmi_active); 119 atomic_dec(&nmi_active);
334 } 120 }
335 } 121 }
@@ -344,13 +130,8 @@ int __init check_nmi_watchdog (void)
344 130
345 /* now that we know it works we can reduce NMI frequency to 131 /* now that we know it works we can reduce NMI frequency to
346 something more reasonable; makes a difference in some configs */ 132 something more reasonable; makes a difference in some configs */
347 if (nmi_watchdog == NMI_LOCAL_APIC) { 133 if (nmi_watchdog == NMI_LOCAL_APIC)
348 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 134 nmi_hz = lapic_adjust_nmi_hz(1);
349
350 nmi_hz = 1;
351 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
352 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
353 }
354 135
355 kfree(counts); 136 kfree(counts);
356 return 0; 137 return 0;
@@ -379,57 +160,6 @@ int __init setup_nmi_watchdog(char *str)
379 160
380__setup("nmi_watchdog=", setup_nmi_watchdog); 161__setup("nmi_watchdog=", setup_nmi_watchdog);
381 162
382static void disable_lapic_nmi_watchdog(void)
383{
384 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
385
386 if (atomic_read(&nmi_active) <= 0)
387 return;
388
389 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
390
391 BUG_ON(atomic_read(&nmi_active) != 0);
392}
393
394static void enable_lapic_nmi_watchdog(void)
395{
396 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
397
398 /* are we already enabled */
399 if (atomic_read(&nmi_active) != 0)
400 return;
401
402 /* are we lapic aware */
403 if (nmi_known_cpu() <= 0)
404 return;
405
406 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
407 touch_nmi_watchdog();
408}
409
410void disable_timer_nmi_watchdog(void)
411{
412 BUG_ON(nmi_watchdog != NMI_IO_APIC);
413
414 if (atomic_read(&nmi_active) <= 0)
415 return;
416
417 disable_irq(0);
418 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
419
420 BUG_ON(atomic_read(&nmi_active) != 0);
421}
422
423void enable_timer_nmi_watchdog(void)
424{
425 BUG_ON(nmi_watchdog != NMI_IO_APIC);
426
427 if (atomic_read(&nmi_active) == 0) {
428 touch_nmi_watchdog();
429 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
430 enable_irq(0);
431 }
432}
433 163
434static void __acpi_nmi_disable(void *__unused) 164static void __acpi_nmi_disable(void *__unused)
435{ 165{
@@ -515,275 +245,9 @@ late_initcall(init_lapic_nmi_sysfs);
515 245
516#endif /* CONFIG_PM */ 246#endif /* CONFIG_PM */
517 247
518/*
519 * Activate the NMI watchdog via the local APIC.
520 * Original code written by Keith Owens.
521 */
522
523/* Note that these events don't tick when the CPU idles. This means
524 the frequency varies with CPU load. */
525
526#define K7_EVNTSEL_ENABLE (1 << 22)
527#define K7_EVNTSEL_INT (1 << 20)
528#define K7_EVNTSEL_OS (1 << 17)
529#define K7_EVNTSEL_USR (1 << 16)
530#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
531#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
532
533static int setup_k7_watchdog(void)
534{
535 unsigned int perfctr_msr, evntsel_msr;
536 unsigned int evntsel;
537 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
538
539 perfctr_msr = MSR_K7_PERFCTR0;
540 evntsel_msr = MSR_K7_EVNTSEL0;
541 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
542 goto fail;
543
544 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
545 goto fail1;
546
547 /* Simulator may not support it */
548 if (checking_wrmsrl(evntsel_msr, 0UL))
549 goto fail2;
550 wrmsrl(perfctr_msr, 0UL);
551
552 evntsel = K7_EVNTSEL_INT
553 | K7_EVNTSEL_OS
554 | K7_EVNTSEL_USR
555 | K7_NMI_EVENT;
556
557 /* setup the timer */
558 wrmsr(evntsel_msr, evntsel, 0);
559 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
560 apic_write(APIC_LVTPC, APIC_DM_NMI);
561 evntsel |= K7_EVNTSEL_ENABLE;
562 wrmsr(evntsel_msr, evntsel, 0);
563
564 wd->perfctr_msr = perfctr_msr;
565 wd->evntsel_msr = evntsel_msr;
566 wd->cccr_msr = 0; //unused
567 wd->check_bit = 1ULL<<63;
568 return 1;
569fail2:
570 __release_evntsel_nmi(-1, evntsel_msr);
571fail1:
572 __release_perfctr_nmi(-1, perfctr_msr);
573fail:
574 return 0;
575}
576
577static void stop_k7_watchdog(void)
578{
579 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
580
581 wrmsr(wd->evntsel_msr, 0, 0);
582
583 __release_evntsel_nmi(-1, wd->evntsel_msr);
584 __release_perfctr_nmi(-1, wd->perfctr_msr);
585}
586
587/* Note that these events don't tick when the CPU idles. This means
588 the frequency varies with CPU load. */
589
590#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
591#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
592#define P4_ESCR_OS (1<<3)
593#define P4_ESCR_USR (1<<2)
594#define P4_CCCR_OVF_PMI0 (1<<26)
595#define P4_CCCR_OVF_PMI1 (1<<27)
596#define P4_CCCR_THRESHOLD(N) ((N)<<20)
597#define P4_CCCR_COMPLEMENT (1<<19)
598#define P4_CCCR_COMPARE (1<<18)
599#define P4_CCCR_REQUIRED (3<<16)
600#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
601#define P4_CCCR_ENABLE (1<<12)
602#define P4_CCCR_OVF (1<<31)
603/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
604 CRU_ESCR0 (with any non-null event selector) through a complemented
605 max threshold. [IA32-Vol3, Section 14.9.9] */
606
607static int setup_p4_watchdog(void)
608{
609 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
610 unsigned int evntsel, cccr_val;
611 unsigned int misc_enable, dummy;
612 unsigned int ht_num;
613 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
614
615 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
616 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
617 return 0;
618
619#ifdef CONFIG_SMP
620 /* detect which hyperthread we are on */
621 if (smp_num_siblings == 2) {
622 unsigned int ebx, apicid;
623
624 ebx = cpuid_ebx(1);
625 apicid = (ebx >> 24) & 0xff;
626 ht_num = apicid & 1;
627 } else
628#endif
629 ht_num = 0;
630
631 /* performance counters are shared resources
632 * assign each hyperthread its own set
633 * (re-use the ESCR0 register, seems safe
634 * and keeps the cccr_val the same)
635 */
636 if (!ht_num) {
637 /* logical cpu 0 */
638 perfctr_msr = MSR_P4_IQ_PERFCTR0;
639 evntsel_msr = MSR_P4_CRU_ESCR0;
640 cccr_msr = MSR_P4_IQ_CCCR0;
641 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
642 } else {
643 /* logical cpu 1 */
644 perfctr_msr = MSR_P4_IQ_PERFCTR1;
645 evntsel_msr = MSR_P4_CRU_ESCR0;
646 cccr_msr = MSR_P4_IQ_CCCR1;
647 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
648 }
649
650 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
651 goto fail;
652
653 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
654 goto fail1;
655
656 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
657 | P4_ESCR_OS
658 | P4_ESCR_USR;
659
660 cccr_val |= P4_CCCR_THRESHOLD(15)
661 | P4_CCCR_COMPLEMENT
662 | P4_CCCR_COMPARE
663 | P4_CCCR_REQUIRED;
664
665 wrmsr(evntsel_msr, evntsel, 0);
666 wrmsr(cccr_msr, cccr_val, 0);
667 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
668 apic_write(APIC_LVTPC, APIC_DM_NMI);
669 cccr_val |= P4_CCCR_ENABLE;
670 wrmsr(cccr_msr, cccr_val, 0);
671
672 wd->perfctr_msr = perfctr_msr;
673 wd->evntsel_msr = evntsel_msr;
674 wd->cccr_msr = cccr_msr;
675 wd->check_bit = 1ULL<<39;
676 return 1;
677fail1:
678 __release_perfctr_nmi(-1, perfctr_msr);
679fail:
680 return 0;
681}
682
683static void stop_p4_watchdog(void)
684{
685 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
686
687 wrmsr(wd->cccr_msr, 0, 0);
688 wrmsr(wd->evntsel_msr, 0, 0);
689
690 __release_evntsel_nmi(-1, wd->evntsel_msr);
691 __release_perfctr_nmi(-1, wd->perfctr_msr);
692}
693
694#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
695#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
696
697static int setup_intel_arch_watchdog(void)
698{
699 unsigned int ebx;
700 union cpuid10_eax eax;
701 unsigned int unused;
702 unsigned int perfctr_msr, evntsel_msr;
703 unsigned int evntsel;
704 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
705
706 /*
707 * Check whether the Architectural PerfMon supports
708 * Unhalted Core Cycles Event or not.
709 * NOTE: Corresponding bit = 0 in ebx indicates event present.
710 */
711 cpuid(10, &(eax.full), &ebx, &unused, &unused);
712 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
713 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
714 goto fail;
715
716 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
717 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
718
719 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
720 goto fail;
721
722 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
723 goto fail1;
724
725 wrmsrl(perfctr_msr, 0UL);
726
727 evntsel = ARCH_PERFMON_EVENTSEL_INT
728 | ARCH_PERFMON_EVENTSEL_OS
729 | ARCH_PERFMON_EVENTSEL_USR
730 | ARCH_PERFMON_NMI_EVENT_SEL
731 | ARCH_PERFMON_NMI_EVENT_UMASK;
732
733 /* setup the timer */
734 wrmsr(evntsel_msr, evntsel, 0);
735
736 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
737 wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
738
739 apic_write(APIC_LVTPC, APIC_DM_NMI);
740 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
741 wrmsr(evntsel_msr, evntsel, 0);
742
743 wd->perfctr_msr = perfctr_msr;
744 wd->evntsel_msr = evntsel_msr;
745 wd->cccr_msr = 0; //unused
746 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
747 return 1;
748fail1:
749 __release_perfctr_nmi(-1, perfctr_msr);
750fail:
751 return 0;
752}
753
754static void stop_intel_arch_watchdog(void)
755{
756 unsigned int ebx;
757 union cpuid10_eax eax;
758 unsigned int unused;
759 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
760
761 /*
762 * Check whether the Architectural PerfMon supports
763 * Unhalted Core Cycles Event or not.
764 * NOTE: Corresponding bit = 0 in ebx indicates event present.
765 */
766 cpuid(10, &(eax.full), &ebx, &unused, &unused);
767 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
768 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
769 return;
770
771 wrmsr(wd->evntsel_msr, 0, 0);
772
773 __release_evntsel_nmi(-1, wd->evntsel_msr);
774 __release_perfctr_nmi(-1, wd->perfctr_msr);
775}
776
777void setup_apic_nmi_watchdog(void *unused) 248void setup_apic_nmi_watchdog(void *unused)
778{ 249{
779 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 250 if (__get_cpu_var(wd_enabled) == 1)
780
781 /* only support LOCAL and IO APICs for now */
782 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
783 (nmi_watchdog != NMI_IO_APIC))
784 return;
785
786 if (wd->enabled == 1)
787 return; 251 return;
788 252
789 /* cheap hack to support suspend/resume */ 253 /* cheap hack to support suspend/resume */
@@ -791,62 +255,31 @@ void setup_apic_nmi_watchdog(void *unused)
791 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) 255 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
792 return; 256 return;
793 257
794 if (nmi_watchdog == NMI_LOCAL_APIC) { 258 switch (nmi_watchdog) {
795 switch (boot_cpu_data.x86_vendor) { 259 case NMI_LOCAL_APIC:
796 case X86_VENDOR_AMD: 260 __get_cpu_var(wd_enabled) = 1;
797 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) 261 if (lapic_watchdog_init(nmi_hz) < 0) {
798 return; 262 __get_cpu_var(wd_enabled) = 0;
799 if (!setup_k7_watchdog())
800 return;
801 break;
802 case X86_VENDOR_INTEL:
803 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
804 if (!setup_intel_arch_watchdog())
805 return;
806 break;
807 }
808 if (!setup_p4_watchdog())
809 return;
810 break;
811 default:
812 return; 263 return;
813 } 264 }
265 /* FALL THROUGH */
266 case NMI_IO_APIC:
267 __get_cpu_var(wd_enabled) = 1;
268 atomic_inc(&nmi_active);
814 } 269 }
815 wd->enabled = 1;
816 atomic_inc(&nmi_active);
817} 270}
818 271
819void stop_apic_nmi_watchdog(void *unused) 272void stop_apic_nmi_watchdog(void *unused)
820{ 273{
821 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
822
823 /* only support LOCAL and IO APICs for now */ 274 /* only support LOCAL and IO APICs for now */
824 if ((nmi_watchdog != NMI_LOCAL_APIC) && 275 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
825 (nmi_watchdog != NMI_IO_APIC)) 276 (nmi_watchdog != NMI_IO_APIC))
826 return; 277 return;
827 278 if (__get_cpu_var(wd_enabled) == 0)
828 if (wd->enabled == 0)
829 return; 279 return;
830 280 if (nmi_watchdog == NMI_LOCAL_APIC)
831 if (nmi_watchdog == NMI_LOCAL_APIC) { 281 lapic_watchdog_stop();
832 switch (boot_cpu_data.x86_vendor) { 282 __get_cpu_var(wd_enabled) = 0;
833 case X86_VENDOR_AMD:
834 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
835 return;
836 stop_k7_watchdog();
837 break;
838 case X86_VENDOR_INTEL:
839 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
840 stop_intel_arch_watchdog();
841 break;
842 }
843 stop_p4_watchdog();
844 break;
845 default:
846 return;
847 }
848 }
849 wd->enabled = 0;
850 atomic_dec(&nmi_active); 283 atomic_dec(&nmi_active);
851} 284}
852 285
@@ -885,9 +318,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
885 int sum; 318 int sum;
886 int touched = 0; 319 int touched = 0;
887 int cpu = smp_processor_id(); 320 int cpu = smp_processor_id();
888 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 321 int rc = 0;
889 u64 dummy;
890 int rc=0;
891 322
892 /* check for other users first */ 323 /* check for other users first */
893 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 324 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
@@ -934,55 +365,20 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
934 } 365 }
935 366
936 /* see if the nmi watchdog went off */ 367 /* see if the nmi watchdog went off */
937 if (wd->enabled) { 368 if (!__get_cpu_var(wd_enabled))
938 if (nmi_watchdog == NMI_LOCAL_APIC) { 369 return rc;
939 rdmsrl(wd->perfctr_msr, dummy); 370 switch (nmi_watchdog) {
940 if (dummy & wd->check_bit){ 371 case NMI_LOCAL_APIC:
941 /* this wasn't a watchdog timer interrupt */ 372 rc |= lapic_wd_event(nmi_hz);
942 goto done; 373 break;
943 } 374 case NMI_IO_APIC:
944 375 /* don't know how to accurately check for this.
945 /* only Intel uses the cccr msr */ 376 * just assume it was a watchdog timer interrupt
946 if (wd->cccr_msr != 0) { 377 * This matches the old behaviour.
947 /* 378 */
948 * P4 quirks: 379 rc = 1;
949 * - An overflown perfctr will assert its interrupt 380 break;
950 * until the OVF flag in its CCCR is cleared.
951 * - LVTPC is masked on interrupt and must be
952 * unmasked by the LVTPC handler.
953 */
954 rdmsrl(wd->cccr_msr, dummy);
955 dummy &= ~P4_CCCR_OVF;
956 wrmsrl(wd->cccr_msr, dummy);
957 apic_write(APIC_LVTPC, APIC_DM_NMI);
958 /* start the cycle over again */
959 wrmsrl(wd->perfctr_msr,
960 -((u64)cpu_khz * 1000 / nmi_hz));
961 } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
962 /*
963 * ArchPerfom/Core Duo needs to re-unmask
964 * the apic vector
965 */
966 apic_write(APIC_LVTPC, APIC_DM_NMI);
967 /* ARCH_PERFMON has 32 bit counter writes */
968 wrmsr(wd->perfctr_msr,
969 (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
970 } else {
971 /* start the cycle over again */
972 wrmsrl(wd->perfctr_msr,
973 -((u64)cpu_khz * 1000 / nmi_hz));
974 }
975 rc = 1;
976 } else if (nmi_watchdog == NMI_IO_APIC) {
977 /* don't know how to accurately check for this.
978 * just assume it was a watchdog timer interrupt
979 * This matches the old behaviour.
980 */
981 rc = 1;
982 } else
983 printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
984 } 381 }
985done:
986 return rc; 382 return rc;
987} 383}
988 384
@@ -1067,12 +463,4 @@ void __trigger_all_cpu_backtrace(void)
1067 463
1068EXPORT_SYMBOL(nmi_active); 464EXPORT_SYMBOL(nmi_active);
1069EXPORT_SYMBOL(nmi_watchdog); 465EXPORT_SYMBOL(nmi_watchdog);
1070EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
1071EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
1072EXPORT_SYMBOL(reserve_perfctr_nmi);
1073EXPORT_SYMBOL(release_perfctr_nmi);
1074EXPORT_SYMBOL(reserve_evntsel_nmi);
1075EXPORT_SYMBOL(release_evntsel_nmi);
1076EXPORT_SYMBOL(disable_timer_nmi_watchdog);
1077EXPORT_SYMBOL(enable_timer_nmi_watchdog);
1078EXPORT_SYMBOL(touch_nmi_watchdog); 466EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 04480c3b68f5..5bd20b542c1e 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -507,7 +507,7 @@ error:
507 return ret; 507 return ret;
508} 508}
509 509
510static struct dma_mapping_ops calgary_dma_ops = { 510static const struct dma_mapping_ops calgary_dma_ops = {
511 .alloc_coherent = calgary_alloc_coherent, 511 .alloc_coherent = calgary_alloc_coherent,
512 .map_single = calgary_map_single, 512 .map_single = calgary_map_single,
513 .unmap_single = calgary_unmap_single, 513 .unmap_single = calgary_unmap_single,
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 0bae862e9a55..0a762e10f2be 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -556,7 +556,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
556 556
557extern int agp_amd64_init(void); 557extern int agp_amd64_init(void);
558 558
559static struct dma_mapping_ops gart_dma_ops = { 559static const struct dma_mapping_ops gart_dma_ops = {
560 .mapping_error = NULL, 560 .mapping_error = NULL,
561 .map_single = gart_map_single, 561 .map_single = gart_map_single,
562 .map_simple = gart_map_simple, 562 .map_simple = gart_map_simple,
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index df09ab05a1bd..6dade0c867cc 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -79,7 +79,7 @@ void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
79{ 79{
80} 80}
81 81
82struct dma_mapping_ops nommu_dma_ops = { 82const struct dma_mapping_ops nommu_dma_ops = {
83 .map_single = nommu_map_single, 83 .map_single = nommu_map_single,
84 .unmap_single = nommu_unmap_single, 84 .unmap_single = nommu_unmap_single,
85 .map_sg = nommu_map_sg, 85 .map_sg = nommu_map_sg,
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index eb18be5a6569..4b4569abc60c 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -12,7 +12,7 @@
12int swiotlb __read_mostly; 12int swiotlb __read_mostly;
13EXPORT_SYMBOL(swiotlb); 13EXPORT_SYMBOL(swiotlb);
14 14
15struct dma_mapping_ops swiotlb_dma_ops = { 15const struct dma_mapping_ops swiotlb_dma_ops = {
16 .mapping_error = swiotlb_dma_mapping_error, 16 .mapping_error = swiotlb_dma_mapping_error,
17 .alloc_coherent = swiotlb_alloc_coherent, 17 .alloc_coherent = swiotlb_alloc_coherent,
18 .free_coherent = swiotlb_free_coherent, 18 .free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc245c8..4f21765078b7 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -288,16 +288,18 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
288 288
289static int __init idle_setup (char *str) 289static int __init idle_setup (char *str)
290{ 290{
291 if (!strncmp(str, "poll", 4)) { 291 if (!strcmp(str, "poll")) {
292 printk("using polling idle threads.\n"); 292 printk("using polling idle threads.\n");
293 pm_idle = poll_idle; 293 pm_idle = poll_idle;
294 } 294 } else if (!strcmp(str, "mwait"))
295 force_mwait = 1;
296 else
297 return -1;
295 298
296 boot_option_idle_override = 1; 299 boot_option_idle_override = 1;
297 return 1; 300 return 0;
298} 301}
299 302early_param("idle", idle_setup);
300__setup("idle=", idle_setup);
301 303
302/* Prints also some state that isn't saved in the pt_regs */ 304/* Prints also some state that isn't saved in the pt_regs */
303void __show_regs(struct pt_regs * regs) 305void __show_regs(struct pt_regs * regs)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b696881d..db30b5bcef61 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -79,6 +79,8 @@ int bootloader_type;
79 79
80unsigned long saved_video_mode; 80unsigned long saved_video_mode;
81 81
82int force_mwait __cpuinitdata;
83
82/* 84/*
83 * Early DMI memory 85 * Early DMI memory
84 */ 86 */
@@ -205,10 +207,10 @@ static void discover_ebda(void)
205 * there is a real-mode segmented pointer pointing to the 207 * there is a real-mode segmented pointer pointing to the
206 * 4K EBDA area at 0x40E 208 * 4K EBDA area at 0x40E
207 */ 209 */
208 ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; 210 ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
209 ebda_addr <<= 4; 211 ebda_addr <<= 4;
210 212
211 ebda_size = *(unsigned short *)(unsigned long)ebda_addr; 213 ebda_size = *(unsigned short *)__va(ebda_addr);
212 214
213 /* Round EBDA up to pages */ 215 /* Round EBDA up to pages */
214 if (ebda_size == 0) 216 if (ebda_size == 0)
@@ -243,11 +245,12 @@ void __init setup_arch(char **cmdline_p)
243 init_mm.end_code = (unsigned long) &_etext; 245 init_mm.end_code = (unsigned long) &_etext;
244 init_mm.end_data = (unsigned long) &_edata; 246 init_mm.end_data = (unsigned long) &_edata;
245 init_mm.brk = (unsigned long) &_end; 247 init_mm.brk = (unsigned long) &_end;
248 init_mm.pgd = __va(__pa_symbol(&init_level4_pgt));
246 249
247 code_resource.start = virt_to_phys(&_text); 250 code_resource.start = __pa_symbol(&_text);
248 code_resource.end = virt_to_phys(&_etext)-1; 251 code_resource.end = __pa_symbol(&_etext)-1;
249 data_resource.start = virt_to_phys(&_etext); 252 data_resource.start = __pa_symbol(&_etext);
250 data_resource.end = virt_to_phys(&_edata)-1; 253 data_resource.end = __pa_symbol(&_edata)-1;
251 254
252 early_identify_cpu(&boot_cpu_data); 255 early_identify_cpu(&boot_cpu_data);
253 256
@@ -274,8 +277,6 @@ void __init setup_arch(char **cmdline_p)
274 277
275 dmi_scan_machine(); 278 dmi_scan_machine();
276 279
277 zap_low_mappings(0);
278
279#ifdef CONFIG_ACPI 280#ifdef CONFIG_ACPI
280 /* 281 /*
281 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). 282 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -329,15 +330,8 @@ void __init setup_arch(char **cmdline_p)
329#endif 330#endif
330 331
331#ifdef CONFIG_SMP 332#ifdef CONFIG_SMP
332 /*
333 * But first pinch a few for the stack/trampoline stuff
334 * FIXME: Don't need the extra page at 4K, but need to fix
335 * trampoline before removing it. (see the GDT stuff)
336 */
337 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
338
339 /* Reserve SMP trampoline */ 333 /* Reserve SMP trampoline */
340 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); 334 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
341#endif 335#endif
342 336
343#ifdef CONFIG_ACPI_SLEEP 337#ifdef CONFIG_ACPI_SLEEP
@@ -612,6 +606,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
612 606
613 /* RDTSC can be speculated around */ 607 /* RDTSC can be speculated around */
614 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); 608 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
609
610 /* Family 10 doesn't support C states in MWAIT so don't use it */
611 if (c->x86 == 0x10 && !force_mwait)
612 clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
615} 613}
616 614
617static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 615static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -987,9 +985,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
987 "stc", 985 "stc",
988 "100mhzsteps", 986 "100mhzsteps",
989 "hwpstate", 987 "hwpstate",
990 NULL, /* tsc invariant mapped to constant_tsc */ 988 "", /* tsc invariant mapped to constant_tsc */
991 NULL, 989 /* nothing */
992 /* nothing */ /* constant_tsc - moved to flags */
993 }; 990 };
994 991
995 992
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6a70b55f719d..64379a80d763 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void)
103 if (!NODE_DATA(cpu_to_node(i))) { 103 if (!NODE_DATA(cpu_to_node(i))) {
104 printk("cpu with no node %d, num_online_nodes %d\n", 104 printk("cpu with no node %d, num_online_nodes %d\n",
105 i, num_online_nodes()); 105 i, num_online_nodes());
106 ptr = alloc_bootmem(size); 106 ptr = alloc_bootmem_pages(size);
107 } else { 107 } else {
108 ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); 108 ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
109 } 109 }
110 if (!ptr) 110 if (!ptr)
111 panic("Cannot allocate cpu data for CPU %d\n", i); 111 panic("Cannot allocate cpu data for CPU %d\n", i);
@@ -201,7 +201,6 @@ void __cpuinit cpu_init (void)
201 /* CPU 0 is initialised in head64.c */ 201 /* CPU 0 is initialised in head64.c */
202 if (cpu != 0) { 202 if (cpu != 0) {
203 pda_init(cpu); 203 pda_init(cpu);
204 zap_low_mappings(cpu);
205 } else 204 } else
206 estacks = boot_exception_stacks; 205 estacks = boot_exception_stacks;
207 206
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 49ec324cd141..c819625f3316 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -141,7 +141,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
141 goto badframe; 141 goto badframe;
142 142
143#ifdef DEBUG_SIG 143#ifdef DEBUG_SIG
144 printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax); 144 printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax);
145#endif 145#endif
146 146
147 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT) 147 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
@@ -301,7 +301,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
301 if (test_thread_flag(TIF_SINGLESTEP)) 301 if (test_thread_flag(TIF_SINGLESTEP))
302 ptrace_notify(SIGTRAP); 302 ptrace_notify(SIGTRAP);
303#ifdef DEBUG_SIG 303#ifdef DEBUG_SIG
304 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 304 printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
305 current->comm, current->pid, frame, regs->rip, frame->pretcode); 305 current->comm, current->pid, frame, regs->rip, frame->pretcode);
306#endif 306#endif
307 307
@@ -463,7 +463,7 @@ void
463do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 463do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
464{ 464{
465#ifdef DEBUG_SIG 465#ifdef DEBUG_SIG
466 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", 466 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n",
467 thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); 467 thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
468#endif 468#endif
469 469
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index af1ec4d23cf8..22abae4e9f39 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -76,7 +76,7 @@ static inline void leave_mm(int cpu)
76 if (read_pda(mmu_state) == TLBSTATE_OK) 76 if (read_pda(mmu_state) == TLBSTATE_OK)
77 BUG(); 77 BUG();
78 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); 78 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
79 load_cr3(swapper_pg_dir); 79 load_cr3(init_mm.pgd);
80} 80}
81 81
82/* 82/*
@@ -452,42 +452,34 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
452} 452}
453EXPORT_SYMBOL(smp_call_function); 453EXPORT_SYMBOL(smp_call_function);
454 454
455void smp_stop_cpu(void) 455static void stop_this_cpu(void *dummy)
456{ 456{
457 unsigned long flags; 457 local_irq_disable();
458 /* 458 /*
459 * Remove this CPU: 459 * Remove this CPU:
460 */ 460 */
461 cpu_clear(smp_processor_id(), cpu_online_map); 461 cpu_clear(smp_processor_id(), cpu_online_map);
462 local_irq_save(flags);
463 disable_local_APIC(); 462 disable_local_APIC();
464 local_irq_restore(flags);
465}
466
467static void smp_really_stop_cpu(void *dummy)
468{
469 smp_stop_cpu();
470 for (;;) 463 for (;;)
471 halt(); 464 halt();
472} 465}
473 466
474void smp_send_stop(void) 467void smp_send_stop(void)
475{ 468{
476 int nolock = 0; 469 int nolock;
470 unsigned long flags;
471
477 if (reboot_force) 472 if (reboot_force)
478 return; 473 return;
474
479 /* Don't deadlock on the call lock in panic */ 475 /* Don't deadlock on the call lock in panic */
480 if (!spin_trylock(&call_lock)) { 476 nolock = !spin_trylock(&call_lock);
481 /* ignore locking because we have panicked anyways */ 477 local_irq_save(flags);
482 nolock = 1; 478 __smp_call_function(stop_this_cpu, NULL, 0, 0);
483 }
484 __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
485 if (!nolock) 479 if (!nolock)
486 spin_unlock(&call_lock); 480 spin_unlock(&call_lock);
487
488 local_irq_disable();
489 disable_local_APIC(); 481 disable_local_APIC();
490 local_irq_enable(); 482 local_irq_restore(flags);
491} 483}
492 484
493/* 485/*
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index cd4643a37022..4d9dacfae575 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,7 +60,6 @@
60#include <asm/irq.h> 60#include <asm/irq.h>
61#include <asm/hw_irq.h> 61#include <asm/hw_irq.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/genapic.h>
64 63
65/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
66int smp_num_siblings = 1; 65int smp_num_siblings = 1;
@@ -68,7 +67,6 @@ EXPORT_SYMBOL(smp_num_siblings);
68 67
69/* Last level cache ID of each logical CPU */ 68/* Last level cache ID of each logical CPU */
70u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; 69u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
71EXPORT_SYMBOL(cpu_llc_id);
72 70
73/* Bitmask of currently online CPUs */ 71/* Bitmask of currently online CPUs */
74cpumask_t cpu_online_map __read_mostly; 72cpumask_t cpu_online_map __read_mostly;
@@ -392,7 +390,8 @@ static void inquire_remote_apic(int apicid)
392{ 390{
393 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 391 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
394 char *names[] = { "ID", "VERSION", "SPIV" }; 392 char *names[] = { "ID", "VERSION", "SPIV" };
395 int timeout, status; 393 int timeout;
394 unsigned int status;
396 395
397 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); 396 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
398 397
@@ -402,7 +401,9 @@ static void inquire_remote_apic(int apicid)
402 /* 401 /*
403 * Wait for idle. 402 * Wait for idle.
404 */ 403 */
405 apic_wait_icr_idle(); 404 status = safe_apic_wait_icr_idle();
405 if (status)
406 printk("a previous APIC delivery may have failed\n");
406 407
407 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 408 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
408 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); 409 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -430,8 +431,8 @@ static void inquire_remote_apic(int apicid)
430 */ 431 */
431static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) 432static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
432{ 433{
433 unsigned long send_status = 0, accept_status = 0; 434 unsigned long send_status, accept_status = 0;
434 int maxlvt, timeout, num_starts, j; 435 int maxlvt, num_starts, j;
435 436
436 Dprintk("Asserting INIT.\n"); 437 Dprintk("Asserting INIT.\n");
437 438
@@ -447,12 +448,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
447 | APIC_DM_INIT); 448 | APIC_DM_INIT);
448 449
449 Dprintk("Waiting for send to finish...\n"); 450 Dprintk("Waiting for send to finish...\n");
450 timeout = 0; 451 send_status = safe_apic_wait_icr_idle();
451 do {
452 Dprintk("+");
453 udelay(100);
454 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
455 } while (send_status && (timeout++ < 1000));
456 452
457 mdelay(10); 453 mdelay(10);
458 454
@@ -465,12 +461,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
465 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 461 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
466 462
467 Dprintk("Waiting for send to finish...\n"); 463 Dprintk("Waiting for send to finish...\n");
468 timeout = 0; 464 send_status = safe_apic_wait_icr_idle();
469 do {
470 Dprintk("+");
471 udelay(100);
472 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
473 } while (send_status && (timeout++ < 1000));
474 465
475 mb(); 466 mb();
476 atomic_set(&init_deasserted, 1); 467 atomic_set(&init_deasserted, 1);
@@ -509,12 +500,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
509 Dprintk("Startup point 1.\n"); 500 Dprintk("Startup point 1.\n");
510 501
511 Dprintk("Waiting for send to finish...\n"); 502 Dprintk("Waiting for send to finish...\n");
512 timeout = 0; 503 send_status = safe_apic_wait_icr_idle();
513 do {
514 Dprintk("+");
515 udelay(100);
516 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
517 } while (send_status && (timeout++ < 1000));
518 504
519 /* 505 /*
520 * Give the other CPU some time to accept the IPI. 506 * Give the other CPU some time to accept the IPI.
@@ -945,6 +931,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
945 return -ENOSYS; 931 return -ENOSYS;
946 } 932 }
947 933
934 /*
935 * Save current MTRR state in case it was changed since early boot
936 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
937 */
938 mtrr_save_state();
939
948 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 940 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
949 /* Boot it! */ 941 /* Boot it! */
950 err = do_boot_cpu(cpu, apicid); 942 err = do_boot_cpu(cpu, apicid);
@@ -965,13 +957,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
965 957
966 while (!cpu_isset(cpu, cpu_online_map)) 958 while (!cpu_isset(cpu, cpu_online_map))
967 cpu_relax(); 959 cpu_relax();
968
969 if (num_online_cpus() > 8 && genapic == &apic_flat) {
970 printk(KERN_WARNING
971 "flat APIC routing can't be used with > 8 cpus\n");
972 BUG();
973 }
974
975 err = 0; 960 err = 0;
976 961
977 return err; 962 return err;
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 91f7e678bae7..6a5a98f2a75c 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -12,6 +12,10 @@
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/page.h> 13#include <asm/page.h>
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/mtrr.h>
16
17/* References to section boundaries */
18extern const void __nosave_begin, __nosave_end;
15 19
16struct saved_context saved_context; 20struct saved_context saved_context;
17 21
@@ -33,7 +37,6 @@ void __save_processor_state(struct saved_context *ctxt)
33 asm volatile ("str %0" : "=m" (ctxt->tr)); 37 asm volatile ("str %0" : "=m" (ctxt->tr));
34 38
35 /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ 39 /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
36 /* EFER should be constant for kernel version, no need to handle it. */
37 /* 40 /*
38 * segment registers 41 * segment registers
39 */ 42 */
@@ -46,10 +49,12 @@ void __save_processor_state(struct saved_context *ctxt)
46 rdmsrl(MSR_FS_BASE, ctxt->fs_base); 49 rdmsrl(MSR_FS_BASE, ctxt->fs_base);
47 rdmsrl(MSR_GS_BASE, ctxt->gs_base); 50 rdmsrl(MSR_GS_BASE, ctxt->gs_base);
48 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 51 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
52 mtrr_save_fixed_ranges(NULL);
49 53
50 /* 54 /*
51 * control registers 55 * control registers
52 */ 56 */
57 rdmsrl(MSR_EFER, ctxt->efer);
53 asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0)); 58 asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0));
54 asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2)); 59 asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2));
55 asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3)); 60 asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
@@ -75,6 +80,7 @@ void __restore_processor_state(struct saved_context *ctxt)
75 /* 80 /*
76 * control registers 81 * control registers
77 */ 82 */
83 wrmsrl(MSR_EFER, ctxt->efer);
78 asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8)); 84 asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8));
79 asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4)); 85 asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4));
80 asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3)); 86 asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3));
@@ -219,4 +225,15 @@ int swsusp_arch_resume(void)
219 restore_image(); 225 restore_image();
220 return 0; 226 return 0;
221} 227}
228
229/*
230 * pfn_is_nosave - check if given pfn is in the 'nosave' section
231 */
232
233int pfn_is_nosave(unsigned long pfn)
234{
235 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
236 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
237 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
238}
222#endif /* CONFIG_SOFTWARE_SUSPEND */ 239#endif /* CONFIG_SOFTWARE_SUSPEND */
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
index bfbe00763c68..16d183f67bc1 100644
--- a/arch/x86_64/kernel/suspend_asm.S
+++ b/arch/x86_64/kernel/suspend_asm.S
@@ -71,9 +71,10 @@ loop:
71 jmp loop 71 jmp loop
72done: 72done:
73 /* go back to the original page tables */ 73 /* go back to the original page tables */
74 leaq init_level4_pgt(%rip), %rax 74 movq $(init_level4_pgt - __START_KERNEL_map), %rax
75 subq $__START_KERNEL_map, %rax 75 addq phys_base(%rip), %rax
76 movq %rax, %cr3 76 movq %rax, %cr3
77
77 /* Flush TLB, including "global" things (vmalloc) */ 78 /* Flush TLB, including "global" things (vmalloc) */
78 movq mmu_cr4_features(%rip), %rax 79 movq mmu_cr4_features(%rip), %rax
79 movq %rax, %rdx 80 movq %rax, %rdx
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
index 213fd6ab789d..63d592c276cc 100644
--- a/arch/x86_64/kernel/syscall.c
+++ b/arch/x86_64/kernel/syscall.c
@@ -3,6 +3,7 @@
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <linux/sys.h> 4#include <linux/sys.h>
5#include <linux/cache.h> 5#include <linux/cache.h>
6#include <asm/asm-offsets.h>
6 7
7#define __NO_STUBS 8#define __NO_STUBS
8 9
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 75d73a9aa9ff..0652e173813b 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -39,13 +39,11 @@
39#include <asm/proto.h> 39#include <asm/proto.h>
40#include <asm/hpet.h> 40#include <asm/hpet.h>
41#include <asm/sections.h> 41#include <asm/sections.h>
42#include <linux/cpufreq.h>
43#include <linux/hpet.h> 42#include <linux/hpet.h>
44#include <asm/apic.h> 43#include <asm/apic.h>
45#include <asm/hpet.h> 44#include <asm/hpet.h>
46 45#include <asm/mpspec.h>
47extern void i8254_timer_resume(void); 46#include <asm/nmi.h>
48extern int using_apic_timer;
49 47
50static char *timename = NULL; 48static char *timename = NULL;
51 49
@@ -252,6 +250,51 @@ static unsigned long get_cmos_time(void)
252 return mktime(year, mon, day, hour, min, sec); 250 return mktime(year, mon, day, hour, min, sec);
253} 251}
254 252
253/* calibrate_cpu is used on systems with fixed rate TSCs to determine
254 * processor frequency */
255#define TICK_COUNT 100000000
256static unsigned int __init tsc_calibrate_cpu_khz(void)
257{
258 int tsc_start, tsc_now;
259 int i, no_ctr_free;
260 unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
261 unsigned long flags;
262
263 for (i = 0; i < 4; i++)
264 if (avail_to_resrv_perfctr_nmi_bit(i))
265 break;
266 no_ctr_free = (i == 4);
267 if (no_ctr_free) {
268 i = 3;
269 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
270 wrmsrl(MSR_K7_EVNTSEL3, 0);
271 rdmsrl(MSR_K7_PERFCTR3, pmc3);
272 } else {
273 reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
274 reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
275 }
276 local_irq_save(flags);
277 /* start meauring cycles, incrementing from 0 */
278 wrmsrl(MSR_K7_PERFCTR0 + i, 0);
279 wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
280 rdtscl(tsc_start);
281 do {
282 rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
283 tsc_now = get_cycles_sync();
284 } while ((tsc_now - tsc_start) < TICK_COUNT);
285
286 local_irq_restore(flags);
287 if (no_ctr_free) {
288 wrmsrl(MSR_K7_EVNTSEL3, 0);
289 wrmsrl(MSR_K7_PERFCTR3, pmc3);
290 wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
291 } else {
292 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
293 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
294 }
295
296 return pmc_now * tsc_khz / (tsc_now - tsc_start);
297}
255 298
256/* 299/*
257 * pit_calibrate_tsc() uses the speaker output (channel 2) of 300 * pit_calibrate_tsc() uses the speaker output (channel 2) of
@@ -285,7 +328,7 @@ static unsigned int __init pit_calibrate_tsc(void)
285#define PIT_MODE 0x43 328#define PIT_MODE 0x43
286#define PIT_CH0 0x40 329#define PIT_CH0 0x40
287 330
288static void __init __pit_init(int val, u8 mode) 331static void __pit_init(int val, u8 mode)
289{ 332{
290 unsigned long flags; 333 unsigned long flags;
291 334
@@ -301,12 +344,12 @@ void __init pit_init(void)
301 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */ 344 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
302} 345}
303 346
304void __init pit_stop_interrupt(void) 347void pit_stop_interrupt(void)
305{ 348{
306 __pit_init(0, 0x30); /* mode 0 */ 349 __pit_init(0, 0x30); /* mode 0 */
307} 350}
308 351
309void __init stop_timer_interrupt(void) 352void stop_timer_interrupt(void)
310{ 353{
311 char *name; 354 char *name;
312 if (hpet_address) { 355 if (hpet_address) {
@@ -339,23 +382,29 @@ void __init time_init(void)
339 if (hpet_use_timer) { 382 if (hpet_use_timer) {
340 /* set tick_nsec to use the proper rate for HPET */ 383 /* set tick_nsec to use the proper rate for HPET */
341 tick_nsec = TICK_NSEC_HPET; 384 tick_nsec = TICK_NSEC_HPET;
342 cpu_khz = hpet_calibrate_tsc(); 385 tsc_khz = hpet_calibrate_tsc();
343 timename = "HPET"; 386 timename = "HPET";
344 } else { 387 } else {
345 pit_init(); 388 pit_init();
346 cpu_khz = pit_calibrate_tsc(); 389 tsc_khz = pit_calibrate_tsc();
347 timename = "PIT"; 390 timename = "PIT";
348 } 391 }
349 392
393 cpu_khz = tsc_khz;
394 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
395 boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
396 boot_cpu_data.x86 == 16)
397 cpu_khz = tsc_calibrate_cpu_khz();
398
350 if (unsynchronized_tsc()) 399 if (unsynchronized_tsc())
351 mark_tsc_unstable(); 400 mark_tsc_unstable("TSCs unsynchronized");
352 401
353 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) 402 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
354 vgetcpu_mode = VGETCPU_RDTSCP; 403 vgetcpu_mode = VGETCPU_RDTSCP;
355 else 404 else
356 vgetcpu_mode = VGETCPU_LSL; 405 vgetcpu_mode = VGETCPU_LSL;
357 406
358 set_cyc2ns_scale(cpu_khz); 407 set_cyc2ns_scale(tsc_khz);
359 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 408 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
360 cpu_khz / 1000, cpu_khz % 1000); 409 cpu_khz / 1000, cpu_khz % 1000);
361 init_tsc_clocksource(); 410 init_tsc_clocksource();
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
index c79b99a9e2f6..e7e2764c461b 100644
--- a/arch/x86_64/kernel/trampoline.S
+++ b/arch/x86_64/kernel/trampoline.S
@@ -3,6 +3,7 @@
3 * Trampoline.S Derived from Setup.S by Linus Torvalds 3 * Trampoline.S Derived from Setup.S by Linus Torvalds
4 * 4 *
5 * 4 Jan 1997 Michael Chastain: changed to gnu as. 5 * 4 Jan 1997 Michael Chastain: changed to gnu as.
6 * 15 Sept 2005 Eric Biederman: 64bit PIC support
6 * 7 *
7 * Entry: CS:IP point to the start of our code, we are 8 * Entry: CS:IP point to the start of our code, we are
8 * in real mode with no stack, but the rest of the 9 * in real mode with no stack, but the rest of the
@@ -17,15 +18,20 @@
17 * and IP is zero. Thus, data addresses need to be absolute 18 * and IP is zero. Thus, data addresses need to be absolute
18 * (no relocation) and are taken with regard to r_base. 19 * (no relocation) and are taken with regard to r_base.
19 * 20 *
21 * With the addition of trampoline_level4_pgt this code can
22 * now enter a 64bit kernel that lives at arbitrary 64bit
23 * physical addresses.
24 *
20 * If you work on this file, check the object module with objdump 25 * If you work on this file, check the object module with objdump
21 * --full-contents --reloc to make sure there are no relocation 26 * --full-contents --reloc to make sure there are no relocation
22 * entries. For the GDT entry we do hand relocation in smpboot.c 27 * entries.
23 * because of 64bit linker limitations.
24 */ 28 */
25 29
26#include <linux/linkage.h> 30#include <linux/linkage.h>
27#include <asm/segment.h> 31#include <asm/pgtable.h>
28#include <asm/page.h> 32#include <asm/page.h>
33#include <asm/msr.h>
34#include <asm/segment.h>
29 35
30.data 36.data
31 37
@@ -33,15 +39,33 @@
33 39
34ENTRY(trampoline_data) 40ENTRY(trampoline_data)
35r_base = . 41r_base = .
42 cli # We should be safe anyway
36 wbinvd 43 wbinvd
37 mov %cs, %ax # Code and data in the same place 44 mov %cs, %ax # Code and data in the same place
38 mov %ax, %ds 45 mov %ax, %ds
46 mov %ax, %es
47 mov %ax, %ss
39 48
40 cli # We should be safe anyway
41 49
42 movl $0xA5A5A5A5, trampoline_data - r_base 50 movl $0xA5A5A5A5, trampoline_data - r_base
43 # write marker for master knows we're running 51 # write marker for master knows we're running
44 52
53 # Setup stack
54 movw $(trampoline_stack_end - r_base), %sp
55
56 call verify_cpu # Verify the cpu supports long mode
57 testl %eax, %eax # Check for return code
58 jnz no_longmode
59
60 mov %cs, %ax
61 movzx %ax, %esi # Find the 32bit trampoline location
62 shll $4, %esi
63
64 # Fixup the vectors
65 addl %esi, startup_32_vector - r_base
66 addl %esi, startup_64_vector - r_base
67 addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer
68
45 /* 69 /*
46 * GDT tables in non default location kernel can be beyond 16MB and 70 * GDT tables in non default location kernel can be beyond 16MB and
47 * lgdt will not be able to load the address as in real mode default 71 * lgdt will not be able to load the address as in real mode default
@@ -49,23 +73,94 @@ r_base = .
49 * to 32 bit. 73 * to 32 bit.
50 */ 74 */
51 75
52 lidtl idt_48 - r_base # load idt with 0, 0 76 lidtl tidt - r_base # load idt with 0, 0
53 lgdtl gdt_48 - r_base # load gdt with whatever is appropriate 77 lgdtl tgdt - r_base # load gdt with whatever is appropriate
54 78
55 xor %ax, %ax 79 xor %ax, %ax
56 inc %ax # protected mode (PE) bit 80 inc %ax # protected mode (PE) bit
57 lmsw %ax # into protected mode 81 lmsw %ax # into protected mode
58 # flaush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S 82
59 ljmpl $__KERNEL32_CS, $(startup_32-__START_KERNEL_map) 83 # flush prefetch and jump to startup_32
84 ljmpl *(startup_32_vector - r_base)
85
86 .code32
87 .balign 4
88startup_32:
89 movl $__KERNEL_DS, %eax # Initialize the %ds segment register
90 movl %eax, %ds
91
92 xorl %eax, %eax
93 btsl $5, %eax # Enable PAE mode
94 movl %eax, %cr4
95
96 # Setup trampoline 4 level pagetables
97 leal (trampoline_level4_pgt - r_base)(%esi), %eax
98 movl %eax, %cr3
99
100 movl $MSR_EFER, %ecx
101 movl $(1 << _EFER_LME), %eax # Enable Long Mode
102 xorl %edx, %edx
103 wrmsr
104
105 xorl %eax, %eax
106 btsl $31, %eax # Enable paging and in turn activate Long Mode
107 btsl $0, %eax # Enable protected mode
108 movl %eax, %cr0
109
110 /*
111 * At this point we're in long mode but in 32bit compatibility mode
112 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
113 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
114 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
115 */
116 ljmp *(startup_64_vector - r_base)(%esi)
117
118 .code64
119 .balign 4
120startup_64:
121 # Now jump into the kernel using virtual addresses
122 movq $secondary_startup_64, %rax
123 jmp *%rax
124
125 .code16
126no_longmode:
127 hlt
128 jmp no_longmode
129#include "verify_cpu.S"
60 130
61 # Careful these need to be in the same 64K segment as the above; 131 # Careful these need to be in the same 64K segment as the above;
62idt_48: 132tidt:
63 .word 0 # idt limit = 0 133 .word 0 # idt limit = 0
64 .word 0, 0 # idt base = 0L 134 .word 0, 0 # idt base = 0L
65 135
66gdt_48: 136 # Duplicate the global descriptor table
67 .short GDT_ENTRIES*8 - 1 # gdt limit 137 # so the kernel can live anywhere
68 .long cpu_gdt_table-__START_KERNEL_map 138 .balign 4
139tgdt:
140 .short tgdt_end - tgdt # gdt limit
141 .long tgdt - r_base
142 .short 0
143 .quad 0x00cf9b000000ffff # __KERNEL32_CS
144 .quad 0x00af9b000000ffff # __KERNEL_CS
145 .quad 0x00cf93000000ffff # __KERNEL_DS
146tgdt_end:
147
148 .balign 4
149startup_32_vector:
150 .long startup_32 - r_base
151 .word __KERNEL32_CS, 0
152
153 .balign 4
154startup_64_vector:
155 .long startup_64 - r_base
156 .word __KERNEL_CS, 0
157
158trampoline_stack:
159 .org 0x1000
160trampoline_stack_end:
161ENTRY(trampoline_level4_pgt)
162 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
163 .fill 510,8,0
164 .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
69 165
70.globl trampoline_end 166ENTRY(trampoline_end)
71trampoline_end:
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 09d2e8a10a49..d76fc32d4599 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -426,8 +426,7 @@ void show_registers(struct pt_regs *regs)
426 const int cpu = smp_processor_id(); 426 const int cpu = smp_processor_id();
427 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 427 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
428 428
429 rsp = regs->rsp; 429 rsp = regs->rsp;
430
431 printk("CPU %d ", cpu); 430 printk("CPU %d ", cpu);
432 __show_regs(regs); 431 __show_regs(regs);
433 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 432 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -438,7 +437,6 @@ void show_registers(struct pt_regs *regs)
438 * time of the fault.. 437 * time of the fault..
439 */ 438 */
440 if (in_kernel) { 439 if (in_kernel) {
441
442 printk("Stack: "); 440 printk("Stack: ");
443 _show_stack(NULL, regs, (unsigned long*)rsp); 441 _show_stack(NULL, regs, (unsigned long*)rsp);
444 442
@@ -581,10 +579,20 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
581{ 579{
582 struct task_struct *tsk = current; 580 struct task_struct *tsk = current;
583 581
584 tsk->thread.error_code = error_code;
585 tsk->thread.trap_no = trapnr;
586
587 if (user_mode(regs)) { 582 if (user_mode(regs)) {
583 /*
584 * We want error_code and trap_no set for userspace
585 * faults and kernelspace faults which result in
586 * die(), but not kernelspace faults which are fixed
587 * up. die() gives the process no chance to handle
588 * the signal and notice the kernel fault information,
589 * so that won't result in polluting the information
590 * about previously queued, but not yet delivered,
591 * faults. See also do_general_protection below.
592 */
593 tsk->thread.error_code = error_code;
594 tsk->thread.trap_no = trapnr;
595
588 if (exception_trace && unhandled_signal(tsk, signr)) 596 if (exception_trace && unhandled_signal(tsk, signr))
589 printk(KERN_INFO 597 printk(KERN_INFO
590 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", 598 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
@@ -605,8 +613,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
605 fixup = search_exception_tables(regs->rip); 613 fixup = search_exception_tables(regs->rip);
606 if (fixup) 614 if (fixup)
607 regs->rip = fixup->fixup; 615 regs->rip = fixup->fixup;
608 else 616 else {
617 tsk->thread.error_code = error_code;
618 tsk->thread.trap_no = trapnr;
609 die(str, regs, error_code); 619 die(str, regs, error_code);
620 }
610 return; 621 return;
611 } 622 }
612} 623}
@@ -682,10 +693,10 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
682 693
683 conditional_sti(regs); 694 conditional_sti(regs);
684 695
685 tsk->thread.error_code = error_code;
686 tsk->thread.trap_no = 13;
687
688 if (user_mode(regs)) { 696 if (user_mode(regs)) {
697 tsk->thread.error_code = error_code;
698 tsk->thread.trap_no = 13;
699
689 if (exception_trace && unhandled_signal(tsk, SIGSEGV)) 700 if (exception_trace && unhandled_signal(tsk, SIGSEGV))
690 printk(KERN_INFO 701 printk(KERN_INFO
691 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", 702 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
@@ -704,6 +715,9 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
704 regs->rip = fixup->fixup; 715 regs->rip = fixup->fixup;
705 return; 716 return;
706 } 717 }
718
719 tsk->thread.error_code = error_code;
720 tsk->thread.trap_no = 13;
707 if (notify_die(DIE_GPF, "general protection fault", regs, 721 if (notify_die(DIE_GPF, "general protection fault", regs,
708 error_code, 13, SIGSEGV) == NOTIFY_STOP) 722 error_code, 13, SIGSEGV) == NOTIFY_STOP)
709 return; 723 return;
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
index 1a0edbbffaa0..48f9a8e6aa91 100644
--- a/arch/x86_64/kernel/tsc.c
+++ b/arch/x86_64/kernel/tsc.c
@@ -13,6 +13,8 @@ static int notsc __initdata = 0;
13 13
14unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 14unsigned int cpu_khz; /* TSC clocks / usec, not used here */
15EXPORT_SYMBOL(cpu_khz); 15EXPORT_SYMBOL(cpu_khz);
16unsigned int tsc_khz;
17EXPORT_SYMBOL(tsc_khz);
16 18
17static unsigned int cyc2ns_scale __read_mostly; 19static unsigned int cyc2ns_scale __read_mostly;
18 20
@@ -77,7 +79,7 @@ static void handle_cpufreq_delayed_get(struct work_struct *v)
77static unsigned int ref_freq = 0; 79static unsigned int ref_freq = 0;
78static unsigned long loops_per_jiffy_ref = 0; 80static unsigned long loops_per_jiffy_ref = 0;
79 81
80static unsigned long cpu_khz_ref = 0; 82static unsigned long tsc_khz_ref = 0;
81 83
82static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 84static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
83 void *data) 85 void *data)
@@ -99,7 +101,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
99 if (!ref_freq) { 101 if (!ref_freq) {
100 ref_freq = freq->old; 102 ref_freq = freq->old;
101 loops_per_jiffy_ref = *lpj; 103 loops_per_jiffy_ref = *lpj;
102 cpu_khz_ref = cpu_khz; 104 tsc_khz_ref = tsc_khz;
103 } 105 }
104 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || 106 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
105 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || 107 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
@@ -107,12 +109,12 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
107 *lpj = 109 *lpj =
108 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); 110 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
109 111
110 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); 112 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
111 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 113 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
112 mark_tsc_unstable(); 114 mark_tsc_unstable("cpufreq changes");
113 } 115 }
114 116
115 set_cyc2ns_scale(cpu_khz_ref); 117 set_cyc2ns_scale(tsc_khz_ref);
116 118
117 return 0; 119 return 0;
118} 120}
@@ -197,10 +199,11 @@ static struct clocksource clocksource_tsc = {
197 .vread = vread_tsc, 199 .vread = vread_tsc,
198}; 200};
199 201
200void mark_tsc_unstable(void) 202void mark_tsc_unstable(char *reason)
201{ 203{
202 if (!tsc_unstable) { 204 if (!tsc_unstable) {
203 tsc_unstable = 1; 205 tsc_unstable = 1;
206 printk("Marking TSC unstable due to %s\n", reason);
204 /* Change only the rating, when not registered */ 207 /* Change only the rating, when not registered */
205 if (clocksource_tsc.mult) 208 if (clocksource_tsc.mult)
206 clocksource_change_rating(&clocksource_tsc, 0); 209 clocksource_change_rating(&clocksource_tsc, 0);
@@ -213,7 +216,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
213void __init init_tsc_clocksource(void) 216void __init init_tsc_clocksource(void)
214{ 217{
215 if (!notsc) { 218 if (!notsc) {
216 clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, 219 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
217 clocksource_tsc.shift); 220 clocksource_tsc.shift);
218 if (check_tsc_unstable()) 221 if (check_tsc_unstable())
219 clocksource_tsc.rating = 0; 222 clocksource_tsc.rating = 0;
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c
index 014f0db45dfa..355f5f506c81 100644
--- a/arch/x86_64/kernel/tsc_sync.c
+++ b/arch/x86_64/kernel/tsc_sync.c
@@ -50,7 +50,7 @@ static __cpuinit void check_tsc_warp(void)
50 /* 50 /*
51 * The measurement runs for 20 msecs: 51 * The measurement runs for 20 msecs:
52 */ 52 */
53 end = start + cpu_khz * 20ULL; 53 end = start + tsc_khz * 20ULL;
54 now = start; 54 now = start;
55 55
56 for (i = 0; ; i++) { 56 for (i = 0; ; i++) {
@@ -138,7 +138,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
138 printk("\n"); 138 printk("\n");
139 printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," 139 printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
140 " turning off TSC clock.\n", max_warp); 140 " turning off TSC clock.\n", max_warp);
141 mark_tsc_unstable(); 141 mark_tsc_unstable("check_tsc_sync_source failed");
142 nr_warps = 0; 142 nr_warps = 0;
143 max_warp = 0; 143 max_warp = 0;
144 last_tsc = 0; 144 last_tsc = 0;
diff --git a/arch/x86_64/kernel/verify_cpu.S b/arch/x86_64/kernel/verify_cpu.S
new file mode 100644
index 000000000000..e035f5948199
--- /dev/null
+++ b/arch/x86_64/kernel/verify_cpu.S
@@ -0,0 +1,119 @@
1/*
2 *
3 * verify_cpu.S - Code for cpu long mode and SSE verification. This
4 * code has been borrowed from boot/setup.S and was introduced by
5 * Andi Kleen.
6 *
7 * Copyright (c) 2007 Andi Kleen (ak@suse.de)
8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
10 *
11 * This source code is licensed under the GNU General Public License,
12 * Version 2. See the file COPYING for more details.
13 *
14 * This is common code for verifying whether the CPU supports
15 * long mode and SSE. It is not called directly; instead this
16 * file is included in various places and compiled in that context.
17 * The current users are listed below.
18 *
19 * This file is included by both 16bit and 32bit code.
20 *
21 * arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
22 * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
23 * arch/x86_64/kernel/trampoline.S: secondary processor verification (16bit)
24 * arch/x86_64/kernel/acpi/wakeup.S: Verification at resume (16bit)
25 *
26 * verify_cpu, returns the status of cpu check in register %eax.
27 * 0: Success 1: Failure
28 *
29 * The caller needs to check for the error code and take the action
30 * appropriately. Either display a message or halt.
31 */
32
33#include <asm/cpufeature.h>
34
35verify_cpu:
36 pushfl # Save caller passed flags
37 pushl $0 # Kill any dangerous flags
38 popfl
39
40 /* minimum CPUID flags for x86-64 as defined by AMD */
41#define M(x) (1<<(x))
42#define M2(a,b) M(a)|M(b)
43#define M4(a,b,c,d) M(a)|M(b)|M(c)|M(d)
44
45#define SSE_MASK \
46 (M2(X86_FEATURE_XMM,X86_FEATURE_XMM2))
47#define REQUIRED_MASK1 \
48 (M4(X86_FEATURE_FPU,X86_FEATURE_PSE,X86_FEATURE_TSC,X86_FEATURE_MSR)|\
49 M4(X86_FEATURE_PAE,X86_FEATURE_CX8,X86_FEATURE_PGE,X86_FEATURE_CMOV)|\
50 M(X86_FEATURE_FXSR))
51#define REQUIRED_MASK2 \
52 (M(X86_FEATURE_LM - 32))
53
54 pushfl # standard way to check for cpuid
55 popl %eax
56 movl %eax,%ebx
57 xorl $0x200000,%eax
58 pushl %eax
59 popfl
60 pushfl
61 popl %eax
62 cmpl %eax,%ebx
63 jz verify_cpu_no_longmode # cpu has no cpuid
64
65 movl $0x0,%eax # See if cpuid 1 is implemented
66 cpuid
67 cmpl $0x1,%eax
68 jb verify_cpu_no_longmode # no cpuid 1
69
70 xor %di,%di
71 cmpl $0x68747541,%ebx # AuthenticAMD
72 jnz verify_cpu_noamd
73 cmpl $0x69746e65,%edx
74 jnz verify_cpu_noamd
75 cmpl $0x444d4163,%ecx
76 jnz verify_cpu_noamd
77 mov $1,%di # cpu is from AMD
78
79verify_cpu_noamd:
80 movl $0x1,%eax # Does the cpu have what it takes
81 cpuid
82 andl $REQUIRED_MASK1,%edx
83 xorl $REQUIRED_MASK1,%edx
84 jnz verify_cpu_no_longmode
85
86 movl $0x80000000,%eax # See if extended cpuid is implemented
87 cpuid
88 cmpl $0x80000001,%eax
89 jb verify_cpu_no_longmode # no extended cpuid
90
91 movl $0x80000001,%eax # Does the cpu have what it takes
92 cpuid
93 andl $REQUIRED_MASK2,%edx
94 xorl $REQUIRED_MASK2,%edx
95 jnz verify_cpu_no_longmode
96
97verify_cpu_sse_test:
98 movl $1,%eax
99 cpuid
100 andl $SSE_MASK,%edx
101 cmpl $SSE_MASK,%edx
102 je verify_cpu_sse_ok
103 test %di,%di
104 jz verify_cpu_no_longmode # only try to force SSE on AMD
105 movl $0xc0010015,%ecx # HWCR
106 rdmsr
107 btr $15,%eax # enable SSE
108 wrmsr
109 xor %di,%di # don't loop
110 jmp verify_cpu_sse_test # try again
111
112verify_cpu_no_longmode:
113 popfl # Restore caller passed flags
114 movl $1,%eax
115 ret
116verify_cpu_sse_ok:
117 popfl # Restore caller passed flags
118 xorl %eax, %eax
119 ret
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 5176ecf006ee..88cfa50b424d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -29,9 +29,7 @@ SECTIONS
29 .text : AT(ADDR(.text) - LOAD_OFFSET) { 29 .text : AT(ADDR(.text) - LOAD_OFFSET) {
30 /* First the code that has to be first for bootstrapping */ 30 /* First the code that has to be first for bootstrapping */
31 *(.bootstrap.text) 31 *(.bootstrap.text)
32 /* Then all the functions that are "hot" in profiles, to group them 32 _stext = .;
33 onto the same hugetlb entry */
34 #include "functionlist"
35 /* Then the rest */ 33 /* Then the rest */
36 *(.text) 34 *(.text)
37 SCHED_TEXT 35 SCHED_TEXT
@@ -50,10 +48,10 @@ SECTIONS
50 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } 48 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
51 __stop___ex_table = .; 49 __stop___ex_table = .;
52 50
53 RODATA
54
55 BUG_TABLE 51 BUG_TABLE
56 52
53 RODATA
54
57 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ 55 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
58 /* Data */ 56 /* Data */
59 .data : AT(ADDR(.data) - LOAD_OFFSET) { 57 .data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -94,6 +92,12 @@ SECTIONS
94 { *(.vsyscall_gtod_data) } 92 { *(.vsyscall_gtod_data) }
95 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); 93 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
96 94
95
96 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
97 { *(.vsyscall_1) }
98 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
99 { *(.vsyscall_2) }
100
97 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } 101 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
98 vgetcpu_mode = VVIRT(.vgetcpu_mode); 102 vgetcpu_mode = VVIRT(.vgetcpu_mode);
99 103
@@ -101,10 +105,6 @@ SECTIONS
101 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } 105 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
102 jiffies = VVIRT(.jiffies); 106 jiffies = VVIRT(.jiffies);
103 107
104 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
105 { *(.vsyscall_1) }
106 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
107 { *(.vsyscall_2) }
108 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) 108 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
109 { *(.vsyscall_3) } 109 { *(.vsyscall_3) }
110 110
@@ -194,7 +194,7 @@ SECTIONS
194 __initramfs_end = .; 194 __initramfs_end = .;
195#endif 195#endif
196 196
197 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 197 . = ALIGN(4096);
198 __per_cpu_start = .; 198 __per_cpu_start = .;
199 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } 199 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
200 __per_cpu_end = .; 200 __per_cpu_end = .;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b43c698cf7d3..dc32cef96195 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -45,14 +45,34 @@
45 45
46#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 46#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
47#define __syscall_clobber "r11","rcx","memory" 47#define __syscall_clobber "r11","rcx","memory"
48#define __pa_vsymbol(x) \
49 ({unsigned long v; \
50 extern char __vsyscall_0; \
51 asm("" : "=r" (v) : "0" (x)); \
52 ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
48 53
54/*
55 * vsyscall_gtod_data contains data that is :
56 * - readonly from vsyscalls
57 * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
58 * Try to keep this structure as small as possible to avoid cache line ping pongs
59 */
49struct vsyscall_gtod_data_t { 60struct vsyscall_gtod_data_t {
50 seqlock_t lock; 61 seqlock_t lock;
51 int sysctl_enabled; 62
52 struct timeval wall_time_tv; 63 /* open coded 'struct timespec' */
64 time_t wall_time_sec;
65 u32 wall_time_nsec;
66
67 int sysctl_enabled;
53 struct timezone sys_tz; 68 struct timezone sys_tz;
54 cycle_t offset_base; 69 struct { /* extract of a clocksource struct */
55 struct clocksource clock; 70 cycle_t (*vread)(void);
71 cycle_t cycle_last;
72 cycle_t mask;
73 u32 mult;
74 u32 shift;
75 } clock;
56}; 76};
57int __vgetcpu_mode __section_vgetcpu_mode; 77int __vgetcpu_mode __section_vgetcpu_mode;
58 78
@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
68 88
69 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 89 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
70 /* copy vsyscall data */ 90 /* copy vsyscall data */
71 vsyscall_gtod_data.clock = *clock; 91 vsyscall_gtod_data.clock.vread = clock->vread;
72 vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; 92 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
73 vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; 93 vsyscall_gtod_data.clock.mask = clock->mask;
94 vsyscall_gtod_data.clock.mult = clock->mult;
95 vsyscall_gtod_data.clock.shift = clock->shift;
96 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
97 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
74 vsyscall_gtod_data.sys_tz = sys_tz; 98 vsyscall_gtod_data.sys_tz = sys_tz;
75 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 99 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
76} 100}
@@ -105,7 +129,8 @@ static __always_inline long time_syscall(long *t)
105static __always_inline void do_vgettimeofday(struct timeval * tv) 129static __always_inline void do_vgettimeofday(struct timeval * tv)
106{ 130{
107 cycle_t now, base, mask, cycle_delta; 131 cycle_t now, base, mask, cycle_delta;
108 unsigned long seq, mult, shift, nsec_delta; 132 unsigned seq;
133 unsigned long mult, shift, nsec;
109 cycle_t (*vread)(void); 134 cycle_t (*vread)(void);
110 do { 135 do {
111 seq = read_seqbegin(&__vsyscall_gtod_data.lock); 136 seq = read_seqbegin(&__vsyscall_gtod_data.lock);
@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
121 mult = __vsyscall_gtod_data.clock.mult; 146 mult = __vsyscall_gtod_data.clock.mult;
122 shift = __vsyscall_gtod_data.clock.shift; 147 shift = __vsyscall_gtod_data.clock.shift;
123 148
124 *tv = __vsyscall_gtod_data.wall_time_tv; 149 tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
125 150 nsec = __vsyscall_gtod_data.wall_time_nsec;
126 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); 151 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
127 152
128 /* calculate interval: */ 153 /* calculate interval: */
129 cycle_delta = (now - base) & mask; 154 cycle_delta = (now - base) & mask;
130 /* convert to nsecs: */ 155 /* convert to nsecs: */
131 nsec_delta = (cycle_delta * mult) >> shift; 156 nsec += (cycle_delta * mult) >> shift;
132 157
133 /* convert to usecs and add to timespec: */ 158 while (nsec >= NSEC_PER_SEC) {
134 tv->tv_usec += nsec_delta / NSEC_PER_USEC;
135 while (tv->tv_usec > USEC_PER_SEC) {
136 tv->tv_sec += 1; 159 tv->tv_sec += 1;
137 tv->tv_usec -= USEC_PER_SEC; 160 nsec -= NSEC_PER_SEC;
138 } 161 }
162 tv->tv_usec = nsec / NSEC_PER_USEC;
139} 163}
140 164
141int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 165int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
@@ -151,11 +175,13 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
151 * unlikely */ 175 * unlikely */
152time_t __vsyscall(1) vtime(time_t *t) 176time_t __vsyscall(1) vtime(time_t *t)
153{ 177{
178 time_t result;
154 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) 179 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
155 return time_syscall(t); 180 return time_syscall(t);
156 else if (t) 181 result = __vsyscall_gtod_data.wall_time_sec;
157 *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; 182 if (t)
158 return __vsyscall_gtod_data.wall_time_tv.tv_sec; 183 *t = result;
184 return result;
159} 185}
160 186
161/* Fast way to get current CPU and node. 187/* Fast way to get current CPU and node.
@@ -224,10 +250,10 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
224 return ret; 250 return ret;
225 /* gcc has some trouble with __va(__pa()), so just do it this 251 /* gcc has some trouble with __va(__pa()), so just do it this
226 way. */ 252 way. */
227 map1 = ioremap(__pa_symbol(&vsysc1), 2); 253 map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
228 if (!map1) 254 if (!map1)
229 return -ENOMEM; 255 return -ENOMEM;
230 map2 = ioremap(__pa_symbol(&vsysc2), 2); 256 map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
231 if (!map2) { 257 if (!map2) {
232 ret = -ENOMEM; 258 ret = -ENOMEM;
233 goto out; 259 goto out;