diff options
Diffstat (limited to 'arch/i386/kernel')
47 files changed, 1647 insertions, 2333 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 96fb8a020af2..5e70c2fb273a 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds | |||
7 | obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ | 7 | obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ |
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ |
9 | pci-dma.o i386_ksyms.o i387.o bootflag.o \ | 9 | pci-dma.o i386_ksyms.o i387.o bootflag.o \ |
10 | quirks.o i8237.o topology.o alternative.o | 10 | quirks.o i8237.o topology.o alternative.o i8253.o tsc.o |
11 | 11 | ||
12 | obj-y += cpu/ | 12 | obj-y += cpu/ |
13 | obj-y += timers/ | ||
14 | obj-y += acpi/ | 13 | obj-y += acpi/ |
15 | obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o | 14 | obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o |
16 | obj-$(CONFIG_MCA) += mca.o | 15 | obj-$(CONFIG_MCA) += mca.o |
@@ -37,6 +36,8 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o | |||
37 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o | 36 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o |
38 | obj-$(CONFIG_VM86) += vm86.o | 37 | obj-$(CONFIG_VM86) += vm86.o |
39 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 38 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
39 | obj-$(CONFIG_HPET_TIMER) += hpet.o | ||
40 | obj-$(CONFIG_K8_NB) += k8.o | ||
40 | 41 | ||
41 | EXTRA_AFLAGS := -traditional | 42 | EXTRA_AFLAGS := -traditional |
42 | 43 | ||
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r | |||
76 | $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ | 77 | $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ |
77 | $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE | 78 | $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE |
78 | $(call if_changed,syscall) | 79 | $(call if_changed,syscall) |
80 | |||
81 | k8-y += ../../x86_64/kernel/k8.o | ||
82 | |||
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 5cbd6f99fb2a..50eb0e03777e 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c | |||
@@ -4,27 +4,41 @@ | |||
4 | #include <asm/alternative.h> | 4 | #include <asm/alternative.h> |
5 | #include <asm/sections.h> | 5 | #include <asm/sections.h> |
6 | 6 | ||
7 | #define DEBUG 0 | 7 | static int no_replacement = 0; |
8 | #if DEBUG | 8 | static int smp_alt_once = 0; |
9 | # define DPRINTK(fmt, args...) printk(fmt, args) | 9 | static int debug_alternative = 0; |
10 | #else | 10 | |
11 | # define DPRINTK(fmt, args...) | 11 | static int __init noreplacement_setup(char *s) |
12 | #endif | 12 | { |
13 | no_replacement = 1; | ||
14 | return 1; | ||
15 | } | ||
16 | static int __init bootonly(char *str) | ||
17 | { | ||
18 | smp_alt_once = 1; | ||
19 | return 1; | ||
20 | } | ||
21 | static int __init debug_alt(char *str) | ||
22 | { | ||
23 | debug_alternative = 1; | ||
24 | return 1; | ||
25 | } | ||
13 | 26 | ||
27 | __setup("noreplacement", noreplacement_setup); | ||
28 | __setup("smp-alt-boot", bootonly); | ||
29 | __setup("debug-alternative", debug_alt); | ||
30 | |||
31 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | ||
32 | printk(KERN_DEBUG fmt, args) | ||
33 | |||
34 | #ifdef GENERIC_NOP1 | ||
14 | /* Use inline assembly to define this because the nops are defined | 35 | /* Use inline assembly to define this because the nops are defined |
15 | as inline assembly strings in the include files and we cannot | 36 | as inline assembly strings in the include files and we cannot |
16 | get them easily into strings. */ | 37 | get them easily into strings. */ |
17 | asm("\t.data\nintelnops: " | 38 | asm("\t.data\nintelnops: " |
18 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | 39 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 |
19 | GENERIC_NOP7 GENERIC_NOP8); | 40 | GENERIC_NOP7 GENERIC_NOP8); |
20 | asm("\t.data\nk8nops: " | 41 | extern unsigned char intelnops[]; |
21 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | ||
22 | K8_NOP7 K8_NOP8); | ||
23 | asm("\t.data\nk7nops: " | ||
24 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | ||
25 | K7_NOP7 K7_NOP8); | ||
26 | |||
27 | extern unsigned char intelnops[], k8nops[], k7nops[]; | ||
28 | static unsigned char *intel_nops[ASM_NOP_MAX+1] = { | 42 | static unsigned char *intel_nops[ASM_NOP_MAX+1] = { |
29 | NULL, | 43 | NULL, |
30 | intelnops, | 44 | intelnops, |
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = { | |||
36 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, | 50 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, |
37 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 51 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
38 | }; | 52 | }; |
53 | #endif | ||
54 | |||
55 | #ifdef K8_NOP1 | ||
56 | asm("\t.data\nk8nops: " | ||
57 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | ||
58 | K8_NOP7 K8_NOP8); | ||
59 | extern unsigned char k8nops[]; | ||
39 | static unsigned char *k8_nops[ASM_NOP_MAX+1] = { | 60 | static unsigned char *k8_nops[ASM_NOP_MAX+1] = { |
40 | NULL, | 61 | NULL, |
41 | k8nops, | 62 | k8nops, |
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { | |||
47 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, | 68 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, |
48 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 69 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
49 | }; | 70 | }; |
71 | #endif | ||
72 | |||
73 | #ifdef K7_NOP1 | ||
74 | asm("\t.data\nk7nops: " | ||
75 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | ||
76 | K7_NOP7 K7_NOP8); | ||
77 | extern unsigned char k7nops[]; | ||
50 | static unsigned char *k7_nops[ASM_NOP_MAX+1] = { | 78 | static unsigned char *k7_nops[ASM_NOP_MAX+1] = { |
51 | NULL, | 79 | NULL, |
52 | k7nops, | 80 | k7nops, |
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = { | |||
58 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, | 86 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, |
59 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 87 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
60 | }; | 88 | }; |
89 | #endif | ||
90 | |||
91 | #ifdef CONFIG_X86_64 | ||
92 | |||
93 | extern char __vsyscall_0; | ||
94 | static inline unsigned char** find_nop_table(void) | ||
95 | { | ||
96 | return k8_nops; | ||
97 | } | ||
98 | |||
99 | #else /* CONFIG_X86_64 */ | ||
100 | |||
61 | static struct nop { | 101 | static struct nop { |
62 | int cpuid; | 102 | int cpuid; |
63 | unsigned char **noptable; | 103 | unsigned char **noptable; |
@@ -67,14 +107,6 @@ static struct nop { | |||
67 | { -1, NULL } | 107 | { -1, NULL } |
68 | }; | 108 | }; |
69 | 109 | ||
70 | |||
71 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | ||
72 | extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; | ||
73 | extern u8 *__smp_locks[], *__smp_locks_end[]; | ||
74 | |||
75 | extern u8 __smp_alt_begin[], __smp_alt_end[]; | ||
76 | |||
77 | |||
78 | static unsigned char** find_nop_table(void) | 110 | static unsigned char** find_nop_table(void) |
79 | { | 111 | { |
80 | unsigned char **noptable = intel_nops; | 112 | unsigned char **noptable = intel_nops; |
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void) | |||
89 | return noptable; | 121 | return noptable; |
90 | } | 122 | } |
91 | 123 | ||
124 | #endif /* CONFIG_X86_64 */ | ||
125 | |||
126 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | ||
127 | extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; | ||
128 | extern u8 *__smp_locks[], *__smp_locks_end[]; | ||
129 | |||
130 | extern u8 __smp_alt_begin[], __smp_alt_end[]; | ||
131 | |||
92 | /* Replace instructions with better alternatives for this CPU type. | 132 | /* Replace instructions with better alternatives for this CPU type. |
93 | This runs before SMP is initialized to avoid SMP problems with | 133 | This runs before SMP is initialized to avoid SMP problems with |
94 | self modifying code. This implies that assymetric systems where | 134 | self modifying code. This implies that assymetric systems where |
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | |||
99 | { | 139 | { |
100 | unsigned char **noptable = find_nop_table(); | 140 | unsigned char **noptable = find_nop_table(); |
101 | struct alt_instr *a; | 141 | struct alt_instr *a; |
142 | u8 *instr; | ||
102 | int diff, i, k; | 143 | int diff, i, k; |
103 | 144 | ||
104 | DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); | 145 | DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); |
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | |||
106 | BUG_ON(a->replacementlen > a->instrlen); | 147 | BUG_ON(a->replacementlen > a->instrlen); |
107 | if (!boot_cpu_has(a->cpuid)) | 148 | if (!boot_cpu_has(a->cpuid)) |
108 | continue; | 149 | continue; |
109 | memcpy(a->instr, a->replacement, a->replacementlen); | 150 | instr = a->instr; |
151 | #ifdef CONFIG_X86_64 | ||
152 | /* vsyscall code is not mapped yet. resolve it manually. */ | ||
153 | if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { | ||
154 | instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); | ||
155 | DPRINTK("%s: vsyscall fixup: %p => %p\n", | ||
156 | __FUNCTION__, a->instr, instr); | ||
157 | } | ||
158 | #endif | ||
159 | memcpy(instr, a->replacement, a->replacementlen); | ||
110 | diff = a->instrlen - a->replacementlen; | 160 | diff = a->instrlen - a->replacementlen; |
111 | /* Pad the rest with nops */ | 161 | /* Pad the rest with nops */ |
112 | for (i = a->replacementlen; diff > 0; diff -= k, i += k) { | 162 | for (i = a->replacementlen; diff > 0; diff -= k, i += k) { |
@@ -186,14 +236,6 @@ struct smp_alt_module { | |||
186 | static LIST_HEAD(smp_alt_modules); | 236 | static LIST_HEAD(smp_alt_modules); |
187 | static DEFINE_SPINLOCK(smp_alt); | 237 | static DEFINE_SPINLOCK(smp_alt); |
188 | 238 | ||
189 | static int smp_alt_once = 0; | ||
190 | static int __init bootonly(char *str) | ||
191 | { | ||
192 | smp_alt_once = 1; | ||
193 | return 1; | ||
194 | } | ||
195 | __setup("smp-alt-boot", bootonly); | ||
196 | |||
197 | void alternatives_smp_module_add(struct module *mod, char *name, | 239 | void alternatives_smp_module_add(struct module *mod, char *name, |
198 | void *locks, void *locks_end, | 240 | void *locks, void *locks_end, |
199 | void *text, void *text_end) | 241 | void *text, void *text_end) |
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name, | |||
201 | struct smp_alt_module *smp; | 243 | struct smp_alt_module *smp; |
202 | unsigned long flags; | 244 | unsigned long flags; |
203 | 245 | ||
246 | if (no_replacement) | ||
247 | return; | ||
248 | |||
204 | if (smp_alt_once) { | 249 | if (smp_alt_once) { |
205 | if (boot_cpu_has(X86_FEATURE_UP)) | 250 | if (boot_cpu_has(X86_FEATURE_UP)) |
206 | alternatives_smp_unlock(locks, locks_end, | 251 | alternatives_smp_unlock(locks, locks_end, |
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod) | |||
235 | struct smp_alt_module *item; | 280 | struct smp_alt_module *item; |
236 | unsigned long flags; | 281 | unsigned long flags; |
237 | 282 | ||
238 | if (smp_alt_once) | 283 | if (no_replacement || smp_alt_once) |
239 | return; | 284 | return; |
240 | 285 | ||
241 | spin_lock_irqsave(&smp_alt, flags); | 286 | spin_lock_irqsave(&smp_alt, flags); |
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp) | |||
256 | struct smp_alt_module *mod; | 301 | struct smp_alt_module *mod; |
257 | unsigned long flags; | 302 | unsigned long flags; |
258 | 303 | ||
259 | if (smp_alt_once) | 304 | if (no_replacement || smp_alt_once) |
260 | return; | 305 | return; |
261 | BUG_ON(!smp && (num_online_cpus() > 1)); | 306 | BUG_ON(!smp && (num_online_cpus() > 1)); |
262 | 307 | ||
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp) | |||
285 | 330 | ||
286 | void __init alternative_instructions(void) | 331 | void __init alternative_instructions(void) |
287 | { | 332 | { |
333 | if (no_replacement) { | ||
334 | printk(KERN_INFO "(SMP-)alternatives turned off\n"); | ||
335 | free_init_pages("SMP alternatives", | ||
336 | (unsigned long)__smp_alt_begin, | ||
337 | (unsigned long)__smp_alt_end); | ||
338 | return; | ||
339 | } | ||
288 | apply_alternatives(__alt_instructions, __alt_instructions_end); | 340 | apply_alternatives(__alt_instructions, __alt_instructions_end); |
289 | 341 | ||
290 | /* switch to patch-once-at-boottime-only mode and free the | 342 | /* switch to patch-once-at-boottime-only mode and free the |
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 5ab59c12335b..7ce09492fc0c 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/arch_hooks.h> | 36 | #include <asm/arch_hooks.h> |
37 | #include <asm/hpet.h> | 37 | #include <asm/hpet.h> |
38 | #include <asm/i8253.h> | 38 | #include <asm/i8253.h> |
39 | #include <asm/nmi.h> | ||
39 | 40 | ||
40 | #include <mach_apic.h> | 41 | #include <mach_apic.h> |
41 | #include <mach_apicdef.h> | 42 | #include <mach_apicdef.h> |
@@ -156,7 +157,7 @@ void clear_local_APIC(void) | |||
156 | maxlvt = get_maxlvt(); | 157 | maxlvt = get_maxlvt(); |
157 | 158 | ||
158 | /* | 159 | /* |
159 | * Masking an LVT entry on a P6 can trigger a local APIC error | 160 | * Masking an LVT entry can trigger a local APIC error |
160 | * if the vector is zero. Mask LVTERR first to prevent this. | 161 | * if the vector is zero. Mask LVTERR first to prevent this. |
161 | */ | 162 | */ |
162 | if (maxlvt >= 3) { | 163 | if (maxlvt >= 3) { |
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void) | |||
1117 | unsigned long v; | 1118 | unsigned long v; |
1118 | 1119 | ||
1119 | v = apic_read(APIC_LVTT); | 1120 | v = apic_read(APIC_LVTT); |
1120 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | 1121 | /* |
1122 | * When an illegal vector value (0-15) is written to an LVT | ||
1123 | * entry and delivery mode is Fixed, the APIC may signal an | ||
1124 | * illegal vector error, with out regard to whether the mask | ||
1125 | * bit is set or whether an interrupt is actually seen on input. | ||
1126 | * | ||
1127 | * Boot sequence might call this function when the LVTT has | ||
1128 | * '0' vector value. So make sure vector field is set to | ||
1129 | * valid value. | ||
1130 | */ | ||
1131 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
1132 | apic_write_around(APIC_LVTT, v); | ||
1121 | } | 1133 | } |
1122 | } | 1134 | } |
1123 | 1135 | ||
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 9e819eb68229..7c5729d1fd06 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -764,9 +764,9 @@ static int apm_do_idle(void) | |||
764 | int idled = 0; | 764 | int idled = 0; |
765 | int polling; | 765 | int polling; |
766 | 766 | ||
767 | polling = test_thread_flag(TIF_POLLING_NRFLAG); | 767 | polling = !!(current_thread_info()->status & TS_POLLING); |
768 | if (polling) { | 768 | if (polling) { |
769 | clear_thread_flag(TIF_POLLING_NRFLAG); | 769 | current_thread_info()->status &= ~TS_POLLING; |
770 | smp_mb__after_clear_bit(); | 770 | smp_mb__after_clear_bit(); |
771 | } | 771 | } |
772 | if (!need_resched()) { | 772 | if (!need_resched()) { |
@@ -774,7 +774,7 @@ static int apm_do_idle(void) | |||
774 | ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); | 774 | ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); |
775 | } | 775 | } |
776 | if (polling) | 776 | if (polling) |
777 | set_thread_flag(TIF_POLLING_NRFLAG); | 777 | current_thread_info()->status |= TS_POLLING; |
778 | 778 | ||
779 | if (!idled) | 779 | if (!idled) |
780 | return 0; | 780 | return 0; |
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 36d66e2077d0..c80271f8f084 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * to extract and format the required data. | 4 | * to extract and format the required data. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/crypto.h> | ||
7 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
8 | #include <linux/signal.h> | 9 | #include <linux/signal.h> |
9 | #include <linux/personality.h> | 10 | #include <linux/personality.h> |
@@ -13,6 +14,7 @@ | |||
13 | #include <asm/fixmap.h> | 14 | #include <asm/fixmap.h> |
14 | #include <asm/processor.h> | 15 | #include <asm/processor.h> |
15 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
17 | #include <asm/elf.h> | ||
16 | 18 | ||
17 | #define DEFINE(sym, val) \ | 19 | #define DEFINE(sym, val) \ |
18 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | 20 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) |
@@ -53,6 +55,7 @@ void foo(void) | |||
53 | OFFSET(TI_preempt_count, thread_info, preempt_count); | 55 | OFFSET(TI_preempt_count, thread_info, preempt_count); |
54 | OFFSET(TI_addr_limit, thread_info, addr_limit); | 56 | OFFSET(TI_addr_limit, thread_info, addr_limit); |
55 | OFFSET(TI_restart_block, thread_info, restart_block); | 57 | OFFSET(TI_restart_block, thread_info, restart_block); |
58 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); | ||
56 | BLANK(); | 59 | BLANK(); |
57 | 60 | ||
58 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); | 61 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); |
@@ -68,5 +71,7 @@ void foo(void) | |||
68 | sizeof(struct tss_struct)); | 71 | sizeof(struct tss_struct)); |
69 | 72 | ||
70 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 73 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
71 | DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); | 74 | DEFINE(VDSO_PRELINK, VDSO_PRELINK); |
75 | |||
76 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | ||
72 | } | 77 | } |
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 786d1a57048b..e6a2d6b80cda 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -224,22 +224,26 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
224 | 224 | ||
225 | #ifdef CONFIG_X86_HT | 225 | #ifdef CONFIG_X86_HT |
226 | /* | 226 | /* |
227 | * On a AMD dual core setup the lower bits of the APIC id | 227 | * On a AMD multi core setup the lower bits of the APIC id |
228 | * distingush the cores. Assumes number of cores is a power | 228 | * distingush the cores. |
229 | * of two. | ||
230 | */ | 229 | */ |
231 | if (c->x86_max_cores > 1) { | 230 | if (c->x86_max_cores > 1) { |
232 | int cpu = smp_processor_id(); | 231 | int cpu = smp_processor_id(); |
233 | unsigned bits = 0; | 232 | unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; |
234 | while ((1 << bits) < c->x86_max_cores) | 233 | |
235 | bits++; | 234 | if (bits == 0) { |
236 | cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); | 235 | while ((1 << bits) < c->x86_max_cores) |
237 | phys_proc_id[cpu] >>= bits; | 236 | bits++; |
237 | } | ||
238 | c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1); | ||
239 | c->phys_proc_id >>= bits; | ||
238 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", | 240 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", |
239 | cpu, c->x86_max_cores, cpu_core_id[cpu]); | 241 | cpu, c->x86_max_cores, c->cpu_core_id); |
240 | } | 242 | } |
241 | #endif | 243 | #endif |
242 | 244 | ||
245 | if (cpuid_eax(0x80000000) >= 0x80000006) | ||
246 | num_cache_leaves = 3; | ||
243 | } | 247 | } |
244 | 248 | ||
245 | static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 249 | static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 44f2c5f2dda1..70c87de582c7 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -294,7 +294,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |||
294 | if (c->x86 >= 0x6) | 294 | if (c->x86 >= 0x6) |
295 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 295 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
296 | c->x86_mask = tfms & 15; | 296 | c->x86_mask = tfms & 15; |
297 | #ifdef CONFIG_SMP | 297 | #ifdef CONFIG_X86_HT |
298 | c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); | 298 | c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); |
299 | #else | 299 | #else |
300 | c->apicid = (ebx >> 24) & 0xFF; | 300 | c->apicid = (ebx >> 24) & 0xFF; |
@@ -319,7 +319,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |||
319 | early_intel_workaround(c); | 319 | early_intel_workaround(c); |
320 | 320 | ||
321 | #ifdef CONFIG_X86_HT | 321 | #ifdef CONFIG_X86_HT |
322 | phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; | 322 | c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; |
323 | #endif | 323 | #endif |
324 | } | 324 | } |
325 | 325 | ||
@@ -477,11 +477,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
477 | { | 477 | { |
478 | u32 eax, ebx, ecx, edx; | 478 | u32 eax, ebx, ecx, edx; |
479 | int index_msb, core_bits; | 479 | int index_msb, core_bits; |
480 | int cpu = smp_processor_id(); | ||
481 | 480 | ||
482 | cpuid(1, &eax, &ebx, &ecx, &edx); | 481 | cpuid(1, &eax, &ebx, &ecx, &edx); |
483 | 482 | ||
484 | |||
485 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 483 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
486 | return; | 484 | return; |
487 | 485 | ||
@@ -492,16 +490,17 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
492 | } else if (smp_num_siblings > 1 ) { | 490 | } else if (smp_num_siblings > 1 ) { |
493 | 491 | ||
494 | if (smp_num_siblings > NR_CPUS) { | 492 | if (smp_num_siblings > NR_CPUS) { |
495 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | 493 | printk(KERN_WARNING "CPU: Unsupported number of the " |
494 | "siblings %d", smp_num_siblings); | ||
496 | smp_num_siblings = 1; | 495 | smp_num_siblings = 1; |
497 | return; | 496 | return; |
498 | } | 497 | } |
499 | 498 | ||
500 | index_msb = get_count_order(smp_num_siblings); | 499 | index_msb = get_count_order(smp_num_siblings); |
501 | phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); | 500 | c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); |
502 | 501 | ||
503 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 502 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
504 | phys_proc_id[cpu]); | 503 | c->phys_proc_id); |
505 | 504 | ||
506 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | 505 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
507 | 506 | ||
@@ -509,12 +508,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
509 | 508 | ||
510 | core_bits = get_count_order(c->x86_max_cores); | 509 | core_bits = get_count_order(c->x86_max_cores); |
511 | 510 | ||
512 | cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & | 511 | c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & |
513 | ((1 << core_bits) - 1); | 512 | ((1 << core_bits) - 1); |
514 | 513 | ||
515 | if (c->x86_max_cores > 1) | 514 | if (c->x86_max_cores > 1) |
516 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 515 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
517 | cpu_core_id[cpu]); | 516 | c->cpu_core_id); |
518 | } | 517 | } |
519 | } | 518 | } |
520 | #endif | 519 | #endif |
@@ -613,6 +612,12 @@ void __cpuinit cpu_init(void) | |||
613 | set_in_cr4(X86_CR4_TSD); | 612 | set_in_cr4(X86_CR4_TSD); |
614 | } | 613 | } |
615 | 614 | ||
615 | /* The CPU hotplug case */ | ||
616 | if (cpu_gdt_descr->address) { | ||
617 | gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
618 | memset(gdt, 0, PAGE_SIZE); | ||
619 | goto old_gdt; | ||
620 | } | ||
616 | /* | 621 | /* |
617 | * This is a horrible hack to allocate the GDT. The problem | 622 | * This is a horrible hack to allocate the GDT. The problem |
618 | * is that cpu_init() is called really early for the boot CPU | 623 | * is that cpu_init() is called really early for the boot CPU |
@@ -631,7 +636,7 @@ void __cpuinit cpu_init(void) | |||
631 | local_irq_enable(); | 636 | local_irq_enable(); |
632 | } | 637 | } |
633 | } | 638 | } |
634 | 639 | old_gdt: | |
635 | /* | 640 | /* |
636 | * Initialize the per-CPU GDT with the boot GDT, | 641 | * Initialize the per-CPU GDT with the boot GDT, |
637 | * and set up the GDT descriptor: | 642 | * and set up the GDT descriptor: |
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index fc32c8028e24..f03b7f94c304 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -354,7 +354,7 @@ static void __init init_nsc(struct cpuinfo_x86 *c) | |||
354 | * This function only handles the GX processor, and kicks every | 354 | * This function only handles the GX processor, and kicks every |
355 | * thing else to the Cyrix init function above - that should | 355 | * thing else to the Cyrix init function above - that should |
356 | * cover any processors that might have been branded differently | 356 | * cover any processors that might have been branded differently |
357 | * after NSC aquired Cyrix. | 357 | * after NSC acquired Cyrix. |
358 | * | 358 | * |
359 | * If this breaks your GX1 horribly, please e-mail | 359 | * If this breaks your GX1 horribly, please e-mail |
360 | * info-linux@ldcmail.amd.com to tell us. | 360 | * info-linux@ldcmail.amd.com to tell us. |
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5386b29bb5a5..10afc645c540 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c | |||
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
122 | 122 | ||
123 | select_idle_routine(c); | 123 | select_idle_routine(c); |
124 | l2 = init_intel_cacheinfo(c); | 124 | l2 = init_intel_cacheinfo(c); |
125 | if (c->cpuid_level > 9 ) { | ||
126 | unsigned eax = cpuid_eax(10); | ||
127 | /* Check for version and the number of counters */ | ||
128 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | ||
129 | set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); | ||
130 | } | ||
125 | 131 | ||
126 | /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ | 132 | /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ |
127 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) | 133 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) |
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index c8547a6fa7e6..e9f0b928b0a9 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Changes: | 4 | * Changes: |
5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) | 5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) |
6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. | 6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. |
7 | * Andi Kleen : CPUID4 emulation on AMD. | ||
7 | */ | 8 | */ |
8 | 9 | ||
9 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -130,25 +131,111 @@ struct _cpuid4_info { | |||
130 | cpumask_t shared_cpu_map; | 131 | cpumask_t shared_cpu_map; |
131 | }; | 132 | }; |
132 | 133 | ||
133 | static unsigned short num_cache_leaves; | 134 | unsigned short num_cache_leaves; |
135 | |||
136 | /* AMD doesn't have CPUID4. Emulate it here to report the same | ||
137 | information to the user. This makes some assumptions about the machine: | ||
138 | No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs. | ||
139 | |||
140 | In theory the TLBs could be reported as fake type (they are in "dummy"). | ||
141 | Maybe later */ | ||
142 | union l1_cache { | ||
143 | struct { | ||
144 | unsigned line_size : 8; | ||
145 | unsigned lines_per_tag : 8; | ||
146 | unsigned assoc : 8; | ||
147 | unsigned size_in_kb : 8; | ||
148 | }; | ||
149 | unsigned val; | ||
150 | }; | ||
151 | |||
152 | union l2_cache { | ||
153 | struct { | ||
154 | unsigned line_size : 8; | ||
155 | unsigned lines_per_tag : 4; | ||
156 | unsigned assoc : 4; | ||
157 | unsigned size_in_kb : 16; | ||
158 | }; | ||
159 | unsigned val; | ||
160 | }; | ||
161 | |||
162 | static const unsigned short assocs[] = { | ||
163 | [1] = 1, [2] = 2, [4] = 4, [6] = 8, | ||
164 | [8] = 16, | ||
165 | [0xf] = 0xffff // ?? | ||
166 | }; | ||
167 | static const unsigned char levels[] = { 1, 1, 2 }; | ||
168 | static const unsigned char types[] = { 1, 2, 3 }; | ||
169 | |||
170 | static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | ||
171 | union _cpuid4_leaf_ebx *ebx, | ||
172 | union _cpuid4_leaf_ecx *ecx) | ||
173 | { | ||
174 | unsigned dummy; | ||
175 | unsigned line_size, lines_per_tag, assoc, size_in_kb; | ||
176 | union l1_cache l1i, l1d; | ||
177 | union l2_cache l2; | ||
178 | |||
179 | eax->full = 0; | ||
180 | ebx->full = 0; | ||
181 | ecx->full = 0; | ||
182 | |||
183 | cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); | ||
184 | cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy); | ||
185 | |||
186 | if (leaf > 2 || !l1d.val || !l1i.val || !l2.val) | ||
187 | return; | ||
188 | |||
189 | eax->split.is_self_initializing = 1; | ||
190 | eax->split.type = types[leaf]; | ||
191 | eax->split.level = levels[leaf]; | ||
192 | eax->split.num_threads_sharing = 0; | ||
193 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; | ||
194 | |||
195 | if (leaf <= 1) { | ||
196 | union l1_cache *l1 = leaf == 0 ? &l1d : &l1i; | ||
197 | assoc = l1->assoc; | ||
198 | line_size = l1->line_size; | ||
199 | lines_per_tag = l1->lines_per_tag; | ||
200 | size_in_kb = l1->size_in_kb; | ||
201 | } else { | ||
202 | assoc = l2.assoc; | ||
203 | line_size = l2.line_size; | ||
204 | lines_per_tag = l2.lines_per_tag; | ||
205 | /* cpu_data has errata corrections for K7 applied */ | ||
206 | size_in_kb = current_cpu_data.x86_cache_size; | ||
207 | } | ||
208 | |||
209 | if (assoc == 0xf) | ||
210 | eax->split.is_fully_associative = 1; | ||
211 | ebx->split.coherency_line_size = line_size - 1; | ||
212 | ebx->split.ways_of_associativity = assocs[assoc] - 1; | ||
213 | ebx->split.physical_line_partition = lines_per_tag - 1; | ||
214 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / | ||
215 | (ebx->split.ways_of_associativity + 1) - 1; | ||
216 | } | ||
134 | 217 | ||
135 | static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | 218 | static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) |
136 | { | 219 | { |
137 | unsigned int eax, ebx, ecx, edx; | 220 | union _cpuid4_leaf_eax eax; |
138 | union _cpuid4_leaf_eax cache_eax; | 221 | union _cpuid4_leaf_ebx ebx; |
222 | union _cpuid4_leaf_ecx ecx; | ||
223 | unsigned edx; | ||
139 | 224 | ||
140 | cpuid_count(4, index, &eax, &ebx, &ecx, &edx); | 225 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) |
141 | cache_eax.full = eax; | 226 | amd_cpuid4(index, &eax, &ebx, &ecx); |
142 | if (cache_eax.split.type == CACHE_TYPE_NULL) | 227 | else |
228 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | ||
229 | if (eax.split.type == CACHE_TYPE_NULL) | ||
143 | return -EIO; /* better error ? */ | 230 | return -EIO; /* better error ? */ |
144 | 231 | ||
145 | this_leaf->eax.full = eax; | 232 | this_leaf->eax = eax; |
146 | this_leaf->ebx.full = ebx; | 233 | this_leaf->ebx = ebx; |
147 | this_leaf->ecx.full = ecx; | 234 | this_leaf->ecx = ecx; |
148 | this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * | 235 | this_leaf->size = (ecx.split.number_of_sets + 1) * |
149 | (this_leaf->ebx.split.coherency_line_size + 1) * | 236 | (ebx.split.coherency_line_size + 1) * |
150 | (this_leaf->ebx.split.physical_line_partition + 1) * | 237 | (ebx.split.physical_line_partition + 1) * |
151 | (this_leaf->ebx.split.ways_of_associativity + 1); | 238 | (ebx.split.ways_of_associativity + 1); |
152 | return 0; | 239 | return 0; |
153 | } | 240 | } |
154 | 241 | ||
@@ -174,7 +261,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
174 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ | 261 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ |
175 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ | 262 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ |
176 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; | 263 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; |
177 | #ifdef CONFIG_SMP | 264 | #ifdef CONFIG_X86_HT |
178 | unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); | 265 | unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); |
179 | #endif | 266 | #endif |
180 | 267 | ||
@@ -296,14 +383,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
296 | 383 | ||
297 | if (new_l2) { | 384 | if (new_l2) { |
298 | l2 = new_l2; | 385 | l2 = new_l2; |
299 | #ifdef CONFIG_SMP | 386 | #ifdef CONFIG_X86_HT |
300 | cpu_llc_id[cpu] = l2_id; | 387 | cpu_llc_id[cpu] = l2_id; |
301 | #endif | 388 | #endif |
302 | } | 389 | } |
303 | 390 | ||
304 | if (new_l3) { | 391 | if (new_l3) { |
305 | l3 = new_l3; | 392 | l3 = new_l3; |
306 | #ifdef CONFIG_SMP | 393 | #ifdef CONFIG_X86_HT |
307 | cpu_llc_id[cpu] = l3_id; | 394 | cpu_llc_id[cpu] = l3_id; |
308 | #endif | 395 | #endif |
309 | } | 396 | } |
@@ -642,7 +729,7 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) | |||
642 | return; | 729 | return; |
643 | } | 730 | } |
644 | 731 | ||
645 | static int cacheinfo_cpu_callback(struct notifier_block *nfb, | 732 | static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, |
646 | unsigned long action, void *hcpu) | 733 | unsigned long action, void *hcpu) |
647 | { | 734 | { |
648 | unsigned int cpu = (unsigned long)hcpu; | 735 | unsigned int cpu = (unsigned long)hcpu; |
@@ -660,7 +747,7 @@ static int cacheinfo_cpu_callback(struct notifier_block *nfb, | |||
660 | return NOTIFY_OK; | 747 | return NOTIFY_OK; |
661 | } | 748 | } |
662 | 749 | ||
663 | static struct notifier_block cacheinfo_cpu_notifier = | 750 | static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = |
664 | { | 751 | { |
665 | .notifier_call = cacheinfo_cpu_callback, | 752 | .notifier_call = cacheinfo_cpu_callback, |
666 | }; | 753 | }; |
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index a19fcb262dbb..f54a15268ed7 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -18,7 +18,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
18 | * applications want to get the raw CPUID data, they should access | 18 | * applications want to get the raw CPUID data, they should access |
19 | * /dev/cpu/<cpu_nr>/cpuid instead. | 19 | * /dev/cpu/<cpu_nr>/cpuid instead. |
20 | */ | 20 | */ |
21 | static char *x86_cap_flags[] = { | 21 | static const char * const x86_cap_flags[] = { |
22 | /* Intel-defined */ | 22 | /* Intel-defined */ |
23 | "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", | 23 | "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", |
24 | "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", | 24 | "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", |
@@ -62,7 +62,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
64 | }; | 64 | }; |
65 | static char *x86_power_flags[] = { | 65 | static const char * const x86_power_flags[] = { |
66 | "ts", /* temperature sensor */ | 66 | "ts", /* temperature sensor */ |
67 | "fid", /* frequency id control */ | 67 | "fid", /* frequency id control */ |
68 | "vid", /* voltage id control */ | 68 | "vid", /* voltage id control */ |
@@ -109,9 +109,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
109 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | 109 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); |
110 | #ifdef CONFIG_X86_HT | 110 | #ifdef CONFIG_X86_HT |
111 | if (c->x86_max_cores * smp_num_siblings > 1) { | 111 | if (c->x86_max_cores * smp_num_siblings > 1) { |
112 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); | 112 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); |
113 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); | 113 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); |
114 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); | 114 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); |
115 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | 115 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
116 | } | 116 | } |
117 | #endif | 117 | #endif |
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 1d9a4abcdfc7..f6dfa9fb675c 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c | |||
@@ -183,7 +183,7 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long ac | |||
183 | return NOTIFY_OK; | 183 | return NOTIFY_OK; |
184 | } | 184 | } |
185 | 185 | ||
186 | static struct notifier_block cpuid_class_cpu_notifier = | 186 | static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = |
187 | { | 187 | { |
188 | .notifier_call = cpuid_class_cpu_callback, | 188 | .notifier_call = cpuid_class_cpu_callback, |
189 | }; | 189 | }; |
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 21dc1bbb8067..48f0f62f781c 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c | |||
@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) | |||
120 | return 1; | 120 | return 1; |
121 | } | 121 | } |
122 | 122 | ||
123 | /* | ||
124 | * By using the NMI code instead of a vector we just sneak thru the | ||
125 | * word generator coming out with just what we want. AND it does | ||
126 | * not matter if clustered_apic_mode is set or not. | ||
127 | */ | ||
128 | static void smp_send_nmi_allbutself(void) | 123 | static void smp_send_nmi_allbutself(void) |
129 | { | 124 | { |
130 | send_IPI_allbutself(APIC_DM_NMI); | 125 | send_IPI_allbutself(NMI_VECTOR); |
131 | } | 126 | } |
132 | 127 | ||
133 | static void nmi_shootdown_cpus(void) | 128 | static void nmi_shootdown_cpus(void) |
@@ -163,7 +158,7 @@ static void nmi_shootdown_cpus(void) | |||
163 | void machine_crash_shutdown(struct pt_regs *regs) | 158 | void machine_crash_shutdown(struct pt_regs *regs) |
164 | { | 159 | { |
165 | /* This function is only called after the system | 160 | /* This function is only called after the system |
166 | * has paniced or is otherwise in a critical state. | 161 | * has panicked or is otherwise in a critical state. |
167 | * The minimum amount of code to allow a kexec'd kernel | 162 | * The minimum amount of code to allow a kexec'd kernel |
168 | * to run successfully needs to happen here. | 163 | * to run successfully needs to happen here. |
169 | * | 164 | * |
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index cfc683f153b9..fbdb933251b6 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/smp.h> | 48 | #include <asm/smp.h> |
49 | #include <asm/page.h> | 49 | #include <asm/page.h> |
50 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
51 | #include <asm/dwarf2.h> | ||
51 | #include "irq_vectors.h" | 52 | #include "irq_vectors.h" |
52 | 53 | ||
53 | #define nr_syscalls ((syscall_table_size)/4) | 54 | #define nr_syscalls ((syscall_table_size)/4) |
@@ -82,34 +83,76 @@ VM_MASK = 0x00020000 | |||
82 | #define resume_kernel restore_nocheck | 83 | #define resume_kernel restore_nocheck |
83 | #endif | 84 | #endif |
84 | 85 | ||
86 | #ifdef CONFIG_VM86 | ||
87 | #define resume_userspace_sig check_userspace | ||
88 | #else | ||
89 | #define resume_userspace_sig resume_userspace | ||
90 | #endif | ||
91 | |||
85 | #define SAVE_ALL \ | 92 | #define SAVE_ALL \ |
86 | cld; \ | 93 | cld; \ |
87 | pushl %es; \ | 94 | pushl %es; \ |
95 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
96 | /*CFI_REL_OFFSET es, 0;*/\ | ||
88 | pushl %ds; \ | 97 | pushl %ds; \ |
98 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
99 | /*CFI_REL_OFFSET ds, 0;*/\ | ||
89 | pushl %eax; \ | 100 | pushl %eax; \ |
101 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
102 | CFI_REL_OFFSET eax, 0;\ | ||
90 | pushl %ebp; \ | 103 | pushl %ebp; \ |
104 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
105 | CFI_REL_OFFSET ebp, 0;\ | ||
91 | pushl %edi; \ | 106 | pushl %edi; \ |
107 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
108 | CFI_REL_OFFSET edi, 0;\ | ||
92 | pushl %esi; \ | 109 | pushl %esi; \ |
110 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
111 | CFI_REL_OFFSET esi, 0;\ | ||
93 | pushl %edx; \ | 112 | pushl %edx; \ |
113 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
114 | CFI_REL_OFFSET edx, 0;\ | ||
94 | pushl %ecx; \ | 115 | pushl %ecx; \ |
116 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
117 | CFI_REL_OFFSET ecx, 0;\ | ||
95 | pushl %ebx; \ | 118 | pushl %ebx; \ |
119 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
120 | CFI_REL_OFFSET ebx, 0;\ | ||
96 | movl $(__USER_DS), %edx; \ | 121 | movl $(__USER_DS), %edx; \ |
97 | movl %edx, %ds; \ | 122 | movl %edx, %ds; \ |
98 | movl %edx, %es; | 123 | movl %edx, %es; |
99 | 124 | ||
100 | #define RESTORE_INT_REGS \ | 125 | #define RESTORE_INT_REGS \ |
101 | popl %ebx; \ | 126 | popl %ebx; \ |
127 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
128 | CFI_RESTORE ebx;\ | ||
102 | popl %ecx; \ | 129 | popl %ecx; \ |
130 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
131 | CFI_RESTORE ecx;\ | ||
103 | popl %edx; \ | 132 | popl %edx; \ |
133 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
134 | CFI_RESTORE edx;\ | ||
104 | popl %esi; \ | 135 | popl %esi; \ |
136 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
137 | CFI_RESTORE esi;\ | ||
105 | popl %edi; \ | 138 | popl %edi; \ |
139 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
140 | CFI_RESTORE edi;\ | ||
106 | popl %ebp; \ | 141 | popl %ebp; \ |
107 | popl %eax | 142 | CFI_ADJUST_CFA_OFFSET -4;\ |
143 | CFI_RESTORE ebp;\ | ||
144 | popl %eax; \ | ||
145 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
146 | CFI_RESTORE eax | ||
108 | 147 | ||
109 | #define RESTORE_REGS \ | 148 | #define RESTORE_REGS \ |
110 | RESTORE_INT_REGS; \ | 149 | RESTORE_INT_REGS; \ |
111 | 1: popl %ds; \ | 150 | 1: popl %ds; \ |
151 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
152 | /*CFI_RESTORE ds;*/\ | ||
112 | 2: popl %es; \ | 153 | 2: popl %es; \ |
154 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
155 | /*CFI_RESTORE es;*/\ | ||
113 | .section .fixup,"ax"; \ | 156 | .section .fixup,"ax"; \ |
114 | 3: movl $0,(%esp); \ | 157 | 3: movl $0,(%esp); \ |
115 | jmp 1b; \ | 158 | jmp 1b; \ |
@@ -122,13 +165,43 @@ VM_MASK = 0x00020000 | |||
122 | .long 2b,4b; \ | 165 | .long 2b,4b; \ |
123 | .previous | 166 | .previous |
124 | 167 | ||
168 | #define RING0_INT_FRAME \ | ||
169 | CFI_STARTPROC simple;\ | ||
170 | CFI_DEF_CFA esp, 3*4;\ | ||
171 | /*CFI_OFFSET cs, -2*4;*/\ | ||
172 | CFI_OFFSET eip, -3*4 | ||
173 | |||
174 | #define RING0_EC_FRAME \ | ||
175 | CFI_STARTPROC simple;\ | ||
176 | CFI_DEF_CFA esp, 4*4;\ | ||
177 | /*CFI_OFFSET cs, -2*4;*/\ | ||
178 | CFI_OFFSET eip, -3*4 | ||
179 | |||
180 | #define RING0_PTREGS_FRAME \ | ||
181 | CFI_STARTPROC simple;\ | ||
182 | CFI_DEF_CFA esp, OLDESP-EBX;\ | ||
183 | /*CFI_OFFSET cs, CS-OLDESP;*/\ | ||
184 | CFI_OFFSET eip, EIP-OLDESP;\ | ||
185 | /*CFI_OFFSET es, ES-OLDESP;*/\ | ||
186 | /*CFI_OFFSET ds, DS-OLDESP;*/\ | ||
187 | CFI_OFFSET eax, EAX-OLDESP;\ | ||
188 | CFI_OFFSET ebp, EBP-OLDESP;\ | ||
189 | CFI_OFFSET edi, EDI-OLDESP;\ | ||
190 | CFI_OFFSET esi, ESI-OLDESP;\ | ||
191 | CFI_OFFSET edx, EDX-OLDESP;\ | ||
192 | CFI_OFFSET ecx, ECX-OLDESP;\ | ||
193 | CFI_OFFSET ebx, EBX-OLDESP | ||
125 | 194 | ||
126 | ENTRY(ret_from_fork) | 195 | ENTRY(ret_from_fork) |
196 | CFI_STARTPROC | ||
127 | pushl %eax | 197 | pushl %eax |
198 | CFI_ADJUST_CFA_OFFSET -4 | ||
128 | call schedule_tail | 199 | call schedule_tail |
129 | GET_THREAD_INFO(%ebp) | 200 | GET_THREAD_INFO(%ebp) |
130 | popl %eax | 201 | popl %eax |
202 | CFI_ADJUST_CFA_OFFSET -4 | ||
131 | jmp syscall_exit | 203 | jmp syscall_exit |
204 | CFI_ENDPROC | ||
132 | 205 | ||
133 | /* | 206 | /* |
134 | * Return to user mode is not as complex as all this looks, | 207 | * Return to user mode is not as complex as all this looks, |
@@ -139,10 +212,12 @@ ENTRY(ret_from_fork) | |||
139 | 212 | ||
140 | # userspace resumption stub bypassing syscall exit tracing | 213 | # userspace resumption stub bypassing syscall exit tracing |
141 | ALIGN | 214 | ALIGN |
215 | RING0_PTREGS_FRAME | ||
142 | ret_from_exception: | 216 | ret_from_exception: |
143 | preempt_stop | 217 | preempt_stop |
144 | ret_from_intr: | 218 | ret_from_intr: |
145 | GET_THREAD_INFO(%ebp) | 219 | GET_THREAD_INFO(%ebp) |
220 | check_userspace: | ||
146 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS | 221 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS |
147 | movb CS(%esp), %al | 222 | movb CS(%esp), %al |
148 | testl $(VM_MASK | 3), %eax | 223 | testl $(VM_MASK | 3), %eax |
@@ -171,20 +246,38 @@ need_resched: | |||
171 | call preempt_schedule_irq | 246 | call preempt_schedule_irq |
172 | jmp need_resched | 247 | jmp need_resched |
173 | #endif | 248 | #endif |
249 | CFI_ENDPROC | ||
174 | 250 | ||
175 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 251 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
176 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 252 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
177 | 253 | ||
178 | # sysenter call handler stub | 254 | # sysenter call handler stub |
179 | ENTRY(sysenter_entry) | 255 | ENTRY(sysenter_entry) |
256 | CFI_STARTPROC simple | ||
257 | CFI_DEF_CFA esp, 0 | ||
258 | CFI_REGISTER esp, ebp | ||
180 | movl TSS_sysenter_esp0(%esp),%esp | 259 | movl TSS_sysenter_esp0(%esp),%esp |
181 | sysenter_past_esp: | 260 | sysenter_past_esp: |
182 | sti | 261 | sti |
183 | pushl $(__USER_DS) | 262 | pushl $(__USER_DS) |
263 | CFI_ADJUST_CFA_OFFSET 4 | ||
264 | /*CFI_REL_OFFSET ss, 0*/ | ||
184 | pushl %ebp | 265 | pushl %ebp |
266 | CFI_ADJUST_CFA_OFFSET 4 | ||
267 | CFI_REL_OFFSET esp, 0 | ||
185 | pushfl | 268 | pushfl |
269 | CFI_ADJUST_CFA_OFFSET 4 | ||
186 | pushl $(__USER_CS) | 270 | pushl $(__USER_CS) |
187 | pushl $SYSENTER_RETURN | 271 | CFI_ADJUST_CFA_OFFSET 4 |
272 | /*CFI_REL_OFFSET cs, 0*/ | ||
273 | /* | ||
274 | * Push current_thread_info()->sysenter_return to the stack. | ||
275 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | ||
276 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | ||
277 | */ | ||
278 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | ||
279 | CFI_ADJUST_CFA_OFFSET 4 | ||
280 | CFI_REL_OFFSET eip, 0 | ||
188 | 281 | ||
189 | /* | 282 | /* |
190 | * Load the potential sixth argument from user stack. | 283 | * Load the potential sixth argument from user stack. |
@@ -199,6 +292,7 @@ sysenter_past_esp: | |||
199 | .previous | 292 | .previous |
200 | 293 | ||
201 | pushl %eax | 294 | pushl %eax |
295 | CFI_ADJUST_CFA_OFFSET 4 | ||
202 | SAVE_ALL | 296 | SAVE_ALL |
203 | GET_THREAD_INFO(%ebp) | 297 | GET_THREAD_INFO(%ebp) |
204 | 298 | ||
@@ -219,11 +313,14 @@ sysenter_past_esp: | |||
219 | xorl %ebp,%ebp | 313 | xorl %ebp,%ebp |
220 | sti | 314 | sti |
221 | sysexit | 315 | sysexit |
316 | CFI_ENDPROC | ||
222 | 317 | ||
223 | 318 | ||
224 | # system call handler stub | 319 | # system call handler stub |
225 | ENTRY(system_call) | 320 | ENTRY(system_call) |
321 | RING0_INT_FRAME # can't unwind into user space anyway | ||
226 | pushl %eax # save orig_eax | 322 | pushl %eax # save orig_eax |
323 | CFI_ADJUST_CFA_OFFSET 4 | ||
227 | SAVE_ALL | 324 | SAVE_ALL |
228 | GET_THREAD_INFO(%ebp) | 325 | GET_THREAD_INFO(%ebp) |
229 | testl $TF_MASK,EFLAGS(%esp) | 326 | testl $TF_MASK,EFLAGS(%esp) |
@@ -256,10 +353,12 @@ restore_all: | |||
256 | movb CS(%esp), %al | 353 | movb CS(%esp), %al |
257 | andl $(VM_MASK | (4 << 8) | 3), %eax | 354 | andl $(VM_MASK | (4 << 8) | 3), %eax |
258 | cmpl $((4 << 8) | 3), %eax | 355 | cmpl $((4 << 8) | 3), %eax |
356 | CFI_REMEMBER_STATE | ||
259 | je ldt_ss # returning to user-space with LDT SS | 357 | je ldt_ss # returning to user-space with LDT SS |
260 | restore_nocheck: | 358 | restore_nocheck: |
261 | RESTORE_REGS | 359 | RESTORE_REGS |
262 | addl $4, %esp | 360 | addl $4, %esp |
361 | CFI_ADJUST_CFA_OFFSET -4 | ||
263 | 1: iret | 362 | 1: iret |
264 | .section .fixup,"ax" | 363 | .section .fixup,"ax" |
265 | iret_exc: | 364 | iret_exc: |
@@ -273,6 +372,7 @@ iret_exc: | |||
273 | .long 1b,iret_exc | 372 | .long 1b,iret_exc |
274 | .previous | 373 | .previous |
275 | 374 | ||
375 | CFI_RESTORE_STATE | ||
276 | ldt_ss: | 376 | ldt_ss: |
277 | larl OLDSS(%esp), %eax | 377 | larl OLDSS(%esp), %eax |
278 | jnz restore_nocheck | 378 | jnz restore_nocheck |
@@ -285,11 +385,13 @@ ldt_ss: | |||
285 | * CPUs, which we can try to work around to make | 385 | * CPUs, which we can try to work around to make |
286 | * dosemu and wine happy. */ | 386 | * dosemu and wine happy. */ |
287 | subl $8, %esp # reserve space for switch16 pointer | 387 | subl $8, %esp # reserve space for switch16 pointer |
388 | CFI_ADJUST_CFA_OFFSET 8 | ||
288 | cli | 389 | cli |
289 | movl %esp, %eax | 390 | movl %esp, %eax |
290 | /* Set up the 16bit stack frame with switch32 pointer on top, | 391 | /* Set up the 16bit stack frame with switch32 pointer on top, |
291 | * and a switch16 pointer on top of the current frame. */ | 392 | * and a switch16 pointer on top of the current frame. */ |
292 | call setup_x86_bogus_stack | 393 | call setup_x86_bogus_stack |
394 | CFI_ADJUST_CFA_OFFSET -8 # frame has moved | ||
293 | RESTORE_REGS | 395 | RESTORE_REGS |
294 | lss 20+4(%esp), %esp # switch to 16bit stack | 396 | lss 20+4(%esp), %esp # switch to 16bit stack |
295 | 1: iret | 397 | 1: iret |
@@ -297,9 +399,11 @@ ldt_ss: | |||
297 | .align 4 | 399 | .align 4 |
298 | .long 1b,iret_exc | 400 | .long 1b,iret_exc |
299 | .previous | 401 | .previous |
402 | CFI_ENDPROC | ||
300 | 403 | ||
301 | # perform work that needs to be done immediately before resumption | 404 | # perform work that needs to be done immediately before resumption |
302 | ALIGN | 405 | ALIGN |
406 | RING0_PTREGS_FRAME # can't unwind into user space anyway | ||
303 | work_pending: | 407 | work_pending: |
304 | testb $_TIF_NEED_RESCHED, %cl | 408 | testb $_TIF_NEED_RESCHED, %cl |
305 | jz work_notifysig | 409 | jz work_notifysig |
@@ -323,18 +427,20 @@ work_notifysig: # deal with pending signals and | |||
323 | # vm86-space | 427 | # vm86-space |
324 | xorl %edx, %edx | 428 | xorl %edx, %edx |
325 | call do_notify_resume | 429 | call do_notify_resume |
326 | jmp resume_userspace | 430 | jmp resume_userspace_sig |
327 | 431 | ||
328 | ALIGN | 432 | ALIGN |
329 | work_notifysig_v86: | 433 | work_notifysig_v86: |
330 | #ifdef CONFIG_VM86 | 434 | #ifdef CONFIG_VM86 |
331 | pushl %ecx # save ti_flags for do_notify_resume | 435 | pushl %ecx # save ti_flags for do_notify_resume |
436 | CFI_ADJUST_CFA_OFFSET 4 | ||
332 | call save_v86_state # %eax contains pt_regs pointer | 437 | call save_v86_state # %eax contains pt_regs pointer |
333 | popl %ecx | 438 | popl %ecx |
439 | CFI_ADJUST_CFA_OFFSET -4 | ||
334 | movl %eax, %esp | 440 | movl %eax, %esp |
335 | xorl %edx, %edx | 441 | xorl %edx, %edx |
336 | call do_notify_resume | 442 | call do_notify_resume |
337 | jmp resume_userspace | 443 | jmp resume_userspace_sig |
338 | #endif | 444 | #endif |
339 | 445 | ||
340 | # perform syscall exit tracing | 446 | # perform syscall exit tracing |
@@ -363,19 +469,21 @@ syscall_exit_work: | |||
363 | movl $1, %edx | 469 | movl $1, %edx |
364 | call do_syscall_trace | 470 | call do_syscall_trace |
365 | jmp resume_userspace | 471 | jmp resume_userspace |
472 | CFI_ENDPROC | ||
366 | 473 | ||
367 | ALIGN | 474 | RING0_INT_FRAME # can't unwind into user space anyway |
368 | syscall_fault: | 475 | syscall_fault: |
369 | pushl %eax # save orig_eax | 476 | pushl %eax # save orig_eax |
477 | CFI_ADJUST_CFA_OFFSET 4 | ||
370 | SAVE_ALL | 478 | SAVE_ALL |
371 | GET_THREAD_INFO(%ebp) | 479 | GET_THREAD_INFO(%ebp) |
372 | movl $-EFAULT,EAX(%esp) | 480 | movl $-EFAULT,EAX(%esp) |
373 | jmp resume_userspace | 481 | jmp resume_userspace |
374 | 482 | ||
375 | ALIGN | ||
376 | syscall_badsys: | 483 | syscall_badsys: |
377 | movl $-ENOSYS,EAX(%esp) | 484 | movl $-ENOSYS,EAX(%esp) |
378 | jmp resume_userspace | 485 | jmp resume_userspace |
486 | CFI_ENDPROC | ||
379 | 487 | ||
380 | #define FIXUP_ESPFIX_STACK \ | 488 | #define FIXUP_ESPFIX_STACK \ |
381 | movl %esp, %eax; \ | 489 | movl %esp, %eax; \ |
@@ -387,16 +495,21 @@ syscall_badsys: | |||
387 | movl %eax, %esp; | 495 | movl %eax, %esp; |
388 | #define UNWIND_ESPFIX_STACK \ | 496 | #define UNWIND_ESPFIX_STACK \ |
389 | pushl %eax; \ | 497 | pushl %eax; \ |
498 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
390 | movl %ss, %eax; \ | 499 | movl %ss, %eax; \ |
391 | /* see if on 16bit stack */ \ | 500 | /* see if on 16bit stack */ \ |
392 | cmpw $__ESPFIX_SS, %ax; \ | 501 | cmpw $__ESPFIX_SS, %ax; \ |
393 | jne 28f; \ | 502 | je 28f; \ |
394 | movl $__KERNEL_DS, %edx; \ | 503 | 27: popl %eax; \ |
395 | movl %edx, %ds; \ | 504 | CFI_ADJUST_CFA_OFFSET -4; \ |
396 | movl %edx, %es; \ | 505 | .section .fixup,"ax"; \ |
506 | 28: movl $__KERNEL_DS, %eax; \ | ||
507 | movl %eax, %ds; \ | ||
508 | movl %eax, %es; \ | ||
397 | /* switch to 32bit stack */ \ | 509 | /* switch to 32bit stack */ \ |
398 | FIXUP_ESPFIX_STACK \ | 510 | FIXUP_ESPFIX_STACK; \ |
399 | 28: popl %eax; | 511 | jmp 27b; \ |
512 | .previous | ||
400 | 513 | ||
401 | /* | 514 | /* |
402 | * Build the entry stubs and pointer table with | 515 | * Build the entry stubs and pointer table with |
@@ -408,9 +521,14 @@ ENTRY(interrupt) | |||
408 | 521 | ||
409 | vector=0 | 522 | vector=0 |
410 | ENTRY(irq_entries_start) | 523 | ENTRY(irq_entries_start) |
524 | RING0_INT_FRAME | ||
411 | .rept NR_IRQS | 525 | .rept NR_IRQS |
412 | ALIGN | 526 | ALIGN |
413 | 1: pushl $vector-256 | 527 | .if vector |
528 | CFI_ADJUST_CFA_OFFSET -4 | ||
529 | .endif | ||
530 | 1: pushl $~(vector) | ||
531 | CFI_ADJUST_CFA_OFFSET 4 | ||
414 | jmp common_interrupt | 532 | jmp common_interrupt |
415 | .data | 533 | .data |
416 | .long 1b | 534 | .long 1b |
@@ -424,60 +542,99 @@ common_interrupt: | |||
424 | movl %esp,%eax | 542 | movl %esp,%eax |
425 | call do_IRQ | 543 | call do_IRQ |
426 | jmp ret_from_intr | 544 | jmp ret_from_intr |
545 | CFI_ENDPROC | ||
427 | 546 | ||
428 | #define BUILD_INTERRUPT(name, nr) \ | 547 | #define BUILD_INTERRUPT(name, nr) \ |
429 | ENTRY(name) \ | 548 | ENTRY(name) \ |
430 | pushl $nr-256; \ | 549 | RING0_INT_FRAME; \ |
431 | SAVE_ALL \ | 550 | pushl $~(nr); \ |
551 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
552 | SAVE_ALL; \ | ||
432 | movl %esp,%eax; \ | 553 | movl %esp,%eax; \ |
433 | call smp_/**/name; \ | 554 | call smp_/**/name; \ |
434 | jmp ret_from_intr; | 555 | jmp ret_from_intr; \ |
556 | CFI_ENDPROC | ||
435 | 557 | ||
436 | /* The include is where all of the SMP etc. interrupts come from */ | 558 | /* The include is where all of the SMP etc. interrupts come from */ |
437 | #include "entry_arch.h" | 559 | #include "entry_arch.h" |
438 | 560 | ||
439 | ENTRY(divide_error) | 561 | ENTRY(divide_error) |
562 | RING0_INT_FRAME | ||
440 | pushl $0 # no error code | 563 | pushl $0 # no error code |
564 | CFI_ADJUST_CFA_OFFSET 4 | ||
441 | pushl $do_divide_error | 565 | pushl $do_divide_error |
566 | CFI_ADJUST_CFA_OFFSET 4 | ||
442 | ALIGN | 567 | ALIGN |
443 | error_code: | 568 | error_code: |
444 | pushl %ds | 569 | pushl %ds |
570 | CFI_ADJUST_CFA_OFFSET 4 | ||
571 | /*CFI_REL_OFFSET ds, 0*/ | ||
445 | pushl %eax | 572 | pushl %eax |
573 | CFI_ADJUST_CFA_OFFSET 4 | ||
574 | CFI_REL_OFFSET eax, 0 | ||
446 | xorl %eax, %eax | 575 | xorl %eax, %eax |
447 | pushl %ebp | 576 | pushl %ebp |
577 | CFI_ADJUST_CFA_OFFSET 4 | ||
578 | CFI_REL_OFFSET ebp, 0 | ||
448 | pushl %edi | 579 | pushl %edi |
580 | CFI_ADJUST_CFA_OFFSET 4 | ||
581 | CFI_REL_OFFSET edi, 0 | ||
449 | pushl %esi | 582 | pushl %esi |
583 | CFI_ADJUST_CFA_OFFSET 4 | ||
584 | CFI_REL_OFFSET esi, 0 | ||
450 | pushl %edx | 585 | pushl %edx |
586 | CFI_ADJUST_CFA_OFFSET 4 | ||
587 | CFI_REL_OFFSET edx, 0 | ||
451 | decl %eax # eax = -1 | 588 | decl %eax # eax = -1 |
452 | pushl %ecx | 589 | pushl %ecx |
590 | CFI_ADJUST_CFA_OFFSET 4 | ||
591 | CFI_REL_OFFSET ecx, 0 | ||
453 | pushl %ebx | 592 | pushl %ebx |
593 | CFI_ADJUST_CFA_OFFSET 4 | ||
594 | CFI_REL_OFFSET ebx, 0 | ||
454 | cld | 595 | cld |
455 | pushl %es | 596 | pushl %es |
597 | CFI_ADJUST_CFA_OFFSET 4 | ||
598 | /*CFI_REL_OFFSET es, 0*/ | ||
456 | UNWIND_ESPFIX_STACK | 599 | UNWIND_ESPFIX_STACK |
457 | popl %ecx | 600 | popl %ecx |
601 | CFI_ADJUST_CFA_OFFSET -4 | ||
602 | /*CFI_REGISTER es, ecx*/ | ||
458 | movl ES(%esp), %edi # get the function address | 603 | movl ES(%esp), %edi # get the function address |
459 | movl ORIG_EAX(%esp), %edx # get the error code | 604 | movl ORIG_EAX(%esp), %edx # get the error code |
460 | movl %eax, ORIG_EAX(%esp) | 605 | movl %eax, ORIG_EAX(%esp) |
461 | movl %ecx, ES(%esp) | 606 | movl %ecx, ES(%esp) |
607 | /*CFI_REL_OFFSET es, ES*/ | ||
462 | movl $(__USER_DS), %ecx | 608 | movl $(__USER_DS), %ecx |
463 | movl %ecx, %ds | 609 | movl %ecx, %ds |
464 | movl %ecx, %es | 610 | movl %ecx, %es |
465 | movl %esp,%eax # pt_regs pointer | 611 | movl %esp,%eax # pt_regs pointer |
466 | call *%edi | 612 | call *%edi |
467 | jmp ret_from_exception | 613 | jmp ret_from_exception |
614 | CFI_ENDPROC | ||
468 | 615 | ||
469 | ENTRY(coprocessor_error) | 616 | ENTRY(coprocessor_error) |
617 | RING0_INT_FRAME | ||
470 | pushl $0 | 618 | pushl $0 |
619 | CFI_ADJUST_CFA_OFFSET 4 | ||
471 | pushl $do_coprocessor_error | 620 | pushl $do_coprocessor_error |
621 | CFI_ADJUST_CFA_OFFSET 4 | ||
472 | jmp error_code | 622 | jmp error_code |
623 | CFI_ENDPROC | ||
473 | 624 | ||
474 | ENTRY(simd_coprocessor_error) | 625 | ENTRY(simd_coprocessor_error) |
626 | RING0_INT_FRAME | ||
475 | pushl $0 | 627 | pushl $0 |
628 | CFI_ADJUST_CFA_OFFSET 4 | ||
476 | pushl $do_simd_coprocessor_error | 629 | pushl $do_simd_coprocessor_error |
630 | CFI_ADJUST_CFA_OFFSET 4 | ||
477 | jmp error_code | 631 | jmp error_code |
632 | CFI_ENDPROC | ||
478 | 633 | ||
479 | ENTRY(device_not_available) | 634 | ENTRY(device_not_available) |
635 | RING0_INT_FRAME | ||
480 | pushl $-1 # mark this as an int | 636 | pushl $-1 # mark this as an int |
637 | CFI_ADJUST_CFA_OFFSET 4 | ||
481 | SAVE_ALL | 638 | SAVE_ALL |
482 | movl %cr0, %eax | 639 | movl %cr0, %eax |
483 | testl $0x4, %eax # EM (math emulation bit) | 640 | testl $0x4, %eax # EM (math emulation bit) |
@@ -487,9 +644,12 @@ ENTRY(device_not_available) | |||
487 | jmp ret_from_exception | 644 | jmp ret_from_exception |
488 | device_not_available_emulate: | 645 | device_not_available_emulate: |
489 | pushl $0 # temporary storage for ORIG_EIP | 646 | pushl $0 # temporary storage for ORIG_EIP |
647 | CFI_ADJUST_CFA_OFFSET 4 | ||
490 | call math_emulate | 648 | call math_emulate |
491 | addl $4, %esp | 649 | addl $4, %esp |
650 | CFI_ADJUST_CFA_OFFSET -4 | ||
492 | jmp ret_from_exception | 651 | jmp ret_from_exception |
652 | CFI_ENDPROC | ||
493 | 653 | ||
494 | /* | 654 | /* |
495 | * Debug traps and NMI can happen at the one SYSENTER instruction | 655 | * Debug traps and NMI can happen at the one SYSENTER instruction |
@@ -514,16 +674,19 @@ label: \ | |||
514 | pushl $sysenter_past_esp | 674 | pushl $sysenter_past_esp |
515 | 675 | ||
516 | KPROBE_ENTRY(debug) | 676 | KPROBE_ENTRY(debug) |
677 | RING0_INT_FRAME | ||
517 | cmpl $sysenter_entry,(%esp) | 678 | cmpl $sysenter_entry,(%esp) |
518 | jne debug_stack_correct | 679 | jne debug_stack_correct |
519 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | 680 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) |
520 | debug_stack_correct: | 681 | debug_stack_correct: |
521 | pushl $-1 # mark this as an int | 682 | pushl $-1 # mark this as an int |
683 | CFI_ADJUST_CFA_OFFSET 4 | ||
522 | SAVE_ALL | 684 | SAVE_ALL |
523 | xorl %edx,%edx # error code 0 | 685 | xorl %edx,%edx # error code 0 |
524 | movl %esp,%eax # pt_regs pointer | 686 | movl %esp,%eax # pt_regs pointer |
525 | call do_debug | 687 | call do_debug |
526 | jmp ret_from_exception | 688 | jmp ret_from_exception |
689 | CFI_ENDPROC | ||
527 | .previous .text | 690 | .previous .text |
528 | /* | 691 | /* |
529 | * NMI is doubly nasty. It can happen _while_ we're handling | 692 | * NMI is doubly nasty. It can happen _while_ we're handling |
@@ -534,14 +697,18 @@ debug_stack_correct: | |||
534 | * fault happened on the sysenter path. | 697 | * fault happened on the sysenter path. |
535 | */ | 698 | */ |
536 | ENTRY(nmi) | 699 | ENTRY(nmi) |
700 | RING0_INT_FRAME | ||
537 | pushl %eax | 701 | pushl %eax |
702 | CFI_ADJUST_CFA_OFFSET 4 | ||
538 | movl %ss, %eax | 703 | movl %ss, %eax |
539 | cmpw $__ESPFIX_SS, %ax | 704 | cmpw $__ESPFIX_SS, %ax |
540 | popl %eax | 705 | popl %eax |
706 | CFI_ADJUST_CFA_OFFSET -4 | ||
541 | je nmi_16bit_stack | 707 | je nmi_16bit_stack |
542 | cmpl $sysenter_entry,(%esp) | 708 | cmpl $sysenter_entry,(%esp) |
543 | je nmi_stack_fixup | 709 | je nmi_stack_fixup |
544 | pushl %eax | 710 | pushl %eax |
711 | CFI_ADJUST_CFA_OFFSET 4 | ||
545 | movl %esp,%eax | 712 | movl %esp,%eax |
546 | /* Do not access memory above the end of our stack page, | 713 | /* Do not access memory above the end of our stack page, |
547 | * it might not exist. | 714 | * it might not exist. |
@@ -549,16 +716,19 @@ ENTRY(nmi) | |||
549 | andl $(THREAD_SIZE-1),%eax | 716 | andl $(THREAD_SIZE-1),%eax |
550 | cmpl $(THREAD_SIZE-20),%eax | 717 | cmpl $(THREAD_SIZE-20),%eax |
551 | popl %eax | 718 | popl %eax |
719 | CFI_ADJUST_CFA_OFFSET -4 | ||
552 | jae nmi_stack_correct | 720 | jae nmi_stack_correct |
553 | cmpl $sysenter_entry,12(%esp) | 721 | cmpl $sysenter_entry,12(%esp) |
554 | je nmi_debug_stack_check | 722 | je nmi_debug_stack_check |
555 | nmi_stack_correct: | 723 | nmi_stack_correct: |
556 | pushl %eax | 724 | pushl %eax |
725 | CFI_ADJUST_CFA_OFFSET 4 | ||
557 | SAVE_ALL | 726 | SAVE_ALL |
558 | xorl %edx,%edx # zero error code | 727 | xorl %edx,%edx # zero error code |
559 | movl %esp,%eax # pt_regs pointer | 728 | movl %esp,%eax # pt_regs pointer |
560 | call do_nmi | 729 | call do_nmi |
561 | jmp restore_all | 730 | jmp restore_all |
731 | CFI_ENDPROC | ||
562 | 732 | ||
563 | nmi_stack_fixup: | 733 | nmi_stack_fixup: |
564 | FIX_STACK(12,nmi_stack_correct, 1) | 734 | FIX_STACK(12,nmi_stack_correct, 1) |
@@ -574,94 +744,177 @@ nmi_debug_stack_check: | |||
574 | jmp nmi_stack_correct | 744 | jmp nmi_stack_correct |
575 | 745 | ||
576 | nmi_16bit_stack: | 746 | nmi_16bit_stack: |
747 | RING0_INT_FRAME | ||
577 | /* create the pointer to lss back */ | 748 | /* create the pointer to lss back */ |
578 | pushl %ss | 749 | pushl %ss |
750 | CFI_ADJUST_CFA_OFFSET 4 | ||
579 | pushl %esp | 751 | pushl %esp |
752 | CFI_ADJUST_CFA_OFFSET 4 | ||
580 | movzwl %sp, %esp | 753 | movzwl %sp, %esp |
581 | addw $4, (%esp) | 754 | addw $4, (%esp) |
582 | /* copy the iret frame of 12 bytes */ | 755 | /* copy the iret frame of 12 bytes */ |
583 | .rept 3 | 756 | .rept 3 |
584 | pushl 16(%esp) | 757 | pushl 16(%esp) |
758 | CFI_ADJUST_CFA_OFFSET 4 | ||
585 | .endr | 759 | .endr |
586 | pushl %eax | 760 | pushl %eax |
761 | CFI_ADJUST_CFA_OFFSET 4 | ||
587 | SAVE_ALL | 762 | SAVE_ALL |
588 | FIXUP_ESPFIX_STACK # %eax == %esp | 763 | FIXUP_ESPFIX_STACK # %eax == %esp |
764 | CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved | ||
589 | xorl %edx,%edx # zero error code | 765 | xorl %edx,%edx # zero error code |
590 | call do_nmi | 766 | call do_nmi |
591 | RESTORE_REGS | 767 | RESTORE_REGS |
592 | lss 12+4(%esp), %esp # back to 16bit stack | 768 | lss 12+4(%esp), %esp # back to 16bit stack |
593 | 1: iret | 769 | 1: iret |
770 | CFI_ENDPROC | ||
594 | .section __ex_table,"a" | 771 | .section __ex_table,"a" |
595 | .align 4 | 772 | .align 4 |
596 | .long 1b,iret_exc | 773 | .long 1b,iret_exc |
597 | .previous | 774 | .previous |
598 | 775 | ||
599 | KPROBE_ENTRY(int3) | 776 | KPROBE_ENTRY(int3) |
777 | RING0_INT_FRAME | ||
600 | pushl $-1 # mark this as an int | 778 | pushl $-1 # mark this as an int |
779 | CFI_ADJUST_CFA_OFFSET 4 | ||
601 | SAVE_ALL | 780 | SAVE_ALL |
602 | xorl %edx,%edx # zero error code | 781 | xorl %edx,%edx # zero error code |
603 | movl %esp,%eax # pt_regs pointer | 782 | movl %esp,%eax # pt_regs pointer |
604 | call do_int3 | 783 | call do_int3 |
605 | jmp ret_from_exception | 784 | jmp ret_from_exception |
785 | CFI_ENDPROC | ||
606 | .previous .text | 786 | .previous .text |
607 | 787 | ||
608 | ENTRY(overflow) | 788 | ENTRY(overflow) |
789 | RING0_INT_FRAME | ||
609 | pushl $0 | 790 | pushl $0 |
791 | CFI_ADJUST_CFA_OFFSET 4 | ||
610 | pushl $do_overflow | 792 | pushl $do_overflow |
793 | CFI_ADJUST_CFA_OFFSET 4 | ||
611 | jmp error_code | 794 | jmp error_code |
795 | CFI_ENDPROC | ||
612 | 796 | ||
613 | ENTRY(bounds) | 797 | ENTRY(bounds) |
798 | RING0_INT_FRAME | ||
614 | pushl $0 | 799 | pushl $0 |
800 | CFI_ADJUST_CFA_OFFSET 4 | ||
615 | pushl $do_bounds | 801 | pushl $do_bounds |
802 | CFI_ADJUST_CFA_OFFSET 4 | ||
616 | jmp error_code | 803 | jmp error_code |
804 | CFI_ENDPROC | ||
617 | 805 | ||
618 | ENTRY(invalid_op) | 806 | ENTRY(invalid_op) |
807 | RING0_INT_FRAME | ||
619 | pushl $0 | 808 | pushl $0 |
809 | CFI_ADJUST_CFA_OFFSET 4 | ||
620 | pushl $do_invalid_op | 810 | pushl $do_invalid_op |
811 | CFI_ADJUST_CFA_OFFSET 4 | ||
621 | jmp error_code | 812 | jmp error_code |
813 | CFI_ENDPROC | ||
622 | 814 | ||
623 | ENTRY(coprocessor_segment_overrun) | 815 | ENTRY(coprocessor_segment_overrun) |
816 | RING0_INT_FRAME | ||
624 | pushl $0 | 817 | pushl $0 |
818 | CFI_ADJUST_CFA_OFFSET 4 | ||
625 | pushl $do_coprocessor_segment_overrun | 819 | pushl $do_coprocessor_segment_overrun |
820 | CFI_ADJUST_CFA_OFFSET 4 | ||
626 | jmp error_code | 821 | jmp error_code |
822 | CFI_ENDPROC | ||
627 | 823 | ||
628 | ENTRY(invalid_TSS) | 824 | ENTRY(invalid_TSS) |
825 | RING0_EC_FRAME | ||
629 | pushl $do_invalid_TSS | 826 | pushl $do_invalid_TSS |
827 | CFI_ADJUST_CFA_OFFSET 4 | ||
630 | jmp error_code | 828 | jmp error_code |
829 | CFI_ENDPROC | ||
631 | 830 | ||
632 | ENTRY(segment_not_present) | 831 | ENTRY(segment_not_present) |
832 | RING0_EC_FRAME | ||
633 | pushl $do_segment_not_present | 833 | pushl $do_segment_not_present |
834 | CFI_ADJUST_CFA_OFFSET 4 | ||
634 | jmp error_code | 835 | jmp error_code |
836 | CFI_ENDPROC | ||
635 | 837 | ||
636 | ENTRY(stack_segment) | 838 | ENTRY(stack_segment) |
839 | RING0_EC_FRAME | ||
637 | pushl $do_stack_segment | 840 | pushl $do_stack_segment |
841 | CFI_ADJUST_CFA_OFFSET 4 | ||
638 | jmp error_code | 842 | jmp error_code |
843 | CFI_ENDPROC | ||
639 | 844 | ||
640 | KPROBE_ENTRY(general_protection) | 845 | KPROBE_ENTRY(general_protection) |
846 | RING0_EC_FRAME | ||
641 | pushl $do_general_protection | 847 | pushl $do_general_protection |
848 | CFI_ADJUST_CFA_OFFSET 4 | ||
642 | jmp error_code | 849 | jmp error_code |
850 | CFI_ENDPROC | ||
643 | .previous .text | 851 | .previous .text |
644 | 852 | ||
645 | ENTRY(alignment_check) | 853 | ENTRY(alignment_check) |
854 | RING0_EC_FRAME | ||
646 | pushl $do_alignment_check | 855 | pushl $do_alignment_check |
856 | CFI_ADJUST_CFA_OFFSET 4 | ||
647 | jmp error_code | 857 | jmp error_code |
858 | CFI_ENDPROC | ||
648 | 859 | ||
649 | KPROBE_ENTRY(page_fault) | 860 | KPROBE_ENTRY(page_fault) |
861 | RING0_EC_FRAME | ||
650 | pushl $do_page_fault | 862 | pushl $do_page_fault |
863 | CFI_ADJUST_CFA_OFFSET 4 | ||
651 | jmp error_code | 864 | jmp error_code |
865 | CFI_ENDPROC | ||
652 | .previous .text | 866 | .previous .text |
653 | 867 | ||
654 | #ifdef CONFIG_X86_MCE | 868 | #ifdef CONFIG_X86_MCE |
655 | ENTRY(machine_check) | 869 | ENTRY(machine_check) |
870 | RING0_INT_FRAME | ||
656 | pushl $0 | 871 | pushl $0 |
872 | CFI_ADJUST_CFA_OFFSET 4 | ||
657 | pushl machine_check_vector | 873 | pushl machine_check_vector |
874 | CFI_ADJUST_CFA_OFFSET 4 | ||
658 | jmp error_code | 875 | jmp error_code |
876 | CFI_ENDPROC | ||
659 | #endif | 877 | #endif |
660 | 878 | ||
661 | ENTRY(spurious_interrupt_bug) | 879 | ENTRY(spurious_interrupt_bug) |
880 | RING0_INT_FRAME | ||
662 | pushl $0 | 881 | pushl $0 |
882 | CFI_ADJUST_CFA_OFFSET 4 | ||
663 | pushl $do_spurious_interrupt_bug | 883 | pushl $do_spurious_interrupt_bug |
884 | CFI_ADJUST_CFA_OFFSET 4 | ||
664 | jmp error_code | 885 | jmp error_code |
886 | CFI_ENDPROC | ||
887 | |||
888 | #ifdef CONFIG_STACK_UNWIND | ||
889 | ENTRY(arch_unwind_init_running) | ||
890 | CFI_STARTPROC | ||
891 | movl 4(%esp), %edx | ||
892 | movl (%esp), %ecx | ||
893 | leal 4(%esp), %eax | ||
894 | movl %ebx, EBX(%edx) | ||
895 | xorl %ebx, %ebx | ||
896 | movl %ebx, ECX(%edx) | ||
897 | movl %ebx, EDX(%edx) | ||
898 | movl %esi, ESI(%edx) | ||
899 | movl %edi, EDI(%edx) | ||
900 | movl %ebp, EBP(%edx) | ||
901 | movl %ebx, EAX(%edx) | ||
902 | movl $__USER_DS, DS(%edx) | ||
903 | movl $__USER_DS, ES(%edx) | ||
904 | movl %ebx, ORIG_EAX(%edx) | ||
905 | movl %ecx, EIP(%edx) | ||
906 | movl 12(%esp), %ecx | ||
907 | movl $__KERNEL_CS, CS(%edx) | ||
908 | movl %ebx, EFLAGS(%edx) | ||
909 | movl %eax, OLDESP(%edx) | ||
910 | movl 8(%esp), %eax | ||
911 | movl %ecx, 8(%esp) | ||
912 | movl EBX(%edx), %ebx | ||
913 | movl $__KERNEL_DS, OLDSS(%edx) | ||
914 | jmpl *%eax | ||
915 | CFI_ENDPROC | ||
916 | ENDPROC(arch_unwind_init_running) | ||
917 | #endif | ||
665 | 918 | ||
666 | .section .rodata,"a" | 919 | .section .rodata,"a" |
667 | #include "syscall_table.S" | 920 | #include "syscall_table.S" |
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c new file mode 100644 index 000000000000..c6737c35815d --- /dev/null +++ b/arch/i386/kernel/hpet.c | |||
@@ -0,0 +1,67 @@ | |||
1 | #include <linux/clocksource.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/hpet.h> | ||
4 | #include <linux/init.h> | ||
5 | |||
6 | #include <asm/hpet.h> | ||
7 | #include <asm/io.h> | ||
8 | |||
9 | #define HPET_MASK CLOCKSOURCE_MASK(32) | ||
10 | #define HPET_SHIFT 22 | ||
11 | |||
12 | /* FSEC = 10^-15 NSEC = 10^-9 */ | ||
13 | #define FSEC_PER_NSEC 1000000 | ||
14 | |||
15 | static void *hpet_ptr; | ||
16 | |||
17 | static cycle_t read_hpet(void) | ||
18 | { | ||
19 | return (cycle_t)readl(hpet_ptr); | ||
20 | } | ||
21 | |||
22 | static struct clocksource clocksource_hpet = { | ||
23 | .name = "hpet", | ||
24 | .rating = 250, | ||
25 | .read = read_hpet, | ||
26 | .mask = HPET_MASK, | ||
27 | .mult = 0, /* set below */ | ||
28 | .shift = HPET_SHIFT, | ||
29 | .is_continuous = 1, | ||
30 | }; | ||
31 | |||
32 | static int __init init_hpet_clocksource(void) | ||
33 | { | ||
34 | unsigned long hpet_period; | ||
35 | void __iomem* hpet_base; | ||
36 | u64 tmp; | ||
37 | |||
38 | if (!hpet_address) | ||
39 | return -ENODEV; | ||
40 | |||
41 | /* calculate the hpet address: */ | ||
42 | hpet_base = | ||
43 | (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
44 | hpet_ptr = hpet_base + HPET_COUNTER; | ||
45 | |||
46 | /* calculate the frequency: */ | ||
47 | hpet_period = readl(hpet_base + HPET_PERIOD); | ||
48 | |||
49 | /* | ||
50 | * hpet period is in femto seconds per cycle | ||
51 | * so we need to convert this to ns/cyc units | ||
52 | * aproximated by mult/2^shift | ||
53 | * | ||
54 | * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift | ||
55 | * fsec/cyc * 1ns/1000000fsec * 2^shift = mult | ||
56 | * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult | ||
57 | * (fsec/cyc << shift)/1000000 = mult | ||
58 | * (hpet_period << shift)/FSEC_PER_NSEC = mult | ||
59 | */ | ||
60 | tmp = (u64)hpet_period << HPET_SHIFT; | ||
61 | do_div(tmp, FSEC_PER_NSEC); | ||
62 | clocksource_hpet.mult = (u32)tmp; | ||
63 | |||
64 | return clocksource_register(&clocksource_hpet); | ||
65 | } | ||
66 | |||
67 | module_init(init_hpet_clocksource); | ||
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c new file mode 100644 index 000000000000..477b24daff53 --- /dev/null +++ b/arch/i386/kernel/i8253.c | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * i8253.c 8253/PIT functions | ||
3 | * | ||
4 | */ | ||
5 | #include <linux/clocksource.h> | ||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/jiffies.h> | ||
8 | #include <linux/sysdev.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/init.h> | ||
11 | |||
12 | #include <asm/smp.h> | ||
13 | #include <asm/delay.h> | ||
14 | #include <asm/i8253.h> | ||
15 | #include <asm/io.h> | ||
16 | |||
17 | #include "io_ports.h" | ||
18 | |||
19 | DEFINE_SPINLOCK(i8253_lock); | ||
20 | EXPORT_SYMBOL(i8253_lock); | ||
21 | |||
22 | void setup_pit_timer(void) | ||
23 | { | ||
24 | unsigned long flags; | ||
25 | |||
26 | spin_lock_irqsave(&i8253_lock, flags); | ||
27 | outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | ||
28 | udelay(10); | ||
29 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
30 | udelay(10); | ||
31 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
32 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
33 | } | ||
34 | |||
35 | /* | ||
36 | * Since the PIT overflows every tick, its not very useful | ||
37 | * to just read by itself. So use jiffies to emulate a free | ||
38 | * running counter: | ||
39 | */ | ||
40 | static cycle_t pit_read(void) | ||
41 | { | ||
42 | unsigned long flags; | ||
43 | int count; | ||
44 | u32 jifs; | ||
45 | static int old_count; | ||
46 | static u32 old_jifs; | ||
47 | |||
48 | spin_lock_irqsave(&i8253_lock, flags); | ||
49 | /* | ||
50 | * Although our caller may have the read side of xtime_lock, | ||
51 | * this is now a seqlock, and we are cheating in this routine | ||
52 | * by having side effects on state that we cannot undo if | ||
53 | * there is a collision on the seqlock and our caller has to | ||
54 | * retry. (Namely, old_jifs and old_count.) So we must treat | ||
55 | * jiffies as volatile despite the lock. We read jiffies | ||
56 | * before latching the timer count to guarantee that although | ||
57 | * the jiffies value might be older than the count (that is, | ||
58 | * the counter may underflow between the last point where | ||
59 | * jiffies was incremented and the point where we latch the | ||
60 | * count), it cannot be newer. | ||
61 | */ | ||
62 | jifs = jiffies; | ||
63 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
64 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
65 | count |= inb_p(PIT_CH0) << 8; | ||
66 | |||
67 | /* VIA686a test code... reset the latch if count > max + 1 */ | ||
68 | if (count > LATCH) { | ||
69 | outb_p(0x34, PIT_MODE); | ||
70 | outb_p(LATCH & 0xff, PIT_CH0); | ||
71 | outb(LATCH >> 8, PIT_CH0); | ||
72 | count = LATCH - 1; | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * It's possible for count to appear to go the wrong way for a | ||
77 | * couple of reasons: | ||
78 | * | ||
79 | * 1. The timer counter underflows, but we haven't handled the | ||
80 | * resulting interrupt and incremented jiffies yet. | ||
81 | * 2. Hardware problem with the timer, not giving us continuous time, | ||
82 | * the counter does small "jumps" upwards on some Pentium systems, | ||
83 | * (see c't 95/10 page 335 for Neptun bug.) | ||
84 | * | ||
85 | * Previous attempts to handle these cases intelligently were | ||
86 | * buggy, so we just do the simple thing now. | ||
87 | */ | ||
88 | if (count > old_count && jifs == old_jifs) { | ||
89 | count = old_count; | ||
90 | } | ||
91 | old_count = count; | ||
92 | old_jifs = jifs; | ||
93 | |||
94 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
95 | |||
96 | count = (LATCH - 1) - count; | ||
97 | |||
98 | return (cycle_t)(jifs * LATCH) + count; | ||
99 | } | ||
100 | |||
101 | static struct clocksource clocksource_pit = { | ||
102 | .name = "pit", | ||
103 | .rating = 110, | ||
104 | .read = pit_read, | ||
105 | .mask = CLOCKSOURCE_MASK(32), | ||
106 | .mult = 0, | ||
107 | .shift = 20, | ||
108 | }; | ||
109 | |||
110 | static int __init init_pit_clocksource(void) | ||
111 | { | ||
112 | if (num_possible_cpus() > 4) /* PIT does not scale! */ | ||
113 | return 0; | ||
114 | |||
115 | clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); | ||
116 | return clocksource_register(&clocksource_pit); | ||
117 | } | ||
118 | module_init(init_pit_clocksource); | ||
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index b7636b96e104..c1a42feba286 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c | |||
@@ -175,7 +175,7 @@ static void mask_and_ack_8259A(unsigned int irq) | |||
175 | * Lightweight spurious IRQ detection. We do not want | 175 | * Lightweight spurious IRQ detection. We do not want |
176 | * to overdo spurious IRQ handling - it's usually a sign | 176 | * to overdo spurious IRQ handling - it's usually a sign |
177 | * of hardware problems, so we only do the checks we can | 177 | * of hardware problems, so we only do the checks we can |
178 | * do without slowing down good hardware unnecesserily. | 178 | * do without slowing down good hardware unnecessarily. |
179 | * | 179 | * |
180 | * Note that IRQ7 and IRQ15 (the two spurious IRQs | 180 | * Note that IRQ7 and IRQ15 (the two spurious IRQs |
181 | * usually resulting from the 8259A-1|2 PICs) occur | 181 | * usually resulting from the 8259A-1|2 PICs) occur |
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index a62df3e764c5..72ae414e4d49 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <asm/desc.h> | 38 | #include <asm/desc.h> |
39 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
40 | #include <asm/i8259.h> | 40 | #include <asm/i8259.h> |
41 | #include <asm/nmi.h> | ||
41 | 42 | ||
42 | #include <mach_apic.h> | 43 | #include <mach_apic.h> |
43 | 44 | ||
@@ -50,6 +51,7 @@ atomic_t irq_mis_count; | |||
50 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 51 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
51 | 52 | ||
52 | static DEFINE_SPINLOCK(ioapic_lock); | 53 | static DEFINE_SPINLOCK(ioapic_lock); |
54 | static DEFINE_SPINLOCK(vector_lock); | ||
53 | 55 | ||
54 | int timer_over_8254 __initdata = 1; | 56 | int timer_over_8254 __initdata = 1; |
55 | 57 | ||
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; | |||
1161 | int assign_irq_vector(int irq) | 1163 | int assign_irq_vector(int irq) |
1162 | { | 1164 | { |
1163 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; | 1165 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; |
1166 | unsigned long flags; | ||
1167 | int vector; | ||
1168 | |||
1169 | BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); | ||
1164 | 1170 | ||
1165 | BUG_ON(irq >= NR_IRQ_VECTORS); | 1171 | spin_lock_irqsave(&vector_lock, flags); |
1166 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) | 1172 | |
1173 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { | ||
1174 | spin_unlock_irqrestore(&vector_lock, flags); | ||
1167 | return IO_APIC_VECTOR(irq); | 1175 | return IO_APIC_VECTOR(irq); |
1176 | } | ||
1168 | next: | 1177 | next: |
1169 | current_vector += 8; | 1178 | current_vector += 8; |
1170 | if (current_vector == SYSCALL_VECTOR) | 1179 | if (current_vector == SYSCALL_VECTOR) |
@@ -1172,16 +1181,21 @@ next: | |||
1172 | 1181 | ||
1173 | if (current_vector >= FIRST_SYSTEM_VECTOR) { | 1182 | if (current_vector >= FIRST_SYSTEM_VECTOR) { |
1174 | offset++; | 1183 | offset++; |
1175 | if (!(offset%8)) | 1184 | if (!(offset%8)) { |
1185 | spin_unlock_irqrestore(&vector_lock, flags); | ||
1176 | return -ENOSPC; | 1186 | return -ENOSPC; |
1187 | } | ||
1177 | current_vector = FIRST_DEVICE_VECTOR + offset; | 1188 | current_vector = FIRST_DEVICE_VECTOR + offset; |
1178 | } | 1189 | } |
1179 | 1190 | ||
1180 | vector_irq[current_vector] = irq; | 1191 | vector = current_vector; |
1192 | vector_irq[vector] = irq; | ||
1181 | if (irq != AUTO_ASSIGN) | 1193 | if (irq != AUTO_ASSIGN) |
1182 | IO_APIC_VECTOR(irq) = current_vector; | 1194 | IO_APIC_VECTOR(irq) = vector; |
1183 | 1195 | ||
1184 | return current_vector; | 1196 | spin_unlock_irqrestore(&vector_lock, flags); |
1197 | |||
1198 | return vector; | ||
1185 | } | 1199 | } |
1186 | 1200 | ||
1187 | static struct hw_interrupt_type ioapic_level_type; | 1201 | static struct hw_interrupt_type ioapic_level_type; |
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type; | |||
1193 | 1207 | ||
1194 | static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) | 1208 | static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) |
1195 | { | 1209 | { |
1196 | if (use_pci_vector() && !platform_legacy_irq(irq)) { | 1210 | unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; |
1197 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1211 | |
1198 | trigger == IOAPIC_LEVEL) | 1212 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1199 | irq_desc[vector].handler = &ioapic_level_type; | 1213 | trigger == IOAPIC_LEVEL) |
1200 | else | 1214 | irq_desc[idx].handler = &ioapic_level_type; |
1201 | irq_desc[vector].handler = &ioapic_edge_type; | 1215 | else |
1202 | set_intr_gate(vector, interrupt[vector]); | 1216 | irq_desc[idx].handler = &ioapic_edge_type; |
1203 | } else { | 1217 | set_intr_gate(vector, interrupt[idx]); |
1204 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
1205 | trigger == IOAPIC_LEVEL) | ||
1206 | irq_desc[irq].handler = &ioapic_level_type; | ||
1207 | else | ||
1208 | irq_desc[irq].handler = &ioapic_edge_type; | ||
1209 | set_intr_gate(vector, interrupt[irq]); | ||
1210 | } | ||
1211 | } | 1218 | } |
1212 | 1219 | ||
1213 | static void __init setup_IO_APIC_irqs(void) | 1220 | static void __init setup_IO_APIC_irqs(void) |
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 49ce4c31b713..c703bc7b0880 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; | |||
53 | */ | 53 | */ |
54 | fastcall unsigned int do_IRQ(struct pt_regs *regs) | 54 | fastcall unsigned int do_IRQ(struct pt_regs *regs) |
55 | { | 55 | { |
56 | /* high bits used in ret_from_ code */ | 56 | /* high bit used in ret_from_ code */ |
57 | int irq = regs->orig_eax & 0xff; | 57 | int irq = ~regs->orig_eax; |
58 | #ifdef CONFIG_4KSTACKS | 58 | #ifdef CONFIG_4KSTACKS |
59 | union irq_ctx *curctx, *irqctx; | 59 | union irq_ctx *curctx, *irqctx; |
60 | u32 *isp; | 60 | u32 *isp; |
@@ -100,8 +100,8 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) | |||
100 | * softirq checks work in the hardirq context. | 100 | * softirq checks work in the hardirq context. |
101 | */ | 101 | */ |
102 | irqctx->tinfo.preempt_count = | 102 | irqctx->tinfo.preempt_count = |
103 | irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK | | 103 | (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | |
104 | curctx->tinfo.preempt_count & SOFTIRQ_MASK; | 104 | (curctx->tinfo.preempt_count & SOFTIRQ_MASK); |
105 | 105 | ||
106 | asm volatile( | 106 | asm volatile( |
107 | " xchgl %%ebx,%%esp \n" | 107 | " xchgl %%ebx,%%esp \n" |
@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
227 | if (i == 0) { | 227 | if (i == 0) { |
228 | seq_printf(p, " "); | 228 | seq_printf(p, " "); |
229 | for_each_online_cpu(j) | 229 | for_each_online_cpu(j) |
230 | seq_printf(p, "CPU%d ",j); | 230 | seq_printf(p, "CPU%-8d",j); |
231 | seq_putc(p, '\n'); | 231 | seq_putc(p, '\n'); |
232 | } | 232 | } |
233 | 233 | ||
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 395a9a6dff88..727e419ad78a 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c | |||
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to) | |||
57 | /* | 57 | /* |
58 | * returns non-zero if opcodes can be boosted. | 58 | * returns non-zero if opcodes can be boosted. |
59 | */ | 59 | */ |
60 | static __always_inline int can_boost(kprobe_opcode_t opcode) | 60 | static __always_inline int can_boost(kprobe_opcode_t *opcodes) |
61 | { | 61 | { |
62 | switch (opcode & 0xf0 ) { | 62 | #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ |
63 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
64 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
65 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
66 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
67 | << (row % 32)) | ||
68 | /* | ||
69 | * Undefined/reserved opcodes, conditional jump, Opcode Extension | ||
70 | * Groups, and some special opcodes can not be boost. | ||
71 | */ | ||
72 | static const unsigned long twobyte_is_boostable[256 / 32] = { | ||
73 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
74 | /* ------------------------------- */ | ||
75 | W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */ | ||
76 | W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ | ||
77 | W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ | ||
78 | W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ | ||
79 | W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ | ||
80 | W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ | ||
81 | W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */ | ||
82 | W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ | ||
83 | W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */ | ||
84 | W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ | ||
85 | W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */ | ||
86 | W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */ | ||
87 | W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */ | ||
88 | W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */ | ||
89 | W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */ | ||
90 | W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */ | ||
91 | /* ------------------------------- */ | ||
92 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
93 | }; | ||
94 | #undef W | ||
95 | kprobe_opcode_t opcode; | ||
96 | kprobe_opcode_t *orig_opcodes = opcodes; | ||
97 | retry: | ||
98 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
99 | return 0; | ||
100 | opcode = *(opcodes++); | ||
101 | |||
102 | /* 2nd-byte opcode */ | ||
103 | if (opcode == 0x0f) { | ||
104 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
105 | return 0; | ||
106 | return test_bit(*opcodes, twobyte_is_boostable); | ||
107 | } | ||
108 | |||
109 | switch (opcode & 0xf0) { | ||
110 | case 0x60: | ||
111 | if (0x63 < opcode && opcode < 0x67) | ||
112 | goto retry; /* prefixes */ | ||
113 | /* can't boost Address-size override and bound */ | ||
114 | return (opcode != 0x62 && opcode != 0x67); | ||
63 | case 0x70: | 115 | case 0x70: |
64 | return 0; /* can't boost conditional jump */ | 116 | return 0; /* can't boost conditional jump */ |
65 | case 0x90: | ||
66 | /* can't boost call and pushf */ | ||
67 | return opcode != 0x9a && opcode != 0x9c; | ||
68 | case 0xc0: | 117 | case 0xc0: |
69 | /* can't boost undefined opcodes and soft-interruptions */ | 118 | /* can't boost software-interruptions */ |
70 | return (0xc1 < opcode && opcode < 0xc6) || | 119 | return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; |
71 | (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf; | ||
72 | case 0xd0: | 120 | case 0xd0: |
73 | /* can boost AA* and XLAT */ | 121 | /* can boost AA* and XLAT */ |
74 | return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); | 122 | return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); |
75 | case 0xe0: | 123 | case 0xe0: |
76 | /* can boost in/out and (may be) jmps */ | 124 | /* can boost in/out and absolute jmps */ |
77 | return (0xe3 < opcode && opcode != 0xe8); | 125 | return ((opcode & 0x04) || opcode == 0xea); |
78 | case 0xf0: | 126 | case 0xf0: |
127 | if ((opcode & 0x0c) == 0 && opcode != 0xf1) | ||
128 | goto retry; /* lock/rep(ne) prefix */ | ||
79 | /* clear and set flags can be boost */ | 129 | /* clear and set flags can be boost */ |
80 | return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); | 130 | return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); |
81 | default: | 131 | default: |
82 | /* currently, can't boost 2 bytes opcodes */ | 132 | if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) |
83 | return opcode != 0x0f; | 133 | goto retry; /* prefixes */ |
134 | /* can't boost CS override and call */ | ||
135 | return (opcode != 0x2e && opcode != 0x9a); | ||
84 | } | 136 | } |
85 | } | 137 | } |
86 | 138 | ||
87 | |||
88 | /* | 139 | /* |
89 | * returns non-zero if opcode modifies the interrupt flag. | 140 | * returns non-zero if opcode modifies the interrupt flag. |
90 | */ | 141 | */ |
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) | |||
109 | 160 | ||
110 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 161 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); |
111 | p->opcode = *p->addr; | 162 | p->opcode = *p->addr; |
112 | if (can_boost(p->opcode)) { | 163 | if (can_boost(p->addr)) { |
113 | p->ainsn.boostable = 0; | 164 | p->ainsn.boostable = 0; |
114 | } else { | 165 | } else { |
115 | p->ainsn.boostable = -1; | 166 | p->ainsn.boostable = -1; |
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
208 | struct kprobe_ctlblk *kcb; | 259 | struct kprobe_ctlblk *kcb; |
209 | #ifdef CONFIG_PREEMPT | 260 | #ifdef CONFIG_PREEMPT |
210 | unsigned pre_preempt_count = preempt_count(); | 261 | unsigned pre_preempt_count = preempt_count(); |
211 | #endif /* CONFIG_PREEMPT */ | 262 | #else |
263 | unsigned pre_preempt_count = 1; | ||
264 | #endif | ||
212 | 265 | ||
213 | addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); | 266 | addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); |
214 | 267 | ||
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
285 | /* handler has already set things up, so skip ss setup */ | 338 | /* handler has already set things up, so skip ss setup */ |
286 | return 1; | 339 | return 1; |
287 | 340 | ||
288 | if (p->ainsn.boostable == 1 && | 341 | ss_probe: |
289 | #ifdef CONFIG_PREEMPT | 342 | if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){ |
290 | !(pre_preempt_count) && /* | ||
291 | * This enables booster when the direct | ||
292 | * execution path aren't preempted. | ||
293 | */ | ||
294 | #endif /* CONFIG_PREEMPT */ | ||
295 | !p->post_handler && !p->break_handler ) { | ||
296 | /* Boost up -- we can execute copied instructions directly */ | 343 | /* Boost up -- we can execute copied instructions directly */ |
297 | reset_current_kprobe(); | 344 | reset_current_kprobe(); |
298 | regs->eip = (unsigned long)p->ainsn.insn; | 345 | regs->eip = (unsigned long)p->ainsn.insn; |
299 | preempt_enable_no_resched(); | 346 | preempt_enable_no_resched(); |
300 | return 1; | 347 | return 1; |
301 | } | 348 | } |
302 | |||
303 | ss_probe: | ||
304 | prepare_singlestep(p, regs); | 349 | prepare_singlestep(p, regs); |
305 | kcb->kprobe_status = KPROBE_HIT_SS; | 350 | kcb->kprobe_status = KPROBE_HIT_SS; |
306 | return 1; | 351 | return 1; |
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index f73d7374a2ba..511abe52a94e 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c | |||
@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( | |||
133 | unsigned long start_address, | 133 | unsigned long start_address, |
134 | unsigned int has_pae) ATTRIB_NORET; | 134 | unsigned int has_pae) ATTRIB_NORET; |
135 | 135 | ||
136 | const extern unsigned char relocate_new_kernel[]; | 136 | extern const unsigned char relocate_new_kernel[]; |
137 | extern void relocate_new_kernel_end(void); | 137 | extern void relocate_new_kernel_end(void); |
138 | const extern unsigned int relocate_new_kernel_size; | 138 | extern const unsigned int relocate_new_kernel_size; |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * A architecture hook called to validate the | 141 | * A architecture hook called to validate the |
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 7a328230e540..d022cb8fd725 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c | |||
@@ -266,7 +266,7 @@ static int msr_class_cpu_callback(struct notifier_block *nfb, unsigned long acti | |||
266 | return NOTIFY_OK; | 266 | return NOTIFY_OK; |
267 | } | 267 | } |
268 | 268 | ||
269 | static struct notifier_block msr_class_cpu_notifier = | 269 | static struct notifier_block __cpuinitdata msr_class_cpu_notifier = |
270 | { | 270 | { |
271 | .notifier_call = msr_class_cpu_callback, | 271 | .notifier_call = msr_class_cpu_callback, |
272 | }; | 272 | }; |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index d43b498ec745..a76e93146585 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -14,21 +14,17 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/config.h> | 16 | #include <linux/config.h> |
17 | #include <linux/mm.h> | ||
18 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
19 | #include <linux/bootmem.h> | ||
20 | #include <linux/smp_lock.h> | ||
21 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
22 | #include <linux/mc146818rtc.h> | ||
23 | #include <linux/kernel_stat.h> | ||
24 | #include <linux/module.h> | 19 | #include <linux/module.h> |
25 | #include <linux/nmi.h> | 20 | #include <linux/nmi.h> |
26 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
27 | #include <linux/sysctl.h> | 22 | #include <linux/sysctl.h> |
23 | #include <linux/percpu.h> | ||
28 | 24 | ||
29 | #include <asm/smp.h> | 25 | #include <asm/smp.h> |
30 | #include <asm/div64.h> | ||
31 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
27 | #include <asm/intel_arch_perfmon.h> | ||
32 | 28 | ||
33 | #include "mach_traps.h" | 29 | #include "mach_traps.h" |
34 | 30 | ||
@@ -100,6 +96,9 @@ int nmi_active; | |||
100 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | 96 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ |
101 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | 97 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) |
102 | 98 | ||
99 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
100 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
101 | |||
103 | #ifdef CONFIG_SMP | 102 | #ifdef CONFIG_SMP |
104 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 103 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
105 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | 104 | * the CPU is idle. To make sure the NMI watchdog really ticks on all |
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str) | |||
212 | 211 | ||
213 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 212 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
214 | 213 | ||
214 | static void disable_intel_arch_watchdog(void); | ||
215 | |||
215 | static void disable_lapic_nmi_watchdog(void) | 216 | static void disable_lapic_nmi_watchdog(void) |
216 | { | 217 | { |
217 | if (nmi_active <= 0) | 218 | if (nmi_active <= 0) |
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void) | |||
221 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | 222 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); |
222 | break; | 223 | break; |
223 | case X86_VENDOR_INTEL: | 224 | case X86_VENDOR_INTEL: |
225 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
226 | disable_intel_arch_watchdog(); | ||
227 | break; | ||
228 | } | ||
224 | switch (boot_cpu_data.x86) { | 229 | switch (boot_cpu_data.x86) { |
225 | case 6: | 230 | case 6: |
226 | if (boot_cpu_data.x86_model > 0xd) | 231 | if (boot_cpu_data.x86_model > 0xd) |
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void) | |||
449 | return 1; | 454 | return 1; |
450 | } | 455 | } |
451 | 456 | ||
457 | static void disable_intel_arch_watchdog(void) | ||
458 | { | ||
459 | unsigned ebx; | ||
460 | |||
461 | /* | ||
462 | * Check whether the Architectural PerfMon supports | ||
463 | * Unhalted Core Cycles Event or not. | ||
464 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | ||
465 | */ | ||
466 | ebx = cpuid_ebx(10); | ||
467 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
468 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | ||
469 | } | ||
470 | |||
471 | static int setup_intel_arch_watchdog(void) | ||
472 | { | ||
473 | unsigned int evntsel; | ||
474 | unsigned ebx; | ||
475 | |||
476 | /* | ||
477 | * Check whether the Architectural PerfMon supports | ||
478 | * Unhalted Core Cycles Event or not. | ||
479 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | ||
480 | */ | ||
481 | ebx = cpuid_ebx(10); | ||
482 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
483 | return 0; | ||
484 | |||
485 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
486 | |||
487 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | ||
488 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | ||
489 | |||
490 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
491 | | ARCH_PERFMON_EVENTSEL_OS | ||
492 | | ARCH_PERFMON_EVENTSEL_USR | ||
493 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
494 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
495 | |||
496 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | ||
497 | write_watchdog_counter("INTEL_ARCH_PERFCTR0"); | ||
498 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
499 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
500 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | ||
501 | return 1; | ||
502 | } | ||
503 | |||
452 | void setup_apic_nmi_watchdog (void) | 504 | void setup_apic_nmi_watchdog (void) |
453 | { | 505 | { |
454 | switch (boot_cpu_data.x86_vendor) { | 506 | switch (boot_cpu_data.x86_vendor) { |
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void) | |||
458 | setup_k7_watchdog(); | 510 | setup_k7_watchdog(); |
459 | break; | 511 | break; |
460 | case X86_VENDOR_INTEL: | 512 | case X86_VENDOR_INTEL: |
513 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
514 | if (!setup_intel_arch_watchdog()) | ||
515 | return; | ||
516 | break; | ||
517 | } | ||
461 | switch (boot_cpu_data.x86) { | 518 | switch (boot_cpu_data.x86) { |
462 | case 6: | 519 | case 6: |
463 | if (boot_cpu_data.x86_model > 0xd) | 520 | if (boot_cpu_data.x86_model > 0xd) |
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
561 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 618 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); |
562 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 619 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
563 | } | 620 | } |
564 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { | 621 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || |
622 | nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
565 | /* Only P6 based Pentium M need to re-unmask | 623 | /* Only P6 based Pentium M need to re-unmask |
566 | * the apic vector but it doesn't hurt | 624 | * the apic vector but it doesn't hurt |
567 | * other P6 variant */ | 625 | * other P6 variant */ |
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c index 5f5b075f860a..0caf14652bad 100644 --- a/arch/i386/kernel/numaq.c +++ b/arch/i386/kernel/numaq.c | |||
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void) | |||
79 | return 1; | 79 | return 1; |
80 | } | 80 | } |
81 | 81 | ||
82 | static int __init numaq_dsc_disable(void) | 82 | static int __init numaq_tsc_disable(void) |
83 | { | 83 | { |
84 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | 84 | if (num_online_nodes() > 1) { |
85 | tsc_disable = 1; | 85 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); |
86 | tsc_disable = 1; | ||
87 | } | ||
86 | return 0; | 88 | return 0; |
87 | } | 89 | } |
88 | core_initcall(numaq_dsc_disable); | 90 | arch_initcall(numaq_tsc_disable); |
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 6259afea46d1..6946b06e2784 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -102,7 +102,7 @@ void default_idle(void) | |||
102 | local_irq_enable(); | 102 | local_irq_enable(); |
103 | 103 | ||
104 | if (!hlt_counter && boot_cpu_data.hlt_works_ok) { | 104 | if (!hlt_counter && boot_cpu_data.hlt_works_ok) { |
105 | clear_thread_flag(TIF_POLLING_NRFLAG); | 105 | current_thread_info()->status &= ~TS_POLLING; |
106 | smp_mb__after_clear_bit(); | 106 | smp_mb__after_clear_bit(); |
107 | while (!need_resched()) { | 107 | while (!need_resched()) { |
108 | local_irq_disable(); | 108 | local_irq_disable(); |
@@ -111,7 +111,7 @@ void default_idle(void) | |||
111 | else | 111 | else |
112 | local_irq_enable(); | 112 | local_irq_enable(); |
113 | } | 113 | } |
114 | set_thread_flag(TIF_POLLING_NRFLAG); | 114 | current_thread_info()->status |= TS_POLLING; |
115 | } else { | 115 | } else { |
116 | while (!need_resched()) | 116 | while (!need_resched()) |
117 | cpu_relax(); | 117 | cpu_relax(); |
@@ -174,7 +174,7 @@ void cpu_idle(void) | |||
174 | { | 174 | { |
175 | int cpu = smp_processor_id(); | 175 | int cpu = smp_processor_id(); |
176 | 176 | ||
177 | set_thread_flag(TIF_POLLING_NRFLAG); | 177 | current_thread_info()->status |= TS_POLLING; |
178 | 178 | ||
179 | /* endless idle loop with no priority at all */ | 179 | /* endless idle loop with no priority at all */ |
180 | while (1) { | 180 | while (1) { |
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs) | |||
312 | cr3 = read_cr3(); | 312 | cr3 = read_cr3(); |
313 | cr4 = read_cr4_safe(); | 313 | cr4 = read_cr4_safe(); |
314 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); | 314 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); |
315 | show_trace(NULL, ®s->esp); | 315 | show_trace(NULL, regs, ®s->esp); |
316 | } | 316 | } |
317 | 317 | ||
318 | /* | 318 | /* |
diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 321f5fd26e75..9bf590cefc7d 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/errno.h> | 9 | #include <linux/errno.h> |
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/mutex.h> | ||
12 | #include <linux/pci.h> | 13 | #include <linux/pci.h> |
13 | 14 | ||
14 | #include <linux/scx200.h> | 15 | #include <linux/scx200.h> |
@@ -45,11 +46,19 @@ static struct pci_driver scx200_pci_driver = { | |||
45 | .probe = scx200_probe, | 46 | .probe = scx200_probe, |
46 | }; | 47 | }; |
47 | 48 | ||
48 | static DEFINE_SPINLOCK(scx200_gpio_config_lock); | 49 | static DEFINE_MUTEX(scx200_gpio_config_lock); |
49 | 50 | ||
50 | static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | 51 | static void __devinit scx200_init_shadow(void) |
51 | { | 52 | { |
52 | int bank; | 53 | int bank; |
54 | |||
55 | /* read the current values driven on the GPIO signals */ | ||
56 | for (bank = 0; bank < 2; ++bank) | ||
57 | scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); | ||
58 | } | ||
59 | |||
60 | static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | ||
61 | { | ||
53 | unsigned base; | 62 | unsigned base; |
54 | 63 | ||
55 | if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || | 64 | if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || |
@@ -63,10 +72,7 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_ | |||
63 | } | 72 | } |
64 | 73 | ||
65 | scx200_gpio_base = base; | 74 | scx200_gpio_base = base; |
66 | 75 | scx200_init_shadow(); | |
67 | /* read the current values driven on the GPIO signals */ | ||
68 | for (bank = 0; bank < 2; ++bank) | ||
69 | scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); | ||
70 | 76 | ||
71 | } else { | 77 | } else { |
72 | /* find the base of the Configuration Block */ | 78 | /* find the base of the Configuration Block */ |
@@ -87,12 +93,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_ | |||
87 | return 0; | 93 | return 0; |
88 | } | 94 | } |
89 | 95 | ||
90 | u32 scx200_gpio_configure(int index, u32 mask, u32 bits) | 96 | u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) |
91 | { | 97 | { |
92 | u32 config, new_config; | 98 | u32 config, new_config; |
93 | unsigned long flags; | ||
94 | 99 | ||
95 | spin_lock_irqsave(&scx200_gpio_config_lock, flags); | 100 | mutex_lock(&scx200_gpio_config_lock); |
96 | 101 | ||
97 | outl(index, scx200_gpio_base + 0x20); | 102 | outl(index, scx200_gpio_base + 0x20); |
98 | config = inl(scx200_gpio_base + 0x24); | 103 | config = inl(scx200_gpio_base + 0x24); |
@@ -100,45 +105,11 @@ u32 scx200_gpio_configure(int index, u32 mask, u32 bits) | |||
100 | new_config = (config & mask) | bits; | 105 | new_config = (config & mask) | bits; |
101 | outl(new_config, scx200_gpio_base + 0x24); | 106 | outl(new_config, scx200_gpio_base + 0x24); |
102 | 107 | ||
103 | spin_unlock_irqrestore(&scx200_gpio_config_lock, flags); | 108 | mutex_unlock(&scx200_gpio_config_lock); |
104 | 109 | ||
105 | return config; | 110 | return config; |
106 | } | 111 | } |
107 | 112 | ||
108 | #if 0 | ||
109 | void scx200_gpio_dump(unsigned index) | ||
110 | { | ||
111 | u32 config = scx200_gpio_configure(index, ~0, 0); | ||
112 | printk(KERN_DEBUG "GPIO%02u: 0x%08lx", index, (unsigned long)config); | ||
113 | |||
114 | if (config & 1) | ||
115 | printk(" OE"); /* output enabled */ | ||
116 | else | ||
117 | printk(" TS"); /* tristate */ | ||
118 | if (config & 2) | ||
119 | printk(" PP"); /* push pull */ | ||
120 | else | ||
121 | printk(" OD"); /* open drain */ | ||
122 | if (config & 4) | ||
123 | printk(" PUE"); /* pull up enabled */ | ||
124 | else | ||
125 | printk(" PUD"); /* pull up disabled */ | ||
126 | if (config & 8) | ||
127 | printk(" LOCKED"); /* locked */ | ||
128 | if (config & 16) | ||
129 | printk(" LEVEL"); /* level input */ | ||
130 | else | ||
131 | printk(" EDGE"); /* edge input */ | ||
132 | if (config & 32) | ||
133 | printk(" HI"); /* trigger on rising edge */ | ||
134 | else | ||
135 | printk(" LO"); /* trigger on falling edge */ | ||
136 | if (config & 64) | ||
137 | printk(" DEBOUNCE"); /* debounce */ | ||
138 | printk("\n"); | ||
139 | } | ||
140 | #endif /* 0 */ | ||
141 | |||
142 | static int __init scx200_init(void) | 113 | static int __init scx200_init(void) |
143 | { | 114 | { |
144 | printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); | 115 | printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); |
@@ -159,10 +130,3 @@ EXPORT_SYMBOL(scx200_gpio_base); | |||
159 | EXPORT_SYMBOL(scx200_gpio_shadow); | 130 | EXPORT_SYMBOL(scx200_gpio_shadow); |
160 | EXPORT_SYMBOL(scx200_gpio_configure); | 131 | EXPORT_SYMBOL(scx200_gpio_configure); |
161 | EXPORT_SYMBOL(scx200_cb_base); | 132 | EXPORT_SYMBOL(scx200_cb_base); |
162 | |||
163 | /* | ||
164 | Local variables: | ||
165 | compile-command: "make -k -C ../../.. SUBDIRS=arch/i386/kernel modules" | ||
166 | c-basic-offset: 8 | ||
167 | End: | ||
168 | */ | ||
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 6bef9273733e..4a65040cc624 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -1575,6 +1575,7 @@ void __init setup_arch(char **cmdline_p) | |||
1575 | conswitchp = &dummy_con; | 1575 | conswitchp = &dummy_con; |
1576 | #endif | 1576 | #endif |
1577 | #endif | 1577 | #endif |
1578 | tsc_init(); | ||
1578 | } | 1579 | } |
1579 | 1580 | ||
1580 | static __init int add_pcspkr(void) | 1581 | static __init int add_pcspkr(void) |
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 5c352c3a9e7f..43002cfb40c4 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c | |||
@@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
351 | goto give_sigsegv; | 351 | goto give_sigsegv; |
352 | } | 352 | } |
353 | 353 | ||
354 | restorer = &__kernel_sigreturn; | 354 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); |
355 | if (ka->sa.sa_flags & SA_RESTORER) | 355 | if (ka->sa.sa_flags & SA_RESTORER) |
356 | restorer = ka->sa.sa_restorer; | 356 | restorer = ka->sa.sa_restorer; |
357 | 357 | ||
@@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
447 | goto give_sigsegv; | 447 | goto give_sigsegv; |
448 | 448 | ||
449 | /* Set up to return from userspace. */ | 449 | /* Set up to return from userspace. */ |
450 | restorer = &__kernel_rt_sigreturn; | 450 | restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); |
451 | if (ka->sa.sa_flags & SA_RESTORER) | 451 | if (ka->sa.sa_flags & SA_RESTORER) |
452 | restorer = ka->sa.sa_restorer; | 452 | restorer = ka->sa.sa_restorer; |
453 | err |= __put_user(restorer, &frame->pretcode); | 453 | err |= __put_user(restorer, &frame->pretcode); |
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index d134e9643a58..c10789d7a9d3 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m | |||
114 | 114 | ||
115 | static inline int __prepare_ICR (unsigned int shortcut, int vector) | 115 | static inline int __prepare_ICR (unsigned int shortcut, int vector) |
116 | { | 116 | { |
117 | return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; | 117 | unsigned int icr = shortcut | APIC_DEST_LOGICAL; |
118 | |||
119 | switch (vector) { | ||
120 | default: | ||
121 | icr |= APIC_DM_FIXED | vector; | ||
122 | break; | ||
123 | case NMI_VECTOR: | ||
124 | icr |= APIC_DM_NMI; | ||
125 | break; | ||
126 | } | ||
127 | return icr; | ||
118 | } | 128 | } |
119 | 129 | ||
120 | static inline int __prepare_ICR2 (unsigned int mask) | 130 | static inline int __prepare_ICR2 (unsigned int mask) |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bd0ca5c9f053..89e7315e539c 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/tlbflush.h> | 52 | #include <asm/tlbflush.h> |
53 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
54 | #include <asm/arch_hooks.h> | 54 | #include <asm/arch_hooks.h> |
55 | #include <asm/nmi.h> | ||
55 | 56 | ||
56 | #include <mach_apic.h> | 57 | #include <mach_apic.h> |
57 | #include <mach_wakecpu.h> | 58 | #include <mach_wakecpu.h> |
@@ -66,12 +67,6 @@ int smp_num_siblings = 1; | |||
66 | EXPORT_SYMBOL(smp_num_siblings); | 67 | EXPORT_SYMBOL(smp_num_siblings); |
67 | #endif | 68 | #endif |
68 | 69 | ||
69 | /* Package ID of each logical CPU */ | ||
70 | int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; | ||
71 | |||
72 | /* Core ID of each logical CPU */ | ||
73 | int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; | ||
74 | |||
75 | /* Last level cache ID of each logical CPU */ | 70 | /* Last level cache ID of each logical CPU */ |
76 | int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | 71 | int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; |
77 | 72 | ||
@@ -453,10 +448,12 @@ cpumask_t cpu_coregroup_map(int cpu) | |||
453 | struct cpuinfo_x86 *c = cpu_data + cpu; | 448 | struct cpuinfo_x86 *c = cpu_data + cpu; |
454 | /* | 449 | /* |
455 | * For perf, we return last level cache shared map. | 450 | * For perf, we return last level cache shared map. |
456 | * TBD: when power saving sched policy is added, we will return | 451 | * And for power savings, we return cpu_core_map |
457 | * cpu_core_map when power saving policy is enabled | ||
458 | */ | 452 | */ |
459 | return c->llc_shared_map; | 453 | if (sched_mc_power_savings || sched_smt_power_savings) |
454 | return cpu_core_map[cpu]; | ||
455 | else | ||
456 | return c->llc_shared_map; | ||
460 | } | 457 | } |
461 | 458 | ||
462 | /* representing cpus for which sibling maps can be computed */ | 459 | /* representing cpus for which sibling maps can be computed */ |
@@ -472,8 +469,8 @@ set_cpu_sibling_map(int cpu) | |||
472 | 469 | ||
473 | if (smp_num_siblings > 1) { | 470 | if (smp_num_siblings > 1) { |
474 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 471 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
475 | if (phys_proc_id[cpu] == phys_proc_id[i] && | 472 | if (c[cpu].phys_proc_id == c[i].phys_proc_id && |
476 | cpu_core_id[cpu] == cpu_core_id[i]) { | 473 | c[cpu].cpu_core_id == c[i].cpu_core_id) { |
477 | cpu_set(i, cpu_sibling_map[cpu]); | 474 | cpu_set(i, cpu_sibling_map[cpu]); |
478 | cpu_set(cpu, cpu_sibling_map[i]); | 475 | cpu_set(cpu, cpu_sibling_map[i]); |
479 | cpu_set(i, cpu_core_map[cpu]); | 476 | cpu_set(i, cpu_core_map[cpu]); |
@@ -500,7 +497,7 @@ set_cpu_sibling_map(int cpu) | |||
500 | cpu_set(i, c[cpu].llc_shared_map); | 497 | cpu_set(i, c[cpu].llc_shared_map); |
501 | cpu_set(cpu, c[i].llc_shared_map); | 498 | cpu_set(cpu, c[i].llc_shared_map); |
502 | } | 499 | } |
503 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | 500 | if (c[cpu].phys_proc_id == c[i].phys_proc_id) { |
504 | cpu_set(i, cpu_core_map[cpu]); | 501 | cpu_set(i, cpu_core_map[cpu]); |
505 | cpu_set(cpu, cpu_core_map[i]); | 502 | cpu_set(cpu, cpu_core_map[i]); |
506 | /* | 503 | /* |
@@ -1055,6 +1052,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1055 | struct warm_boot_cpu_info info; | 1052 | struct warm_boot_cpu_info info; |
1056 | struct work_struct task; | 1053 | struct work_struct task; |
1057 | int apicid, ret; | 1054 | int apicid, ret; |
1055 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
1058 | 1056 | ||
1059 | apicid = x86_cpu_to_apicid[cpu]; | 1057 | apicid = x86_cpu_to_apicid[cpu]; |
1060 | if (apicid == BAD_APICID) { | 1058 | if (apicid == BAD_APICID) { |
@@ -1062,6 +1060,18 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1062 | goto exit; | 1060 | goto exit; |
1063 | } | 1061 | } |
1064 | 1062 | ||
1063 | /* | ||
1064 | * the CPU isn't initialized at boot time, allocate gdt table here. | ||
1065 | * cpu_init will initialize it | ||
1066 | */ | ||
1067 | if (!cpu_gdt_descr->address) { | ||
1068 | cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL); | ||
1069 | if (!cpu_gdt_descr->address) | ||
1070 | printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); | ||
1071 | ret = -ENOMEM; | ||
1072 | goto exit; | ||
1073 | } | ||
1074 | |||
1065 | info.complete = &done; | 1075 | info.complete = &done; |
1066 | info.apicid = apicid; | 1076 | info.apicid = apicid; |
1067 | info.cpu = cpu; | 1077 | info.cpu = cpu; |
@@ -1339,8 +1349,8 @@ remove_siblinginfo(int cpu) | |||
1339 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1349 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1340 | cpus_clear(cpu_sibling_map[cpu]); | 1350 | cpus_clear(cpu_sibling_map[cpu]); |
1341 | cpus_clear(cpu_core_map[cpu]); | 1351 | cpus_clear(cpu_core_map[cpu]); |
1342 | phys_proc_id[cpu] = BAD_APICID; | 1352 | c[cpu].phys_proc_id = 0; |
1343 | cpu_core_id[cpu] = BAD_APICID; | 1353 | c[cpu].cpu_core_id = 0; |
1344 | cpu_clear(cpu, cpu_sibling_setup_map); | 1354 | cpu_clear(cpu, cpu_sibling_setup_map); |
1345 | } | 1355 | } |
1346 | 1356 | ||
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 0bada1870bdf..c60419dee018 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * linux/arch/i386/kernel/sysenter.c | 2 | * linux/arch/i386/kernel/sysenter.c |
3 | * | 3 | * |
4 | * (C) Copyright 2002 Linus Torvalds | 4 | * (C) Copyright 2002 Linus Torvalds |
5 | * Portions based on the vdso-randomization code from exec-shield: | ||
6 | * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar | ||
5 | * | 7 | * |
6 | * This file contains the needed initializations to support sysenter. | 8 | * This file contains the needed initializations to support sysenter. |
7 | */ | 9 | */ |
@@ -13,12 +15,31 @@ | |||
13 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
14 | #include <linux/string.h> | 16 | #include <linux/string.h> |
15 | #include <linux/elf.h> | 17 | #include <linux/elf.h> |
18 | #include <linux/mm.h> | ||
19 | #include <linux/module.h> | ||
16 | 20 | ||
17 | #include <asm/cpufeature.h> | 21 | #include <asm/cpufeature.h> |
18 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
19 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
20 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
21 | 25 | ||
26 | /* | ||
27 | * Should the kernel map a VDSO page into processes and pass its | ||
28 | * address down to glibc upon exec()? | ||
29 | */ | ||
30 | unsigned int __read_mostly vdso_enabled = 1; | ||
31 | |||
32 | EXPORT_SYMBOL_GPL(vdso_enabled); | ||
33 | |||
34 | static int __init vdso_setup(char *s) | ||
35 | { | ||
36 | vdso_enabled = simple_strtoul(s, NULL, 0); | ||
37 | |||
38 | return 1; | ||
39 | } | ||
40 | |||
41 | __setup("vdso=", vdso_setup); | ||
42 | |||
22 | extern asmlinkage void sysenter_entry(void); | 43 | extern asmlinkage void sysenter_entry(void); |
23 | 44 | ||
24 | void enable_sep_cpu(void) | 45 | void enable_sep_cpu(void) |
@@ -45,23 +66,122 @@ void enable_sep_cpu(void) | |||
45 | */ | 66 | */ |
46 | extern const char vsyscall_int80_start, vsyscall_int80_end; | 67 | extern const char vsyscall_int80_start, vsyscall_int80_end; |
47 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 68 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
69 | static void *syscall_page; | ||
48 | 70 | ||
49 | int __init sysenter_setup(void) | 71 | int __init sysenter_setup(void) |
50 | { | 72 | { |
51 | void *page = (void *)get_zeroed_page(GFP_ATOMIC); | 73 | syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
52 | 74 | ||
53 | __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); | 75 | #ifdef CONFIG_COMPAT_VDSO |
76 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); | ||
77 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | ||
78 | #else | ||
79 | /* | ||
80 | * In the non-compat case the ELF coredumping code needs the fixmap: | ||
81 | */ | ||
82 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO); | ||
83 | #endif | ||
54 | 84 | ||
55 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
56 | memcpy(page, | 86 | memcpy(syscall_page, |
57 | &vsyscall_int80_start, | 87 | &vsyscall_int80_start, |
58 | &vsyscall_int80_end - &vsyscall_int80_start); | 88 | &vsyscall_int80_end - &vsyscall_int80_start); |
59 | return 0; | 89 | return 0; |
60 | } | 90 | } |
61 | 91 | ||
62 | memcpy(page, | 92 | memcpy(syscall_page, |
63 | &vsyscall_sysenter_start, | 93 | &vsyscall_sysenter_start, |
64 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | 94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); |
65 | 95 | ||
66 | return 0; | 96 | return 0; |
67 | } | 97 | } |
98 | |||
99 | static struct page *syscall_nopage(struct vm_area_struct *vma, | ||
100 | unsigned long adr, int *type) | ||
101 | { | ||
102 | struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); | ||
103 | get_page(p); | ||
104 | return p; | ||
105 | } | ||
106 | |||
107 | /* Prevent VMA merging */ | ||
108 | static void syscall_vma_close(struct vm_area_struct *vma) | ||
109 | { | ||
110 | } | ||
111 | |||
112 | static struct vm_operations_struct syscall_vm_ops = { | ||
113 | .close = syscall_vma_close, | ||
114 | .nopage = syscall_nopage, | ||
115 | }; | ||
116 | |||
117 | /* Defined in vsyscall-sysenter.S */ | ||
118 | extern void SYSENTER_RETURN; | ||
119 | |||
120 | /* Setup a VMA at program startup for the vsyscall page */ | ||
121 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
122 | { | ||
123 | struct vm_area_struct *vma; | ||
124 | struct mm_struct *mm = current->mm; | ||
125 | unsigned long addr; | ||
126 | int ret; | ||
127 | |||
128 | down_write(&mm->mmap_sem); | ||
129 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
130 | if (IS_ERR_VALUE(addr)) { | ||
131 | ret = addr; | ||
132 | goto up_fail; | ||
133 | } | ||
134 | |||
135 | vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); | ||
136 | if (!vma) { | ||
137 | ret = -ENOMEM; | ||
138 | goto up_fail; | ||
139 | } | ||
140 | |||
141 | vma->vm_start = addr; | ||
142 | vma->vm_end = addr + PAGE_SIZE; | ||
143 | /* MAYWRITE to allow gdb to COW and set breakpoints */ | ||
144 | vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; | ||
145 | vma->vm_flags |= mm->def_flags; | ||
146 | vma->vm_page_prot = protection_map[vma->vm_flags & 7]; | ||
147 | vma->vm_ops = &syscall_vm_ops; | ||
148 | vma->vm_mm = mm; | ||
149 | |||
150 | ret = insert_vm_struct(mm, vma); | ||
151 | if (ret) | ||
152 | goto free_vma; | ||
153 | |||
154 | current->mm->context.vdso = (void *)addr; | ||
155 | current_thread_info()->sysenter_return = | ||
156 | (void *)VDSO_SYM(&SYSENTER_RETURN); | ||
157 | mm->total_vm++; | ||
158 | up_fail: | ||
159 | up_write(&mm->mmap_sem); | ||
160 | return ret; | ||
161 | |||
162 | free_vma: | ||
163 | kmem_cache_free(vm_area_cachep, vma); | ||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | const char *arch_vma_name(struct vm_area_struct *vma) | ||
168 | { | ||
169 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) | ||
170 | return "[vdso]"; | ||
171 | return NULL; | ||
172 | } | ||
173 | |||
174 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | ||
175 | { | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | int in_gate_area(struct task_struct *task, unsigned long addr) | ||
180 | { | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | int in_gate_area_no_task(unsigned long addr) | ||
185 | { | ||
186 | return 0; | ||
187 | } | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 9d3074759856..5f43d0410122 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies; | |||
82 | DEFINE_SPINLOCK(rtc_lock); | 82 | DEFINE_SPINLOCK(rtc_lock); |
83 | EXPORT_SYMBOL(rtc_lock); | 83 | EXPORT_SYMBOL(rtc_lock); |
84 | 84 | ||
85 | #include <asm/i8253.h> | ||
86 | |||
87 | DEFINE_SPINLOCK(i8253_lock); | ||
88 | EXPORT_SYMBOL(i8253_lock); | ||
89 | |||
90 | struct timer_opts *cur_timer __read_mostly = &timer_none; | ||
91 | |||
92 | /* | 85 | /* |
93 | * This is a special lock that is owned by the CPU and holds the index | 86 | * This is a special lock that is owned by the CPU and holds the index |
94 | * register we are working with. It is required for NMI access to the | 87 | * register we are working with. It is required for NMI access to the |
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr) | |||
118 | } | 111 | } |
119 | EXPORT_SYMBOL(rtc_cmos_write); | 112 | EXPORT_SYMBOL(rtc_cmos_write); |
120 | 113 | ||
121 | /* | ||
122 | * This version of gettimeofday has microsecond resolution | ||
123 | * and better than microsecond precision on fast x86 machines with TSC. | ||
124 | */ | ||
125 | void do_gettimeofday(struct timeval *tv) | ||
126 | { | ||
127 | unsigned long seq; | ||
128 | unsigned long usec, sec; | ||
129 | unsigned long max_ntp_tick; | ||
130 | |||
131 | do { | ||
132 | unsigned long lost; | ||
133 | |||
134 | seq = read_seqbegin(&xtime_lock); | ||
135 | |||
136 | usec = cur_timer->get_offset(); | ||
137 | lost = jiffies - wall_jiffies; | ||
138 | |||
139 | /* | ||
140 | * If time_adjust is negative then NTP is slowing the clock | ||
141 | * so make sure not to go into next possible interval. | ||
142 | * Better to lose some accuracy than have time go backwards.. | ||
143 | */ | ||
144 | if (unlikely(time_adjust < 0)) { | ||
145 | max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; | ||
146 | usec = min(usec, max_ntp_tick); | ||
147 | |||
148 | if (lost) | ||
149 | usec += lost * max_ntp_tick; | ||
150 | } | ||
151 | else if (unlikely(lost)) | ||
152 | usec += lost * (USEC_PER_SEC / HZ); | ||
153 | |||
154 | sec = xtime.tv_sec; | ||
155 | usec += (xtime.tv_nsec / 1000); | ||
156 | } while (read_seqretry(&xtime_lock, seq)); | ||
157 | |||
158 | while (usec >= 1000000) { | ||
159 | usec -= 1000000; | ||
160 | sec++; | ||
161 | } | ||
162 | |||
163 | tv->tv_sec = sec; | ||
164 | tv->tv_usec = usec; | ||
165 | } | ||
166 | |||
167 | EXPORT_SYMBOL(do_gettimeofday); | ||
168 | |||
169 | int do_settimeofday(struct timespec *tv) | ||
170 | { | ||
171 | time_t wtm_sec, sec = tv->tv_sec; | ||
172 | long wtm_nsec, nsec = tv->tv_nsec; | ||
173 | |||
174 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
175 | return -EINVAL; | ||
176 | |||
177 | write_seqlock_irq(&xtime_lock); | ||
178 | /* | ||
179 | * This is revolting. We need to set "xtime" correctly. However, the | ||
180 | * value in this location is the value at the most recent update of | ||
181 | * wall time. Discover what correction gettimeofday() would have | ||
182 | * made, and then undo it! | ||
183 | */ | ||
184 | nsec -= cur_timer->get_offset() * NSEC_PER_USEC; | ||
185 | nsec -= (jiffies - wall_jiffies) * TICK_NSEC; | ||
186 | |||
187 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
188 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
189 | |||
190 | set_normalized_timespec(&xtime, sec, nsec); | ||
191 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
192 | |||
193 | ntp_clear(); | ||
194 | write_sequnlock_irq(&xtime_lock); | ||
195 | clock_was_set(); | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | EXPORT_SYMBOL(do_settimeofday); | ||
200 | |||
201 | static int set_rtc_mmss(unsigned long nowtime) | 114 | static int set_rtc_mmss(unsigned long nowtime) |
202 | { | 115 | { |
203 | int retval; | 116 | int retval; |
204 | 117 | unsigned long flags; | |
205 | WARN_ON(irqs_disabled()); | ||
206 | 118 | ||
207 | /* gets recalled with irq locally disabled */ | 119 | /* gets recalled with irq locally disabled */ |
208 | spin_lock_irq(&rtc_lock); | 120 | /* XXX - does irqsave resolve this? -johnstul */ |
121 | spin_lock_irqsave(&rtc_lock, flags); | ||
209 | if (efi_enabled) | 122 | if (efi_enabled) |
210 | retval = efi_set_rtc_mmss(nowtime); | 123 | retval = efi_set_rtc_mmss(nowtime); |
211 | else | 124 | else |
212 | retval = mach_set_rtc_mmss(nowtime); | 125 | retval = mach_set_rtc_mmss(nowtime); |
213 | spin_unlock_irq(&rtc_lock); | 126 | spin_unlock_irqrestore(&rtc_lock, flags); |
214 | 127 | ||
215 | return retval; | 128 | return retval; |
216 | } | 129 | } |
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime) | |||
218 | 131 | ||
219 | int timer_ack; | 132 | int timer_ack; |
220 | 133 | ||
221 | /* monotonic_clock(): returns # of nanoseconds passed since time_init() | ||
222 | * Note: This function is required to return accurate | ||
223 | * time even in the absence of multiple timer ticks. | ||
224 | */ | ||
225 | unsigned long long monotonic_clock(void) | ||
226 | { | ||
227 | return cur_timer->monotonic_clock(); | ||
228 | } | ||
229 | EXPORT_SYMBOL(monotonic_clock); | ||
230 | |||
231 | #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) | 134 | #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) |
232 | unsigned long profile_pc(struct pt_regs *regs) | 135 | unsigned long profile_pc(struct pt_regs *regs) |
233 | { | 136 | { |
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc); | |||
242 | #endif | 145 | #endif |
243 | 146 | ||
244 | /* | 147 | /* |
245 | * timer_interrupt() needs to keep up the real-time clock, | 148 | * This is the same as the above, except we _also_ save the current |
246 | * as well as call the "do_timer()" routine every clocktick | 149 | * Time Stamp Counter value at the time of the timer interrupt, so that |
150 | * we later on can estimate the time of day more exactly. | ||
247 | */ | 151 | */ |
248 | static inline void do_timer_interrupt(int irq, struct pt_regs *regs) | 152 | irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) |
249 | { | 153 | { |
154 | /* | ||
155 | * Here we are in the timer irq handler. We just have irqs locally | ||
156 | * disabled but we don't know if the timer_bh is running on the other | ||
157 | * CPU. We need to avoid to SMP race with it. NOTE: we don' t need | ||
158 | * the irq version of write_lock because as just said we have irq | ||
159 | * locally disabled. -arca | ||
160 | */ | ||
161 | write_seqlock(&xtime_lock); | ||
162 | |||
250 | #ifdef CONFIG_X86_IO_APIC | 163 | #ifdef CONFIG_X86_IO_APIC |
251 | if (timer_ack) { | 164 | if (timer_ack) { |
252 | /* | 165 | /* |
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs) | |||
279 | irq = inb_p( 0x61 ); /* read the current state */ | 192 | irq = inb_p( 0x61 ); /* read the current state */ |
280 | outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ | 193 | outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ |
281 | } | 194 | } |
282 | } | ||
283 | |||
284 | /* | ||
285 | * This is the same as the above, except we _also_ save the current | ||
286 | * Time Stamp Counter value at the time of the timer interrupt, so that | ||
287 | * we later on can estimate the time of day more exactly. | ||
288 | */ | ||
289 | irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | ||
290 | { | ||
291 | /* | ||
292 | * Here we are in the timer irq handler. We just have irqs locally | ||
293 | * disabled but we don't know if the timer_bh is running on the other | ||
294 | * CPU. We need to avoid to SMP race with it. NOTE: we don' t need | ||
295 | * the irq version of write_lock because as just said we have irq | ||
296 | * locally disabled. -arca | ||
297 | */ | ||
298 | write_seqlock(&xtime_lock); | ||
299 | |||
300 | cur_timer->mark_offset(); | ||
301 | |||
302 | do_timer_interrupt(irq, regs); | ||
303 | 195 | ||
304 | write_sequnlock(&xtime_lock); | 196 | write_sequnlock(&xtime_lock); |
305 | 197 | ||
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void) | |||
380 | 272 | ||
381 | static long clock_cmos_diff, sleep_start; | 273 | static long clock_cmos_diff, sleep_start; |
382 | 274 | ||
383 | static struct timer_opts *last_timer; | ||
384 | static int timer_suspend(struct sys_device *dev, pm_message_t state) | 275 | static int timer_suspend(struct sys_device *dev, pm_message_t state) |
385 | { | 276 | { |
386 | /* | 277 | /* |
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state) | |||
389 | clock_cmos_diff = -get_cmos_time(); | 280 | clock_cmos_diff = -get_cmos_time(); |
390 | clock_cmos_diff += get_seconds(); | 281 | clock_cmos_diff += get_seconds(); |
391 | sleep_start = get_cmos_time(); | 282 | sleep_start = get_cmos_time(); |
392 | last_timer = cur_timer; | ||
393 | cur_timer = &timer_none; | ||
394 | if (last_timer->suspend) | ||
395 | last_timer->suspend(state); | ||
396 | return 0; | 283 | return 0; |
397 | } | 284 | } |
398 | 285 | ||
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev) | |||
415 | jiffies_64 += sleep_length; | 302 | jiffies_64 += sleep_length; |
416 | wall_jiffies += sleep_length; | 303 | wall_jiffies += sleep_length; |
417 | write_sequnlock_irqrestore(&xtime_lock, flags); | 304 | write_sequnlock_irqrestore(&xtime_lock, flags); |
418 | if (last_timer->resume) | ||
419 | last_timer->resume(); | ||
420 | cur_timer = last_timer; | ||
421 | last_timer = NULL; | ||
422 | touch_softlockup_watchdog(); | 305 | touch_softlockup_watchdog(); |
423 | return 0; | 306 | return 0; |
424 | } | 307 | } |
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void) | |||
460 | printk("Using HPET for base-timer\n"); | 343 | printk("Using HPET for base-timer\n"); |
461 | } | 344 | } |
462 | 345 | ||
463 | cur_timer = select_timer(); | ||
464 | printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); | ||
465 | |||
466 | time_init_hook(); | 346 | time_init_hook(); |
467 | } | 347 | } |
468 | #endif | 348 | #endif |
@@ -484,8 +364,5 @@ void __init time_init(void) | |||
484 | set_normalized_timespec(&wall_to_monotonic, | 364 | set_normalized_timespec(&wall_to_monotonic, |
485 | -xtime.tv_sec, -xtime.tv_nsec); | 365 | -xtime.tv_sec, -xtime.tv_nsec); |
486 | 366 | ||
487 | cur_timer = select_timer(); | ||
488 | printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); | ||
489 | |||
490 | time_init_hook(); | 367 | time_init_hook(); |
491 | } | 368 | } |
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile deleted file mode 100644 index 8fa12be658dd..000000000000 --- a/arch/i386/kernel/timers/Makefile +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for x86 timers | ||
3 | # | ||
4 | |||
5 | obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o | ||
6 | |||
7 | obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o | ||
8 | obj-$(CONFIG_HPET_TIMER) += timer_hpet.o | ||
9 | obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o | ||
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c deleted file mode 100644 index 8163fe0cf1f0..000000000000 --- a/arch/i386/kernel/timers/common.c +++ /dev/null | |||
@@ -1,172 +0,0 @@ | |||
1 | /* | ||
2 | * Common functions used across the timers go here | ||
3 | */ | ||
4 | |||
5 | #include <linux/init.h> | ||
6 | #include <linux/timex.h> | ||
7 | #include <linux/errno.h> | ||
8 | #include <linux/jiffies.h> | ||
9 | #include <linux/module.h> | ||
10 | |||
11 | #include <asm/io.h> | ||
12 | #include <asm/timer.h> | ||
13 | #include <asm/hpet.h> | ||
14 | |||
15 | #include "mach_timer.h" | ||
16 | |||
17 | /* ------ Calibrate the TSC ------- | ||
18 | * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). | ||
19 | * Too much 64-bit arithmetic here to do this cleanly in C, and for | ||
20 | * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) | ||
21 | * output busy loop as low as possible. We avoid reading the CTC registers | ||
22 | * directly because of the awkward 8-bit access mechanism of the 82C54 | ||
23 | * device. | ||
24 | */ | ||
25 | |||
26 | #define CALIBRATE_TIME (5 * 1000020/HZ) | ||
27 | |||
28 | unsigned long calibrate_tsc(void) | ||
29 | { | ||
30 | mach_prepare_counter(); | ||
31 | |||
32 | { | ||
33 | unsigned long startlow, starthigh; | ||
34 | unsigned long endlow, endhigh; | ||
35 | unsigned long count; | ||
36 | |||
37 | rdtsc(startlow,starthigh); | ||
38 | mach_countup(&count); | ||
39 | rdtsc(endlow,endhigh); | ||
40 | |||
41 | |||
42 | /* Error: ECTCNEVERSET */ | ||
43 | if (count <= 1) | ||
44 | goto bad_ctc; | ||
45 | |||
46 | /* 64-bit subtract - gcc just messes up with long longs */ | ||
47 | __asm__("subl %2,%0\n\t" | ||
48 | "sbbl %3,%1" | ||
49 | :"=a" (endlow), "=d" (endhigh) | ||
50 | :"g" (startlow), "g" (starthigh), | ||
51 | "0" (endlow), "1" (endhigh)); | ||
52 | |||
53 | /* Error: ECPUTOOFAST */ | ||
54 | if (endhigh) | ||
55 | goto bad_ctc; | ||
56 | |||
57 | /* Error: ECPUTOOSLOW */ | ||
58 | if (endlow <= CALIBRATE_TIME) | ||
59 | goto bad_ctc; | ||
60 | |||
61 | __asm__("divl %2" | ||
62 | :"=a" (endlow), "=d" (endhigh) | ||
63 | :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); | ||
64 | |||
65 | return endlow; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * The CTC wasn't reliable: we got a hit on the very first read, | ||
70 | * or the CPU was so fast/slow that the quotient wouldn't fit in | ||
71 | * 32 bits.. | ||
72 | */ | ||
73 | bad_ctc: | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | #ifdef CONFIG_HPET_TIMER | ||
78 | /* ------ Calibrate the TSC using HPET ------- | ||
79 | * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq. | ||
80 | * Second output is parameter 1 (when non NULL) | ||
81 | * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet(). | ||
82 | * calibrate_tsc() calibrates the processor TSC by comparing | ||
83 | * it to the HPET timer of known frequency. | ||
84 | * Too much 64-bit arithmetic here to do this cleanly in C | ||
85 | */ | ||
86 | #define CALIBRATE_CNT_HPET (5 * hpet_tick) | ||
87 | #define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) | ||
88 | |||
89 | unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) | ||
90 | { | ||
91 | unsigned long tsc_startlow, tsc_starthigh; | ||
92 | unsigned long tsc_endlow, tsc_endhigh; | ||
93 | unsigned long hpet_start, hpet_end; | ||
94 | unsigned long result, remain; | ||
95 | |||
96 | hpet_start = hpet_readl(HPET_COUNTER); | ||
97 | rdtsc(tsc_startlow, tsc_starthigh); | ||
98 | do { | ||
99 | hpet_end = hpet_readl(HPET_COUNTER); | ||
100 | } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET); | ||
101 | rdtsc(tsc_endlow, tsc_endhigh); | ||
102 | |||
103 | /* 64-bit subtract - gcc just messes up with long longs */ | ||
104 | __asm__("subl %2,%0\n\t" | ||
105 | "sbbl %3,%1" | ||
106 | :"=a" (tsc_endlow), "=d" (tsc_endhigh) | ||
107 | :"g" (tsc_startlow), "g" (tsc_starthigh), | ||
108 | "0" (tsc_endlow), "1" (tsc_endhigh)); | ||
109 | |||
110 | /* Error: ECPUTOOFAST */ | ||
111 | if (tsc_endhigh) | ||
112 | goto bad_calibration; | ||
113 | |||
114 | /* Error: ECPUTOOSLOW */ | ||
115 | if (tsc_endlow <= CALIBRATE_TIME_HPET) | ||
116 | goto bad_calibration; | ||
117 | |||
118 | ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET); | ||
119 | if (remain > (tsc_endlow >> 1)) | ||
120 | result++; /* rounding the result */ | ||
121 | |||
122 | if (tsc_hpet_quotient_ptr) { | ||
123 | unsigned long tsc_hpet_quotient; | ||
124 | |||
125 | ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0, | ||
126 | CALIBRATE_CNT_HPET); | ||
127 | if (remain > (tsc_endlow >> 1)) | ||
128 | tsc_hpet_quotient++; /* rounding the result */ | ||
129 | *tsc_hpet_quotient_ptr = tsc_hpet_quotient; | ||
130 | } | ||
131 | |||
132 | return result; | ||
133 | bad_calibration: | ||
134 | /* | ||
135 | * the CPU was so fast/slow that the quotient wouldn't fit in | ||
136 | * 32 bits.. | ||
137 | */ | ||
138 | return 0; | ||
139 | } | ||
140 | #endif | ||
141 | |||
142 | |||
143 | unsigned long read_timer_tsc(void) | ||
144 | { | ||
145 | unsigned long retval; | ||
146 | rdtscl(retval); | ||
147 | return retval; | ||
148 | } | ||
149 | |||
150 | |||
151 | /* calculate cpu_khz */ | ||
152 | void init_cpu_khz(void) | ||
153 | { | ||
154 | if (cpu_has_tsc) { | ||
155 | unsigned long tsc_quotient = calibrate_tsc(); | ||
156 | if (tsc_quotient) { | ||
157 | /* report CPU clock rate in Hz. | ||
158 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
159 | * clock/second. Our precision is about 100 ppm. | ||
160 | */ | ||
161 | { unsigned long eax=0, edx=1000; | ||
162 | __asm__("divl %2" | ||
163 | :"=a" (cpu_khz), "=d" (edx) | ||
164 | :"r" (tsc_quotient), | ||
165 | "0" (eax), "1" (edx)); | ||
166 | printk("Detected %u.%03u MHz processor.\n", | ||
167 | cpu_khz / 1000, cpu_khz % 1000); | ||
168 | } | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | |||
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c deleted file mode 100644 index 7e39ed8e33f8..000000000000 --- a/arch/i386/kernel/timers/timer.c +++ /dev/null | |||
@@ -1,75 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <asm/timer.h> | ||
5 | |||
6 | #ifdef CONFIG_HPET_TIMER | ||
7 | /* | ||
8 | * HPET memory read is slower than tsc reads, but is more dependable as it | ||
9 | * always runs at constant frequency and reduces complexity due to | ||
10 | * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use | ||
11 | * timer_pit when HPET is active. So, we default to timer_tsc. | ||
12 | */ | ||
13 | #endif | ||
14 | /* list of timers, ordered by preference, NULL terminated */ | ||
15 | static struct init_timer_opts* __initdata timers[] = { | ||
16 | #ifdef CONFIG_X86_CYCLONE_TIMER | ||
17 | &timer_cyclone_init, | ||
18 | #endif | ||
19 | #ifdef CONFIG_HPET_TIMER | ||
20 | &timer_hpet_init, | ||
21 | #endif | ||
22 | #ifdef CONFIG_X86_PM_TIMER | ||
23 | &timer_pmtmr_init, | ||
24 | #endif | ||
25 | &timer_tsc_init, | ||
26 | &timer_pit_init, | ||
27 | NULL, | ||
28 | }; | ||
29 | |||
30 | static char clock_override[10] __initdata; | ||
31 | |||
32 | static int __init clock_setup(char* str) | ||
33 | { | ||
34 | if (str) | ||
35 | strlcpy(clock_override, str, sizeof(clock_override)); | ||
36 | return 1; | ||
37 | } | ||
38 | __setup("clock=", clock_setup); | ||
39 | |||
40 | |||
41 | /* The chosen timesource has been found to be bad. | ||
42 | * Fall back to a known good timesource (the PIT) | ||
43 | */ | ||
44 | void clock_fallback(void) | ||
45 | { | ||
46 | cur_timer = &timer_pit; | ||
47 | } | ||
48 | |||
49 | /* iterates through the list of timers, returning the first | ||
50 | * one that initializes successfully. | ||
51 | */ | ||
52 | struct timer_opts* __init select_timer(void) | ||
53 | { | ||
54 | int i = 0; | ||
55 | |||
56 | /* find most preferred working timer */ | ||
57 | while (timers[i]) { | ||
58 | if (timers[i]->init) | ||
59 | if (timers[i]->init(clock_override) == 0) | ||
60 | return timers[i]->opts; | ||
61 | ++i; | ||
62 | } | ||
63 | |||
64 | panic("select_timer: Cannot find a suitable timer\n"); | ||
65 | return NULL; | ||
66 | } | ||
67 | |||
68 | int read_current_timer(unsigned long *timer_val) | ||
69 | { | ||
70 | if (cur_timer->read_timer) { | ||
71 | *timer_val = cur_timer->read_timer(); | ||
72 | return 0; | ||
73 | } | ||
74 | return -1; | ||
75 | } | ||
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c deleted file mode 100644 index 13892a65c941..000000000000 --- a/arch/i386/kernel/timers/timer_cyclone.c +++ /dev/null | |||
@@ -1,259 +0,0 @@ | |||
1 | /* Cyclone-timer: | ||
2 | * This code implements timer_ops for the cyclone counter found | ||
3 | * on IBM x440, x360, and other Summit based systems. | ||
4 | * | ||
5 | * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com) | ||
6 | */ | ||
7 | |||
8 | |||
9 | #include <linux/spinlock.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/timex.h> | ||
12 | #include <linux/errno.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/jiffies.h> | ||
15 | |||
16 | #include <asm/timer.h> | ||
17 | #include <asm/io.h> | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/fixmap.h> | ||
20 | #include <asm/i8253.h> | ||
21 | |||
22 | #include "io_ports.h" | ||
23 | |||
24 | /* Number of usecs that the last interrupt was delayed */ | ||
25 | static int delay_at_last_interrupt; | ||
26 | |||
27 | #define CYCLONE_CBAR_ADDR 0xFEB00CD0 | ||
28 | #define CYCLONE_PMCC_OFFSET 0x51A0 | ||
29 | #define CYCLONE_MPMC_OFFSET 0x51D0 | ||
30 | #define CYCLONE_MPCS_OFFSET 0x51A8 | ||
31 | #define CYCLONE_TIMER_FREQ 100000000 | ||
32 | #define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */ | ||
33 | int use_cyclone = 0; | ||
34 | |||
35 | static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ | ||
36 | static u32 last_cyclone_low; | ||
37 | static u32 last_cyclone_high; | ||
38 | static unsigned long long monotonic_base; | ||
39 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
40 | |||
41 | /* helper macro to atomically read both cyclone counter registers */ | ||
42 | #define read_cyclone_counter(low,high) \ | ||
43 | do{ \ | ||
44 | high = cyclone_timer[1]; low = cyclone_timer[0]; \ | ||
45 | } while (high != cyclone_timer[1]); | ||
46 | |||
47 | |||
48 | static void mark_offset_cyclone(void) | ||
49 | { | ||
50 | unsigned long lost, delay; | ||
51 | unsigned long delta = last_cyclone_low; | ||
52 | int count; | ||
53 | unsigned long long this_offset, last_offset; | ||
54 | |||
55 | write_seqlock(&monotonic_lock); | ||
56 | last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
57 | |||
58 | spin_lock(&i8253_lock); | ||
59 | read_cyclone_counter(last_cyclone_low,last_cyclone_high); | ||
60 | |||
61 | /* read values for delay_at_last_interrupt */ | ||
62 | outb_p(0x00, 0x43); /* latch the count ASAP */ | ||
63 | |||
64 | count = inb_p(0x40); /* read the latched count */ | ||
65 | count |= inb(0x40) << 8; | ||
66 | |||
67 | /* | ||
68 | * VIA686a test code... reset the latch if count > max + 1 | ||
69 | * from timer_pit.c - cjb | ||
70 | */ | ||
71 | if (count > LATCH) { | ||
72 | outb_p(0x34, PIT_MODE); | ||
73 | outb_p(LATCH & 0xff, PIT_CH0); | ||
74 | outb(LATCH >> 8, PIT_CH0); | ||
75 | count = LATCH - 1; | ||
76 | } | ||
77 | spin_unlock(&i8253_lock); | ||
78 | |||
79 | /* lost tick compensation */ | ||
80 | delta = last_cyclone_low - delta; | ||
81 | delta /= (CYCLONE_TIMER_FREQ/1000000); | ||
82 | delta += delay_at_last_interrupt; | ||
83 | lost = delta/(1000000/HZ); | ||
84 | delay = delta%(1000000/HZ); | ||
85 | if (lost >= 2) | ||
86 | jiffies_64 += lost-1; | ||
87 | |||
88 | /* update the monotonic base value */ | ||
89 | this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
90 | monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; | ||
91 | write_sequnlock(&monotonic_lock); | ||
92 | |||
93 | /* calculate delay_at_last_interrupt */ | ||
94 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
95 | delay_at_last_interrupt = (count + LATCH/2) / LATCH; | ||
96 | |||
97 | |||
98 | /* catch corner case where tick rollover occured | ||
99 | * between cyclone and pit reads (as noted when | ||
100 | * usec delta is > 90% # of usecs/tick) | ||
101 | */ | ||
102 | if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) | ||
103 | jiffies_64++; | ||
104 | } | ||
105 | |||
106 | static unsigned long get_offset_cyclone(void) | ||
107 | { | ||
108 | u32 offset; | ||
109 | |||
110 | if(!cyclone_timer) | ||
111 | return delay_at_last_interrupt; | ||
112 | |||
113 | /* Read the cyclone timer */ | ||
114 | offset = cyclone_timer[0]; | ||
115 | |||
116 | /* .. relative to previous jiffy */ | ||
117 | offset = offset - last_cyclone_low; | ||
118 | |||
119 | /* convert cyclone ticks to microseconds */ | ||
120 | /* XXX slow, can we speed this up? */ | ||
121 | offset = offset/(CYCLONE_TIMER_FREQ/1000000); | ||
122 | |||
123 | /* our adjusted time offset in microseconds */ | ||
124 | return delay_at_last_interrupt + offset; | ||
125 | } | ||
126 | |||
127 | static unsigned long long monotonic_clock_cyclone(void) | ||
128 | { | ||
129 | u32 now_low, now_high; | ||
130 | unsigned long long last_offset, this_offset, base; | ||
131 | unsigned long long ret; | ||
132 | unsigned seq; | ||
133 | |||
134 | /* atomically read monotonic base & last_offset */ | ||
135 | do { | ||
136 | seq = read_seqbegin(&monotonic_lock); | ||
137 | last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
138 | base = monotonic_base; | ||
139 | } while (read_seqretry(&monotonic_lock, seq)); | ||
140 | |||
141 | |||
142 | /* Read the cyclone counter */ | ||
143 | read_cyclone_counter(now_low,now_high); | ||
144 | this_offset = ((unsigned long long)now_high<<32)|now_low; | ||
145 | |||
146 | /* convert to nanoseconds */ | ||
147 | ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK); | ||
148 | return ret * (1000000000 / CYCLONE_TIMER_FREQ); | ||
149 | } | ||
150 | |||
151 | static int __init init_cyclone(char* override) | ||
152 | { | ||
153 | u32* reg; | ||
154 | u32 base; /* saved cyclone base address */ | ||
155 | u32 pageaddr; /* page that contains cyclone_timer register */ | ||
156 | u32 offset; /* offset from pageaddr to cyclone_timer register */ | ||
157 | int i; | ||
158 | |||
159 | /* check clock override */ | ||
160 | if (override[0] && strncmp(override,"cyclone",7)) | ||
161 | return -ENODEV; | ||
162 | |||
163 | /*make sure we're on a summit box*/ | ||
164 | if(!use_cyclone) return -ENODEV; | ||
165 | |||
166 | printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); | ||
167 | |||
168 | /* find base address */ | ||
169 | pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK; | ||
170 | offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK); | ||
171 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
172 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
173 | if(!reg){ | ||
174 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); | ||
175 | return -ENODEV; | ||
176 | } | ||
177 | base = *reg; | ||
178 | if(!base){ | ||
179 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); | ||
180 | return -ENODEV; | ||
181 | } | ||
182 | |||
183 | /* setup PMCC */ | ||
184 | pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK; | ||
185 | offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK); | ||
186 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
187 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
188 | if(!reg){ | ||
189 | printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); | ||
190 | return -ENODEV; | ||
191 | } | ||
192 | reg[0] = 0x00000001; | ||
193 | |||
194 | /* setup MPCS */ | ||
195 | pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK; | ||
196 | offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK); | ||
197 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
198 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
199 | if(!reg){ | ||
200 | printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); | ||
201 | return -ENODEV; | ||
202 | } | ||
203 | reg[0] = 0x00000001; | ||
204 | |||
205 | /* map in cyclone_timer */ | ||
206 | pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK; | ||
207 | offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK); | ||
208 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
209 | cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
210 | if(!cyclone_timer){ | ||
211 | printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); | ||
212 | return -ENODEV; | ||
213 | } | ||
214 | |||
215 | /*quick test to make sure its ticking*/ | ||
216 | for(i=0; i<3; i++){ | ||
217 | u32 old = cyclone_timer[0]; | ||
218 | int stall = 100; | ||
219 | while(stall--) barrier(); | ||
220 | if(cyclone_timer[0] == old){ | ||
221 | printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); | ||
222 | cyclone_timer = 0; | ||
223 | return -ENODEV; | ||
224 | } | ||
225 | } | ||
226 | |||
227 | init_cpu_khz(); | ||
228 | |||
229 | /* Everything looks good! */ | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | |||
234 | static void delay_cyclone(unsigned long loops) | ||
235 | { | ||
236 | unsigned long bclock, now; | ||
237 | if(!cyclone_timer) | ||
238 | return; | ||
239 | bclock = cyclone_timer[0]; | ||
240 | do { | ||
241 | rep_nop(); | ||
242 | now = cyclone_timer[0]; | ||
243 | } while ((now-bclock) < loops); | ||
244 | } | ||
245 | /************************************************************/ | ||
246 | |||
247 | /* cyclone timer_opts struct */ | ||
248 | static struct timer_opts timer_cyclone = { | ||
249 | .name = "cyclone", | ||
250 | .mark_offset = mark_offset_cyclone, | ||
251 | .get_offset = get_offset_cyclone, | ||
252 | .monotonic_clock = monotonic_clock_cyclone, | ||
253 | .delay = delay_cyclone, | ||
254 | }; | ||
255 | |||
256 | struct init_timer_opts __initdata timer_cyclone_init = { | ||
257 | .init = init_cyclone, | ||
258 | .opts = &timer_cyclone, | ||
259 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c deleted file mode 100644 index 17a6fe7166e7..000000000000 --- a/arch/i386/kernel/timers/timer_hpet.c +++ /dev/null | |||
@@ -1,217 +0,0 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | */ | ||
5 | |||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/timex.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <linux/string.h> | ||
11 | #include <linux/jiffies.h> | ||
12 | |||
13 | #include <asm/timer.h> | ||
14 | #include <asm/io.h> | ||
15 | #include <asm/processor.h> | ||
16 | |||
17 | #include "io_ports.h" | ||
18 | #include "mach_timer.h" | ||
19 | #include <asm/hpet.h> | ||
20 | |||
21 | static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ | ||
22 | static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ | ||
23 | static unsigned long hpet_last; /* hpet counter value at last tick*/ | ||
24 | static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ | ||
25 | static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ | ||
26 | static unsigned long long monotonic_base; | ||
27 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
28 | |||
29 | /* convert from cycles(64bits) => nanoseconds (64bits) | ||
30 | * basic equation: | ||
31 | * ns = cycles / (freq / ns_per_sec) | ||
32 | * ns = cycles * (ns_per_sec / freq) | ||
33 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
34 | * ns = cycles * (10^6 / cpu_khz) | ||
35 | * | ||
36 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
37 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
38 | * ns = cycles * cyc2ns_scale / SC | ||
39 | * | ||
40 | * And since SC is a constant power of two, we can convert the div | ||
41 | * into a shift. | ||
42 | * | ||
43 | * We can use khz divisor instead of mhz to keep a better percision, since | ||
44 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
45 | * (mathieu.desnoyers@polymtl.ca) | ||
46 | * | ||
47 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
48 | */ | ||
49 | static unsigned long cyc2ns_scale __read_mostly; | ||
50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
51 | |||
52 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | ||
53 | { | ||
54 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
55 | } | ||
56 | |||
57 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
58 | { | ||
59 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
60 | } | ||
61 | |||
62 | static unsigned long long monotonic_clock_hpet(void) | ||
63 | { | ||
64 | unsigned long long last_offset, this_offset, base; | ||
65 | unsigned seq; | ||
66 | |||
67 | /* atomically read monotonic base & last_offset */ | ||
68 | do { | ||
69 | seq = read_seqbegin(&monotonic_lock); | ||
70 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
71 | base = monotonic_base; | ||
72 | } while (read_seqretry(&monotonic_lock, seq)); | ||
73 | |||
74 | /* Read the Time Stamp Counter */ | ||
75 | rdtscll(this_offset); | ||
76 | |||
77 | /* return the value in ns */ | ||
78 | return base + cycles_2_ns(this_offset - last_offset); | ||
79 | } | ||
80 | |||
81 | static unsigned long get_offset_hpet(void) | ||
82 | { | ||
83 | register unsigned long eax, edx; | ||
84 | |||
85 | eax = hpet_readl(HPET_COUNTER); | ||
86 | eax -= hpet_last; /* hpet delta */ | ||
87 | eax = min(hpet_tick, eax); | ||
88 | /* | ||
89 | * Time offset = (hpet delta) * ( usecs per HPET clock ) | ||
90 | * = (hpet delta) * ( usecs per tick / HPET clocks per tick) | ||
91 | * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) | ||
92 | * | ||
93 | * Where, | ||
94 | * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick | ||
95 | * | ||
96 | * Using a mull instead of a divl saves some cycles in critical path. | ||
97 | */ | ||
98 | ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax); | ||
99 | |||
100 | /* our adjusted time offset in microseconds */ | ||
101 | return edx; | ||
102 | } | ||
103 | |||
104 | static void mark_offset_hpet(void) | ||
105 | { | ||
106 | unsigned long long this_offset, last_offset; | ||
107 | unsigned long offset; | ||
108 | |||
109 | write_seqlock(&monotonic_lock); | ||
110 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
111 | rdtsc(last_tsc_low, last_tsc_high); | ||
112 | |||
113 | if (hpet_use_timer) | ||
114 | offset = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
115 | else | ||
116 | offset = hpet_readl(HPET_COUNTER); | ||
117 | if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { | ||
118 | int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; | ||
119 | jiffies_64 += lost_ticks; | ||
120 | } | ||
121 | hpet_last = offset; | ||
122 | |||
123 | /* update the monotonic base value */ | ||
124 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
125 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
126 | write_sequnlock(&monotonic_lock); | ||
127 | } | ||
128 | |||
129 | static void delay_hpet(unsigned long loops) | ||
130 | { | ||
131 | unsigned long hpet_start, hpet_end; | ||
132 | unsigned long eax; | ||
133 | |||
134 | /* loops is the number of cpu cycles. Convert it to hpet clocks */ | ||
135 | ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops); | ||
136 | |||
137 | hpet_start = hpet_readl(HPET_COUNTER); | ||
138 | do { | ||
139 | rep_nop(); | ||
140 | hpet_end = hpet_readl(HPET_COUNTER); | ||
141 | } while ((hpet_end - hpet_start) < (loops)); | ||
142 | } | ||
143 | |||
144 | static struct timer_opts timer_hpet; | ||
145 | |||
146 | static int __init init_hpet(char* override) | ||
147 | { | ||
148 | unsigned long result, remain; | ||
149 | |||
150 | /* check clock override */ | ||
151 | if (override[0] && strncmp(override,"hpet",4)) | ||
152 | return -ENODEV; | ||
153 | |||
154 | if (!is_hpet_enabled()) | ||
155 | return -ENODEV; | ||
156 | |||
157 | printk("Using HPET for gettimeofday\n"); | ||
158 | if (cpu_has_tsc) { | ||
159 | unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient); | ||
160 | if (tsc_quotient) { | ||
161 | /* report CPU clock rate in Hz. | ||
162 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
163 | * clock/second. Our precision is about 100 ppm. | ||
164 | */ | ||
165 | { unsigned long eax=0, edx=1000; | ||
166 | ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, | ||
167 | eax, edx); | ||
168 | printk("Detected %u.%03u MHz processor.\n", | ||
169 | cpu_khz / 1000, cpu_khz % 1000); | ||
170 | } | ||
171 | set_cyc2ns_scale(cpu_khz); | ||
172 | } | ||
173 | /* set this only when cpu_has_tsc */ | ||
174 | timer_hpet.read_timer = read_timer_tsc; | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Math to calculate hpet to usec multiplier | ||
179 | * Look for the comments at get_offset_hpet() | ||
180 | */ | ||
181 | ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC); | ||
182 | if (remain > (hpet_tick >> 1)) | ||
183 | result++; /* rounding the result */ | ||
184 | hpet_usec_quotient = result; | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | static int hpet_resume(void) | ||
190 | { | ||
191 | write_seqlock(&monotonic_lock); | ||
192 | /* Assume this is the last mark offset time */ | ||
193 | rdtsc(last_tsc_low, last_tsc_high); | ||
194 | |||
195 | if (hpet_use_timer) | ||
196 | hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
197 | else | ||
198 | hpet_last = hpet_readl(HPET_COUNTER); | ||
199 | write_sequnlock(&monotonic_lock); | ||
200 | return 0; | ||
201 | } | ||
202 | /************************************************************/ | ||
203 | |||
204 | /* tsc timer_opts struct */ | ||
205 | static struct timer_opts timer_hpet __read_mostly = { | ||
206 | .name = "hpet", | ||
207 | .mark_offset = mark_offset_hpet, | ||
208 | .get_offset = get_offset_hpet, | ||
209 | .monotonic_clock = monotonic_clock_hpet, | ||
210 | .delay = delay_hpet, | ||
211 | .resume = hpet_resume, | ||
212 | }; | ||
213 | |||
214 | struct init_timer_opts __initdata timer_hpet_init = { | ||
215 | .init = init_hpet, | ||
216 | .opts = &timer_hpet, | ||
217 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c deleted file mode 100644 index 4ea2f414dbbd..000000000000 --- a/arch/i386/kernel/timers/timer_none.c +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/timer.h> | ||
3 | |||
4 | static void mark_offset_none(void) | ||
5 | { | ||
6 | /* nothing needed */ | ||
7 | } | ||
8 | |||
9 | static unsigned long get_offset_none(void) | ||
10 | { | ||
11 | return 0; | ||
12 | } | ||
13 | |||
14 | static unsigned long long monotonic_clock_none(void) | ||
15 | { | ||
16 | return 0; | ||
17 | } | ||
18 | |||
19 | static void delay_none(unsigned long loops) | ||
20 | { | ||
21 | int d0; | ||
22 | __asm__ __volatile__( | ||
23 | "\tjmp 1f\n" | ||
24 | ".align 16\n" | ||
25 | "1:\tjmp 2f\n" | ||
26 | ".align 16\n" | ||
27 | "2:\tdecl %0\n\tjns 2b" | ||
28 | :"=&a" (d0) | ||
29 | :"0" (loops)); | ||
30 | } | ||
31 | |||
32 | /* none timer_opts struct */ | ||
33 | struct timer_opts timer_none = { | ||
34 | .name = "none", | ||
35 | .mark_offset = mark_offset_none, | ||
36 | .get_offset = get_offset_none, | ||
37 | .monotonic_clock = monotonic_clock_none, | ||
38 | .delay = delay_none, | ||
39 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c deleted file mode 100644 index b9b6bd56b9ba..000000000000 --- a/arch/i386/kernel/timers/timer_pit.c +++ /dev/null | |||
@@ -1,177 +0,0 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | */ | ||
5 | |||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/module.h> | ||
8 | #include <linux/device.h> | ||
9 | #include <linux/sysdev.h> | ||
10 | #include <linux/timex.h> | ||
11 | #include <asm/delay.h> | ||
12 | #include <asm/mpspec.h> | ||
13 | #include <asm/timer.h> | ||
14 | #include <asm/smp.h> | ||
15 | #include <asm/io.h> | ||
16 | #include <asm/arch_hooks.h> | ||
17 | #include <asm/i8253.h> | ||
18 | |||
19 | #include "do_timer.h" | ||
20 | #include "io_ports.h" | ||
21 | |||
22 | static int count_p; /* counter in get_offset_pit() */ | ||
23 | |||
24 | static int __init init_pit(char* override) | ||
25 | { | ||
26 | /* check clock override */ | ||
27 | if (override[0] && strncmp(override,"pit",3)) | ||
28 | printk(KERN_ERR "Warning: clock= override failed. Defaulting " | ||
29 | "to PIT\n"); | ||
30 | init_cpu_khz(); | ||
31 | count_p = LATCH; | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | static void mark_offset_pit(void) | ||
36 | { | ||
37 | /* nothing needed */ | ||
38 | } | ||
39 | |||
40 | static unsigned long long monotonic_clock_pit(void) | ||
41 | { | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static void delay_pit(unsigned long loops) | ||
46 | { | ||
47 | int d0; | ||
48 | __asm__ __volatile__( | ||
49 | "\tjmp 1f\n" | ||
50 | ".align 16\n" | ||
51 | "1:\tjmp 2f\n" | ||
52 | ".align 16\n" | ||
53 | "2:\tdecl %0\n\tjns 2b" | ||
54 | :"=&a" (d0) | ||
55 | :"0" (loops)); | ||
56 | } | ||
57 | |||
58 | |||
59 | /* This function must be called with xtime_lock held. | ||
60 | * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs | ||
61 | * | ||
62 | * However, the pc-audio speaker driver changes the divisor so that | ||
63 | * it gets interrupted rather more often - it loads 64 into the | ||
64 | * counter rather than 11932! This has an adverse impact on | ||
65 | * do_gettimeoffset() -- it stops working! What is also not | ||
66 | * good is that the interval that our timer function gets called | ||
67 | * is no longer 10.0002 ms, but 9.9767 ms. To get around this | ||
68 | * would require using a different timing source. Maybe someone | ||
69 | * could use the RTC - I know that this can interrupt at frequencies | ||
70 | * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix | ||
71 | * it so that at startup, the timer code in sched.c would select | ||
72 | * using either the RTC or the 8253 timer. The decision would be | ||
73 | * based on whether there was any other device around that needed | ||
74 | * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz, | ||
75 | * and then do some jiggery to have a version of do_timer that | ||
76 | * advanced the clock by 1/1024 s. Every time that reached over 1/100 | ||
77 | * of a second, then do all the old code. If the time was kept correct | ||
78 | * then do_gettimeoffset could just return 0 - there is no low order | ||
79 | * divider that can be accessed. | ||
80 | * | ||
81 | * Ideally, you would be able to use the RTC for the speaker driver, | ||
82 | * but it appears that the speaker driver really needs interrupt more | ||
83 | * often than every 120 us or so. | ||
84 | * | ||
85 | * Anyway, this needs more thought.... pjsg (1993-08-28) | ||
86 | * | ||
87 | * If you are really that interested, you should be reading | ||
88 | * comp.protocols.time.ntp! | ||
89 | */ | ||
90 | |||
91 | static unsigned long get_offset_pit(void) | ||
92 | { | ||
93 | int count; | ||
94 | unsigned long flags; | ||
95 | static unsigned long jiffies_p = 0; | ||
96 | |||
97 | /* | ||
98 | * cache volatile jiffies temporarily; we have xtime_lock. | ||
99 | */ | ||
100 | unsigned long jiffies_t; | ||
101 | |||
102 | spin_lock_irqsave(&i8253_lock, flags); | ||
103 | /* timer count may underflow right here */ | ||
104 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
105 | |||
106 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
107 | |||
108 | /* | ||
109 | * We do this guaranteed double memory access instead of a _p | ||
110 | * postfix in the previous port access. Wheee, hackady hack | ||
111 | */ | ||
112 | jiffies_t = jiffies; | ||
113 | |||
114 | count |= inb_p(PIT_CH0) << 8; | ||
115 | |||
116 | /* VIA686a test code... reset the latch if count > max + 1 */ | ||
117 | if (count > LATCH) { | ||
118 | outb_p(0x34, PIT_MODE); | ||
119 | outb_p(LATCH & 0xff, PIT_CH0); | ||
120 | outb(LATCH >> 8, PIT_CH0); | ||
121 | count = LATCH - 1; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * avoiding timer inconsistencies (they are rare, but they happen)... | ||
126 | * there are two kinds of problems that must be avoided here: | ||
127 | * 1. the timer counter underflows | ||
128 | * 2. hardware problem with the timer, not giving us continuous time, | ||
129 | * the counter does small "jumps" upwards on some Pentium systems, | ||
130 | * (see c't 95/10 page 335 for Neptun bug.) | ||
131 | */ | ||
132 | |||
133 | if( jiffies_t == jiffies_p ) { | ||
134 | if( count > count_p ) { | ||
135 | /* the nutcase */ | ||
136 | count = do_timer_overflow(count); | ||
137 | } | ||
138 | } else | ||
139 | jiffies_p = jiffies_t; | ||
140 | |||
141 | count_p = count; | ||
142 | |||
143 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
144 | |||
145 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
146 | count = (count + LATCH/2) / LATCH; | ||
147 | |||
148 | return count; | ||
149 | } | ||
150 | |||
151 | |||
152 | /* tsc timer_opts struct */ | ||
153 | struct timer_opts timer_pit = { | ||
154 | .name = "pit", | ||
155 | .mark_offset = mark_offset_pit, | ||
156 | .get_offset = get_offset_pit, | ||
157 | .monotonic_clock = monotonic_clock_pit, | ||
158 | .delay = delay_pit, | ||
159 | }; | ||
160 | |||
161 | struct init_timer_opts __initdata timer_pit_init = { | ||
162 | .init = init_pit, | ||
163 | .opts = &timer_pit, | ||
164 | }; | ||
165 | |||
166 | void setup_pit_timer(void) | ||
167 | { | ||
168 | unsigned long flags; | ||
169 | |||
170 | spin_lock_irqsave(&i8253_lock, flags); | ||
171 | outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | ||
172 | udelay(10); | ||
173 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
174 | udelay(10); | ||
175 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
176 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
177 | } | ||
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c deleted file mode 100644 index 144e94a04933..000000000000 --- a/arch/i386/kernel/timers/timer_pm.c +++ /dev/null | |||
@@ -1,342 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
3 | * | ||
4 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
5 | * southbridges as primary timing source for the Linux kernel. | ||
6 | * | ||
7 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
8 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
9 | * | ||
10 | * This file is licensed under the GPL v2. | ||
11 | */ | ||
12 | |||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/device.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/pci.h> | ||
19 | #include <asm/types.h> | ||
20 | #include <asm/timer.h> | ||
21 | #include <asm/smp.h> | ||
22 | #include <asm/io.h> | ||
23 | #include <asm/arch_hooks.h> | ||
24 | |||
25 | #include <linux/timex.h> | ||
26 | #include "mach_timer.h" | ||
27 | |||
28 | /* Number of PMTMR ticks expected during calibration run */ | ||
29 | #define PMTMR_TICKS_PER_SEC 3579545 | ||
30 | #define PMTMR_EXPECTED_RATE \ | ||
31 | ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) | ||
32 | |||
33 | |||
34 | /* The I/O port the PMTMR resides at. | ||
35 | * The location is detected during setup_arch(), | ||
36 | * in arch/i386/acpi/boot.c */ | ||
37 | u32 pmtmr_ioport = 0; | ||
38 | |||
39 | |||
40 | /* value of the Power timer at last timer interrupt */ | ||
41 | static u32 offset_tick; | ||
42 | static u32 offset_delay; | ||
43 | |||
44 | static unsigned long long monotonic_base; | ||
45 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
46 | |||
47 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | ||
48 | |||
49 | static int pmtmr_need_workaround __read_mostly = 1; | ||
50 | |||
51 | /*helper function to safely read acpi pm timesource*/ | ||
52 | static inline u32 read_pmtmr(void) | ||
53 | { | ||
54 | if (pmtmr_need_workaround) { | ||
55 | u32 v1, v2, v3; | ||
56 | |||
57 | /* It has been reported that because of various broken | ||
58 | * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time | ||
59 | * source is not latched, so you must read it multiple | ||
60 | * times to insure a safe value is read. | ||
61 | */ | ||
62 | do { | ||
63 | v1 = inl(pmtmr_ioport); | ||
64 | v2 = inl(pmtmr_ioport); | ||
65 | v3 = inl(pmtmr_ioport); | ||
66 | } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) | ||
67 | || (v3 > v1 && v3 < v2)); | ||
68 | |||
69 | /* mask the output to 24 bits */ | ||
70 | return v2 & ACPI_PM_MASK; | ||
71 | } | ||
72 | |||
73 | return inl(pmtmr_ioport) & ACPI_PM_MASK; | ||
74 | } | ||
75 | |||
76 | |||
77 | /* | ||
78 | * Some boards have the PMTMR running way too fast. We check | ||
79 | * the PMTMR rate against PIT channel 2 to catch these cases. | ||
80 | */ | ||
81 | static int verify_pmtmr_rate(void) | ||
82 | { | ||
83 | u32 value1, value2; | ||
84 | unsigned long count, delta; | ||
85 | |||
86 | mach_prepare_counter(); | ||
87 | value1 = read_pmtmr(); | ||
88 | mach_countup(&count); | ||
89 | value2 = read_pmtmr(); | ||
90 | delta = (value2 - value1) & ACPI_PM_MASK; | ||
91 | |||
92 | /* Check that the PMTMR delta is within 5% of what we expect */ | ||
93 | if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || | ||
94 | delta > (PMTMR_EXPECTED_RATE * 21) / 20) { | ||
95 | printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); | ||
96 | return -1; | ||
97 | } | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | |||
103 | static int init_pmtmr(char* override) | ||
104 | { | ||
105 | u32 value1, value2; | ||
106 | unsigned int i; | ||
107 | |||
108 | if (override[0] && strncmp(override,"pmtmr",5)) | ||
109 | return -ENODEV; | ||
110 | |||
111 | if (!pmtmr_ioport) | ||
112 | return -ENODEV; | ||
113 | |||
114 | /* we use the TSC for delay_pmtmr, so make sure it exists */ | ||
115 | if (!cpu_has_tsc) | ||
116 | return -ENODEV; | ||
117 | |||
118 | /* "verify" this timing source */ | ||
119 | value1 = read_pmtmr(); | ||
120 | for (i = 0; i < 10000; i++) { | ||
121 | value2 = read_pmtmr(); | ||
122 | if (value2 == value1) | ||
123 | continue; | ||
124 | if (value2 > value1) | ||
125 | goto pm_good; | ||
126 | if ((value2 < value1) && ((value2) < 0xFFF)) | ||
127 | goto pm_good; | ||
128 | printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); | ||
129 | return -EINVAL; | ||
130 | } | ||
131 | printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); | ||
132 | return -ENODEV; | ||
133 | |||
134 | pm_good: | ||
135 | if (verify_pmtmr_rate() != 0) | ||
136 | return -ENODEV; | ||
137 | |||
138 | init_cpu_khz(); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static inline u32 cyc2us(u32 cycles) | ||
143 | { | ||
144 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
145 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
146 | * | ||
147 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
148 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
149 | * u32 overflows. | ||
150 | */ | ||
151 | cycles *= 286; | ||
152 | return (cycles >> 10); | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * this gets called during each timer interrupt | ||
157 | * - Called while holding the writer xtime_lock | ||
158 | */ | ||
159 | static void mark_offset_pmtmr(void) | ||
160 | { | ||
161 | u32 lost, delta, last_offset; | ||
162 | static int first_run = 1; | ||
163 | last_offset = offset_tick; | ||
164 | |||
165 | write_seqlock(&monotonic_lock); | ||
166 | |||
167 | offset_tick = read_pmtmr(); | ||
168 | |||
169 | /* calculate tick interval */ | ||
170 | delta = (offset_tick - last_offset) & ACPI_PM_MASK; | ||
171 | |||
172 | /* convert to usecs */ | ||
173 | delta = cyc2us(delta); | ||
174 | |||
175 | /* update the monotonic base value */ | ||
176 | monotonic_base += delta * NSEC_PER_USEC; | ||
177 | write_sequnlock(&monotonic_lock); | ||
178 | |||
179 | /* convert to ticks */ | ||
180 | delta += offset_delay; | ||
181 | lost = delta / (USEC_PER_SEC / HZ); | ||
182 | offset_delay = delta % (USEC_PER_SEC / HZ); | ||
183 | |||
184 | |||
185 | /* compensate for lost ticks */ | ||
186 | if (lost >= 2) | ||
187 | jiffies_64 += lost - 1; | ||
188 | |||
189 | /* don't calculate delay for first run, | ||
190 | or if we've got less then a tick */ | ||
191 | if (first_run || (lost < 1)) { | ||
192 | first_run = 0; | ||
193 | offset_delay = 0; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | static int pmtmr_resume(void) | ||
198 | { | ||
199 | write_seqlock(&monotonic_lock); | ||
200 | /* Assume this is the last mark offset time */ | ||
201 | offset_tick = read_pmtmr(); | ||
202 | write_sequnlock(&monotonic_lock); | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | static unsigned long long monotonic_clock_pmtmr(void) | ||
207 | { | ||
208 | u32 last_offset, this_offset; | ||
209 | unsigned long long base, ret; | ||
210 | unsigned seq; | ||
211 | |||
212 | |||
213 | /* atomically read monotonic base & last_offset */ | ||
214 | do { | ||
215 | seq = read_seqbegin(&monotonic_lock); | ||
216 | last_offset = offset_tick; | ||
217 | base = monotonic_base; | ||
218 | } while (read_seqretry(&monotonic_lock, seq)); | ||
219 | |||
220 | /* Read the pmtmr */ | ||
221 | this_offset = read_pmtmr(); | ||
222 | |||
223 | /* convert to nanoseconds */ | ||
224 | ret = (this_offset - last_offset) & ACPI_PM_MASK; | ||
225 | ret = base + (cyc2us(ret) * NSEC_PER_USEC); | ||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | static void delay_pmtmr(unsigned long loops) | ||
230 | { | ||
231 | unsigned long bclock, now; | ||
232 | |||
233 | rdtscl(bclock); | ||
234 | do | ||
235 | { | ||
236 | rep_nop(); | ||
237 | rdtscl(now); | ||
238 | } while ((now-bclock) < loops); | ||
239 | } | ||
240 | |||
241 | |||
242 | /* | ||
243 | * get the offset (in microseconds) from the last call to mark_offset() | ||
244 | * - Called holding a reader xtime_lock | ||
245 | */ | ||
246 | static unsigned long get_offset_pmtmr(void) | ||
247 | { | ||
248 | u32 now, offset, delta = 0; | ||
249 | |||
250 | offset = offset_tick; | ||
251 | now = read_pmtmr(); | ||
252 | delta = (now - offset)&ACPI_PM_MASK; | ||
253 | |||
254 | return (unsigned long) offset_delay + cyc2us(delta); | ||
255 | } | ||
256 | |||
257 | |||
258 | /* acpi timer_opts struct */ | ||
259 | static struct timer_opts timer_pmtmr = { | ||
260 | .name = "pmtmr", | ||
261 | .mark_offset = mark_offset_pmtmr, | ||
262 | .get_offset = get_offset_pmtmr, | ||
263 | .monotonic_clock = monotonic_clock_pmtmr, | ||
264 | .delay = delay_pmtmr, | ||
265 | .read_timer = read_timer_tsc, | ||
266 | .resume = pmtmr_resume, | ||
267 | }; | ||
268 | |||
269 | struct init_timer_opts __initdata timer_pmtmr_init = { | ||
270 | .init = init_pmtmr, | ||
271 | .opts = &timer_pmtmr, | ||
272 | }; | ||
273 | |||
274 | #ifdef CONFIG_PCI | ||
275 | /* | ||
276 | * PIIX4 Errata: | ||
277 | * | ||
278 | * The power management timer may return improper results when read. | ||
279 | * Although the timer value settles properly after incrementing, | ||
280 | * while incrementing there is a 3 ns window every 69.8 ns where the | ||
281 | * timer value is indeterminate (a 4.2% chance that the data will be | ||
282 | * incorrect when read). As a result, the ACPI free running count up | ||
283 | * timer specification is violated due to erroneous reads. | ||
284 | */ | ||
285 | static int __init pmtmr_bug_check(void) | ||
286 | { | ||
287 | static struct pci_device_id gray_list[] __initdata = { | ||
288 | /* these chipsets may have bug. */ | ||
289 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, | ||
290 | PCI_DEVICE_ID_INTEL_82801DB_0) }, | ||
291 | { }, | ||
292 | }; | ||
293 | struct pci_dev *dev; | ||
294 | int pmtmr_has_bug = 0; | ||
295 | u8 rev; | ||
296 | |||
297 | if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround) | ||
298 | return 0; | ||
299 | |||
300 | dev = pci_get_device(PCI_VENDOR_ID_INTEL, | ||
301 | PCI_DEVICE_ID_INTEL_82371AB_3, NULL); | ||
302 | if (dev) { | ||
303 | pci_read_config_byte(dev, PCI_REVISION_ID, &rev); | ||
304 | /* the bug has been fixed in PIIX4M */ | ||
305 | if (rev < 3) { | ||
306 | printk(KERN_WARNING "* Found PM-Timer Bug on this " | ||
307 | "chipset. Due to workarounds for a bug,\n" | ||
308 | "* this time source is slow. Consider trying " | ||
309 | "other time sources (clock=)\n"); | ||
310 | pmtmr_has_bug = 1; | ||
311 | } | ||
312 | pci_dev_put(dev); | ||
313 | } | ||
314 | |||
315 | if (pci_dev_present(gray_list)) { | ||
316 | printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due" | ||
317 | " to workarounds for a bug,\n" | ||
318 | "* this time source is slow. If you are sure your timer" | ||
319 | " does not have\n" | ||
320 | "* this bug, please use \"pmtmr_good\" to disable the " | ||
321 | "workaround\n"); | ||
322 | pmtmr_has_bug = 1; | ||
323 | } | ||
324 | |||
325 | if (!pmtmr_has_bug) | ||
326 | pmtmr_need_workaround = 0; | ||
327 | |||
328 | return 0; | ||
329 | } | ||
330 | device_initcall(pmtmr_bug_check); | ||
331 | #endif | ||
332 | |||
333 | static int __init pmtr_good_setup(char *__str) | ||
334 | { | ||
335 | pmtmr_need_workaround = 0; | ||
336 | return 1; | ||
337 | } | ||
338 | __setup("pmtmr_good", pmtr_good_setup); | ||
339 | |||
340 | MODULE_LICENSE("GPL"); | ||
341 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | ||
342 | MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); | ||
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c deleted file mode 100644 index f1187ddb0d0f..000000000000 --- a/arch/i386/kernel/timers/timer_tsc.c +++ /dev/null | |||
@@ -1,617 +0,0 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | * | ||
5 | * 2004-06-25 Jesper Juhl | ||
6 | * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4 | ||
7 | * failing to inline. | ||
8 | */ | ||
9 | |||
10 | #include <linux/spinlock.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/timex.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/jiffies.h> | ||
17 | |||
18 | #include <asm/timer.h> | ||
19 | #include <asm/io.h> | ||
20 | /* processor.h for distable_tsc flag */ | ||
21 | #include <asm/processor.h> | ||
22 | |||
23 | #include "io_ports.h" | ||
24 | #include "mach_timer.h" | ||
25 | |||
26 | #include <asm/hpet.h> | ||
27 | #include <asm/i8253.h> | ||
28 | |||
29 | #ifdef CONFIG_HPET_TIMER | ||
30 | static unsigned long hpet_usec_quotient; | ||
31 | static unsigned long hpet_last; | ||
32 | static struct timer_opts timer_tsc; | ||
33 | #endif | ||
34 | |||
35 | static inline void cpufreq_delayed_get(void); | ||
36 | |||
37 | int tsc_disable __devinitdata = 0; | ||
38 | |||
39 | static int use_tsc; | ||
40 | /* Number of usecs that the last interrupt was delayed */ | ||
41 | static int delay_at_last_interrupt; | ||
42 | |||
43 | static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ | ||
44 | static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ | ||
45 | static unsigned long long monotonic_base; | ||
46 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
47 | |||
48 | /* Avoid compensating for lost ticks before TSCs are synched */ | ||
49 | static int detect_lost_ticks; | ||
50 | static int __init start_lost_tick_compensation(void) | ||
51 | { | ||
52 | detect_lost_ticks = 1; | ||
53 | return 0; | ||
54 | } | ||
55 | late_initcall(start_lost_tick_compensation); | ||
56 | |||
57 | /* convert from cycles(64bits) => nanoseconds (64bits) | ||
58 | * basic equation: | ||
59 | * ns = cycles / (freq / ns_per_sec) | ||
60 | * ns = cycles * (ns_per_sec / freq) | ||
61 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
62 | * ns = cycles * (10^6 / cpu_khz) | ||
63 | * | ||
64 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
65 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
66 | * ns = cycles * cyc2ns_scale / SC | ||
67 | * | ||
68 | * And since SC is a constant power of two, we can convert the div | ||
69 | * into a shift. | ||
70 | * | ||
71 | * We can use khz divisor instead of mhz to keep a better percision, since | ||
72 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
73 | * (mathieu.desnoyers@polymtl.ca) | ||
74 | * | ||
75 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
76 | */ | ||
77 | static unsigned long cyc2ns_scale __read_mostly; | ||
78 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
79 | |||
80 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | ||
81 | { | ||
82 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
83 | } | ||
84 | |||
85 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
86 | { | ||
87 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
88 | } | ||
89 | |||
90 | static int count2; /* counter for mark_offset_tsc() */ | ||
91 | |||
92 | /* Cached *multiplier* to convert TSC counts to microseconds. | ||
93 | * (see the equation below). | ||
94 | * Equal to 2^32 * (1 / (clocks per usec) ). | ||
95 | * Initialized in time_init. | ||
96 | */ | ||
97 | static unsigned long fast_gettimeoffset_quotient; | ||
98 | |||
99 | static unsigned long get_offset_tsc(void) | ||
100 | { | ||
101 | register unsigned long eax, edx; | ||
102 | |||
103 | /* Read the Time Stamp Counter */ | ||
104 | |||
105 | rdtsc(eax,edx); | ||
106 | |||
107 | /* .. relative to previous jiffy (32 bits is enough) */ | ||
108 | eax -= last_tsc_low; /* tsc_low delta */ | ||
109 | |||
110 | /* | ||
111 | * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient | ||
112 | * = (tsc_low delta) * (usecs_per_clock) | ||
113 | * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) | ||
114 | * | ||
115 | * Using a mull instead of a divl saves up to 31 clock cycles | ||
116 | * in the critical path. | ||
117 | */ | ||
118 | |||
119 | __asm__("mull %2" | ||
120 | :"=a" (eax), "=d" (edx) | ||
121 | :"rm" (fast_gettimeoffset_quotient), | ||
122 | "0" (eax)); | ||
123 | |||
124 | /* our adjusted time offset in microseconds */ | ||
125 | return delay_at_last_interrupt + edx; | ||
126 | } | ||
127 | |||
128 | static unsigned long long monotonic_clock_tsc(void) | ||
129 | { | ||
130 | unsigned long long last_offset, this_offset, base; | ||
131 | unsigned seq; | ||
132 | |||
133 | /* atomically read monotonic base & last_offset */ | ||
134 | do { | ||
135 | seq = read_seqbegin(&monotonic_lock); | ||
136 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
137 | base = monotonic_base; | ||
138 | } while (read_seqretry(&monotonic_lock, seq)); | ||
139 | |||
140 | /* Read the Time Stamp Counter */ | ||
141 | rdtscll(this_offset); | ||
142 | |||
143 | /* return the value in ns */ | ||
144 | return base + cycles_2_ns(this_offset - last_offset); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Scheduler clock - returns current time in nanosec units. | ||
149 | */ | ||
150 | unsigned long long sched_clock(void) | ||
151 | { | ||
152 | unsigned long long this_offset; | ||
153 | |||
154 | /* | ||
155 | * In the NUMA case we dont use the TSC as they are not | ||
156 | * synchronized across all CPUs. | ||
157 | */ | ||
158 | #ifndef CONFIG_NUMA | ||
159 | if (!use_tsc) | ||
160 | #endif | ||
161 | /* no locking but a rare wrong value is not a big deal */ | ||
162 | return jiffies_64 * (1000000000 / HZ); | ||
163 | |||
164 | /* Read the Time Stamp Counter */ | ||
165 | rdtscll(this_offset); | ||
166 | |||
167 | /* return the value in ns */ | ||
168 | return cycles_2_ns(this_offset); | ||
169 | } | ||
170 | |||
171 | static void delay_tsc(unsigned long loops) | ||
172 | { | ||
173 | unsigned long bclock, now; | ||
174 | |||
175 | rdtscl(bclock); | ||
176 | do | ||
177 | { | ||
178 | rep_nop(); | ||
179 | rdtscl(now); | ||
180 | } while ((now-bclock) < loops); | ||
181 | } | ||
182 | |||
183 | #ifdef CONFIG_HPET_TIMER | ||
184 | static void mark_offset_tsc_hpet(void) | ||
185 | { | ||
186 | unsigned long long this_offset, last_offset; | ||
187 | unsigned long offset, temp, hpet_current; | ||
188 | |||
189 | write_seqlock(&monotonic_lock); | ||
190 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
191 | /* | ||
192 | * It is important that these two operations happen almost at | ||
193 | * the same time. We do the RDTSC stuff first, since it's | ||
194 | * faster. To avoid any inconsistencies, we need interrupts | ||
195 | * disabled locally. | ||
196 | */ | ||
197 | /* | ||
198 | * Interrupts are just disabled locally since the timer irq | ||
199 | * has the SA_INTERRUPT flag set. -arca | ||
200 | */ | ||
201 | /* read Pentium cycle counter */ | ||
202 | |||
203 | hpet_current = hpet_readl(HPET_COUNTER); | ||
204 | rdtsc(last_tsc_low, last_tsc_high); | ||
205 | |||
206 | /* lost tick compensation */ | ||
207 | offset = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
208 | if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0)) | ||
209 | && detect_lost_ticks) { | ||
210 | int lost_ticks = (offset - hpet_last) / hpet_tick; | ||
211 | jiffies_64 += lost_ticks; | ||
212 | } | ||
213 | hpet_last = hpet_current; | ||
214 | |||
215 | /* update the monotonic base value */ | ||
216 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
217 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
218 | write_sequnlock(&monotonic_lock); | ||
219 | |||
220 | /* calculate delay_at_last_interrupt */ | ||
221 | /* | ||
222 | * Time offset = (hpet delta) * ( usecs per HPET clock ) | ||
223 | * = (hpet delta) * ( usecs per tick / HPET clocks per tick) | ||
224 | * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) | ||
225 | * Where, | ||
226 | * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick | ||
227 | */ | ||
228 | delay_at_last_interrupt = hpet_current - offset; | ||
229 | ASM_MUL64_REG(temp, delay_at_last_interrupt, | ||
230 | hpet_usec_quotient, delay_at_last_interrupt); | ||
231 | } | ||
232 | #endif | ||
233 | |||
234 | |||
235 | #ifdef CONFIG_CPU_FREQ | ||
236 | #include <linux/workqueue.h> | ||
237 | |||
238 | static unsigned int cpufreq_delayed_issched = 0; | ||
239 | static unsigned int cpufreq_init = 0; | ||
240 | static struct work_struct cpufreq_delayed_get_work; | ||
241 | |||
242 | static void handle_cpufreq_delayed_get(void *v) | ||
243 | { | ||
244 | unsigned int cpu; | ||
245 | for_each_online_cpu(cpu) { | ||
246 | cpufreq_get(cpu); | ||
247 | } | ||
248 | cpufreq_delayed_issched = 0; | ||
249 | } | ||
250 | |||
251 | /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries | ||
252 | * to verify the CPU frequency the timing core thinks the CPU is running | ||
253 | * at is still correct. | ||
254 | */ | ||
255 | static inline void cpufreq_delayed_get(void) | ||
256 | { | ||
257 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
258 | cpufreq_delayed_issched = 1; | ||
259 | printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); | ||
260 | schedule_work(&cpufreq_delayed_get_work); | ||
261 | } | ||
262 | } | ||
263 | |||
264 | /* If the CPU frequency is scaled, TSC-based delays will need a different | ||
265 | * loops_per_jiffy value to function properly. | ||
266 | */ | ||
267 | |||
268 | static unsigned int ref_freq = 0; | ||
269 | static unsigned long loops_per_jiffy_ref = 0; | ||
270 | |||
271 | #ifndef CONFIG_SMP | ||
272 | static unsigned long fast_gettimeoffset_ref = 0; | ||
273 | static unsigned int cpu_khz_ref = 0; | ||
274 | #endif | ||
275 | |||
276 | static int | ||
277 | time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
278 | void *data) | ||
279 | { | ||
280 | struct cpufreq_freqs *freq = data; | ||
281 | |||
282 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
283 | write_seqlock_irq(&xtime_lock); | ||
284 | if (!ref_freq) { | ||
285 | if (!freq->old){ | ||
286 | ref_freq = freq->new; | ||
287 | goto end; | ||
288 | } | ||
289 | ref_freq = freq->old; | ||
290 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; | ||
291 | #ifndef CONFIG_SMP | ||
292 | fast_gettimeoffset_ref = fast_gettimeoffset_quotient; | ||
293 | cpu_khz_ref = cpu_khz; | ||
294 | #endif | ||
295 | } | ||
296 | |||
297 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
298 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
299 | (val == CPUFREQ_RESUMECHANGE)) { | ||
300 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
301 | cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | ||
302 | #ifndef CONFIG_SMP | ||
303 | if (cpu_khz) | ||
304 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); | ||
305 | if (use_tsc) { | ||
306 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | ||
307 | fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); | ||
308 | set_cyc2ns_scale(cpu_khz); | ||
309 | } | ||
310 | } | ||
311 | #endif | ||
312 | } | ||
313 | |||
314 | end: | ||
315 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
316 | write_sequnlock_irq(&xtime_lock); | ||
317 | |||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | static struct notifier_block time_cpufreq_notifier_block = { | ||
322 | .notifier_call = time_cpufreq_notifier | ||
323 | }; | ||
324 | |||
325 | |||
326 | static int __init cpufreq_tsc(void) | ||
327 | { | ||
328 | int ret; | ||
329 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); | ||
330 | ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
331 | CPUFREQ_TRANSITION_NOTIFIER); | ||
332 | if (!ret) | ||
333 | cpufreq_init = 1; | ||
334 | return ret; | ||
335 | } | ||
336 | core_initcall(cpufreq_tsc); | ||
337 | |||
338 | #else /* CONFIG_CPU_FREQ */ | ||
339 | static inline void cpufreq_delayed_get(void) { return; } | ||
340 | #endif | ||
341 | |||
342 | int recalibrate_cpu_khz(void) | ||
343 | { | ||
344 | #ifndef CONFIG_SMP | ||
345 | unsigned int cpu_khz_old = cpu_khz; | ||
346 | |||
347 | if (cpu_has_tsc) { | ||
348 | local_irq_disable(); | ||
349 | init_cpu_khz(); | ||
350 | local_irq_enable(); | ||
351 | cpu_data[0].loops_per_jiffy = | ||
352 | cpufreq_scale(cpu_data[0].loops_per_jiffy, | ||
353 | cpu_khz_old, | ||
354 | cpu_khz); | ||
355 | return 0; | ||
356 | } else | ||
357 | return -ENODEV; | ||
358 | #else | ||
359 | return -ENODEV; | ||
360 | #endif | ||
361 | } | ||
362 | EXPORT_SYMBOL(recalibrate_cpu_khz); | ||
363 | |||
364 | static void mark_offset_tsc(void) | ||
365 | { | ||
366 | unsigned long lost,delay; | ||
367 | unsigned long delta = last_tsc_low; | ||
368 | int count; | ||
369 | int countmp; | ||
370 | static int count1 = 0; | ||
371 | unsigned long long this_offset, last_offset; | ||
372 | static int lost_count = 0; | ||
373 | |||
374 | write_seqlock(&monotonic_lock); | ||
375 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
376 | /* | ||
377 | * It is important that these two operations happen almost at | ||
378 | * the same time. We do the RDTSC stuff first, since it's | ||
379 | * faster. To avoid any inconsistencies, we need interrupts | ||
380 | * disabled locally. | ||
381 | */ | ||
382 | |||
383 | /* | ||
384 | * Interrupts are just disabled locally since the timer irq | ||
385 | * has the SA_INTERRUPT flag set. -arca | ||
386 | */ | ||
387 | |||
388 | /* read Pentium cycle counter */ | ||
389 | |||
390 | rdtsc(last_tsc_low, last_tsc_high); | ||
391 | |||
392 | spin_lock(&i8253_lock); | ||
393 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
394 | |||
395 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
396 | count |= inb(PIT_CH0) << 8; | ||
397 | |||
398 | /* | ||
399 | * VIA686a test code... reset the latch if count > max + 1 | ||
400 | * from timer_pit.c - cjb | ||
401 | */ | ||
402 | if (count > LATCH) { | ||
403 | outb_p(0x34, PIT_MODE); | ||
404 | outb_p(LATCH & 0xff, PIT_CH0); | ||
405 | outb(LATCH >> 8, PIT_CH0); | ||
406 | count = LATCH - 1; | ||
407 | } | ||
408 | |||
409 | spin_unlock(&i8253_lock); | ||
410 | |||
411 | if (pit_latch_buggy) { | ||
412 | /* get center value of last 3 time lutch */ | ||
413 | if ((count2 >= count && count >= count1) | ||
414 | || (count1 >= count && count >= count2)) { | ||
415 | count2 = count1; count1 = count; | ||
416 | } else if ((count1 >= count2 && count2 >= count) | ||
417 | || (count >= count2 && count2 >= count1)) { | ||
418 | countmp = count;count = count2; | ||
419 | count2 = count1;count1 = countmp; | ||
420 | } else { | ||
421 | count2 = count1; count1 = count; count = count1; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | /* lost tick compensation */ | ||
426 | delta = last_tsc_low - delta; | ||
427 | { | ||
428 | register unsigned long eax, edx; | ||
429 | eax = delta; | ||
430 | __asm__("mull %2" | ||
431 | :"=a" (eax), "=d" (edx) | ||
432 | :"rm" (fast_gettimeoffset_quotient), | ||
433 | "0" (eax)); | ||
434 | delta = edx; | ||
435 | } | ||
436 | delta += delay_at_last_interrupt; | ||
437 | lost = delta/(1000000/HZ); | ||
438 | delay = delta%(1000000/HZ); | ||
439 | if (lost >= 2 && detect_lost_ticks) { | ||
440 | jiffies_64 += lost-1; | ||
441 | |||
442 | /* sanity check to ensure we're not always losing ticks */ | ||
443 | if (lost_count++ > 100) { | ||
444 | printk(KERN_WARNING "Losing too many ticks!\n"); | ||
445 | printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); | ||
446 | printk(KERN_WARNING "Possible reasons for this are:\n"); | ||
447 | printk(KERN_WARNING " You're running with Speedstep,\n"); | ||
448 | printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); | ||
449 | printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); | ||
450 | printk(KERN_WARNING "Falling back to a sane timesource now.\n"); | ||
451 | |||
452 | clock_fallback(); | ||
453 | } | ||
454 | /* ... but give the TSC a fair chance */ | ||
455 | if (lost_count > 25) | ||
456 | cpufreq_delayed_get(); | ||
457 | } else | ||
458 | lost_count = 0; | ||
459 | /* update the monotonic base value */ | ||
460 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
461 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
462 | write_sequnlock(&monotonic_lock); | ||
463 | |||
464 | /* calculate delay_at_last_interrupt */ | ||
465 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
466 | delay_at_last_interrupt = (count + LATCH/2) / LATCH; | ||
467 | |||
468 | /* catch corner case where tick rollover occured | ||
469 | * between tsc and pit reads (as noted when | ||
470 | * usec delta is > 90% # of usecs/tick) | ||
471 | */ | ||
472 | if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) | ||
473 | jiffies_64++; | ||
474 | } | ||
475 | |||
476 | static int __init init_tsc(char* override) | ||
477 | { | ||
478 | |||
479 | /* check clock override */ | ||
480 | if (override[0] && strncmp(override,"tsc",3)) { | ||
481 | #ifdef CONFIG_HPET_TIMER | ||
482 | if (is_hpet_enabled()) { | ||
483 | printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n"); | ||
484 | } else | ||
485 | #endif | ||
486 | { | ||
487 | return -ENODEV; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * If we have APM enabled or the CPU clock speed is variable | ||
493 | * (CPU stops clock on HLT or slows clock to save power) | ||
494 | * then the TSC timestamps may diverge by up to 1 jiffy from | ||
495 | * 'real time' but nothing will break. | ||
496 | * The most frequent case is that the CPU is "woken" from a halt | ||
497 | * state by the timer interrupt itself, so we get 0 error. In the | ||
498 | * rare cases where a driver would "wake" the CPU and request a | ||
499 | * timestamp, the maximum error is < 1 jiffy. But timestamps are | ||
500 | * still perfectly ordered. | ||
501 | * Note that the TSC counter will be reset if APM suspends | ||
502 | * to disk; this won't break the kernel, though, 'cuz we're | ||
503 | * smart. See arch/i386/kernel/apm.c. | ||
504 | */ | ||
505 | /* | ||
506 | * Firstly we have to do a CPU check for chips with | ||
507 | * a potentially buggy TSC. At this point we haven't run | ||
508 | * the ident/bugs checks so we must run this hook as it | ||
509 | * may turn off the TSC flag. | ||
510 | * | ||
511 | * NOTE: this doesn't yet handle SMP 486 machines where only | ||
512 | * some CPU's have a TSC. Thats never worked and nobody has | ||
513 | * moaned if you have the only one in the world - you fix it! | ||
514 | */ | ||
515 | |||
516 | count2 = LATCH; /* initialize counter for mark_offset_tsc() */ | ||
517 | |||
518 | if (cpu_has_tsc) { | ||
519 | unsigned long tsc_quotient; | ||
520 | #ifdef CONFIG_HPET_TIMER | ||
521 | if (is_hpet_enabled() && hpet_use_timer) { | ||
522 | unsigned long result, remain; | ||
523 | printk("Using TSC for gettimeofday\n"); | ||
524 | tsc_quotient = calibrate_tsc_hpet(NULL); | ||
525 | timer_tsc.mark_offset = &mark_offset_tsc_hpet; | ||
526 | /* | ||
527 | * Math to calculate hpet to usec multiplier | ||
528 | * Look for the comments at get_offset_tsc_hpet() | ||
529 | */ | ||
530 | ASM_DIV64_REG(result, remain, hpet_tick, | ||
531 | 0, KERNEL_TICK_USEC); | ||
532 | if (remain > (hpet_tick >> 1)) | ||
533 | result++; /* rounding the result */ | ||
534 | |||
535 | hpet_usec_quotient = result; | ||
536 | } else | ||
537 | #endif | ||
538 | { | ||
539 | tsc_quotient = calibrate_tsc(); | ||
540 | } | ||
541 | |||
542 | if (tsc_quotient) { | ||
543 | fast_gettimeoffset_quotient = tsc_quotient; | ||
544 | use_tsc = 1; | ||
545 | /* | ||
546 | * We could be more selective here I suspect | ||
547 | * and just enable this for the next intel chips ? | ||
548 | */ | ||
549 | /* report CPU clock rate in Hz. | ||
550 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
551 | * clock/second. Our precision is about 100 ppm. | ||
552 | */ | ||
553 | { unsigned long eax=0, edx=1000; | ||
554 | __asm__("divl %2" | ||
555 | :"=a" (cpu_khz), "=d" (edx) | ||
556 | :"r" (tsc_quotient), | ||
557 | "0" (eax), "1" (edx)); | ||
558 | printk("Detected %u.%03u MHz processor.\n", | ||
559 | cpu_khz / 1000, cpu_khz % 1000); | ||
560 | } | ||
561 | set_cyc2ns_scale(cpu_khz); | ||
562 | return 0; | ||
563 | } | ||
564 | } | ||
565 | return -ENODEV; | ||
566 | } | ||
567 | |||
568 | static int tsc_resume(void) | ||
569 | { | ||
570 | write_seqlock(&monotonic_lock); | ||
571 | /* Assume this is the last mark offset time */ | ||
572 | rdtsc(last_tsc_low, last_tsc_high); | ||
573 | #ifdef CONFIG_HPET_TIMER | ||
574 | if (is_hpet_enabled() && hpet_use_timer) | ||
575 | hpet_last = hpet_readl(HPET_COUNTER); | ||
576 | #endif | ||
577 | write_sequnlock(&monotonic_lock); | ||
578 | return 0; | ||
579 | } | ||
580 | |||
581 | #ifndef CONFIG_X86_TSC | ||
582 | /* disable flag for tsc. Takes effect by clearing the TSC cpu flag | ||
583 | * in cpu/common.c */ | ||
584 | static int __init tsc_setup(char *str) | ||
585 | { | ||
586 | tsc_disable = 1; | ||
587 | return 1; | ||
588 | } | ||
589 | #else | ||
590 | static int __init tsc_setup(char *str) | ||
591 | { | ||
592 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | ||
593 | "cannot disable TSC.\n"); | ||
594 | return 1; | ||
595 | } | ||
596 | #endif | ||
597 | __setup("notsc", tsc_setup); | ||
598 | |||
599 | |||
600 | |||
601 | /************************************************************/ | ||
602 | |||
603 | /* tsc timer_opts struct */ | ||
604 | static struct timer_opts timer_tsc = { | ||
605 | .name = "tsc", | ||
606 | .mark_offset = mark_offset_tsc, | ||
607 | .get_offset = get_offset_tsc, | ||
608 | .monotonic_clock = monotonic_clock_tsc, | ||
609 | .delay = delay_tsc, | ||
610 | .read_timer = read_timer_tsc, | ||
611 | .resume = tsc_resume, | ||
612 | }; | ||
613 | |||
614 | struct init_timer_opts __initdata timer_tsc_init = { | ||
615 | .init = init_tsc, | ||
616 | .opts = &timer_tsc, | ||
617 | }; | ||
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 296355292c7c..e2e281d4bcc8 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c | |||
@@ -32,15 +32,8 @@ | |||
32 | 32 | ||
33 | static struct i386_cpu cpu_devices[NR_CPUS]; | 33 | static struct i386_cpu cpu_devices[NR_CPUS]; |
34 | 34 | ||
35 | int arch_register_cpu(int num){ | 35 | int arch_register_cpu(int num) |
36 | struct node *parent = NULL; | 36 | { |
37 | |||
38 | #ifdef CONFIG_NUMA | ||
39 | int node = cpu_to_node(num); | ||
40 | if (node_online(node)) | ||
41 | parent = &node_devices[node].node; | ||
42 | #endif /* CONFIG_NUMA */ | ||
43 | |||
44 | /* | 37 | /* |
45 | * CPU0 cannot be offlined due to several | 38 | * CPU0 cannot be offlined due to several |
46 | * restrictions and assumptions in kernel. This basically | 39 | * restrictions and assumptions in kernel. This basically |
@@ -50,21 +43,13 @@ int arch_register_cpu(int num){ | |||
50 | if (!num) | 43 | if (!num) |
51 | cpu_devices[num].cpu.no_control = 1; | 44 | cpu_devices[num].cpu.no_control = 1; |
52 | 45 | ||
53 | return register_cpu(&cpu_devices[num].cpu, num, parent); | 46 | return register_cpu(&cpu_devices[num].cpu, num); |
54 | } | 47 | } |
55 | 48 | ||
56 | #ifdef CONFIG_HOTPLUG_CPU | 49 | #ifdef CONFIG_HOTPLUG_CPU |
57 | 50 | ||
58 | void arch_unregister_cpu(int num) { | 51 | void arch_unregister_cpu(int num) { |
59 | struct node *parent = NULL; | 52 | return unregister_cpu(&cpu_devices[num].cpu); |
60 | |||
61 | #ifdef CONFIG_NUMA | ||
62 | int node = cpu_to_node(num); | ||
63 | if (node_online(node)) | ||
64 | parent = &node_devices[node].node; | ||
65 | #endif /* CONFIG_NUMA */ | ||
66 | |||
67 | return unregister_cpu(&cpu_devices[num].cpu, parent); | ||
68 | } | 53 | } |
69 | EXPORT_SYMBOL(arch_register_cpu); | 54 | EXPORT_SYMBOL(arch_register_cpu); |
70 | EXPORT_SYMBOL(arch_unregister_cpu); | 55 | EXPORT_SYMBOL(arch_unregister_cpu); |
@@ -74,16 +59,13 @@ EXPORT_SYMBOL(arch_unregister_cpu); | |||
74 | 59 | ||
75 | #ifdef CONFIG_NUMA | 60 | #ifdef CONFIG_NUMA |
76 | #include <linux/mmzone.h> | 61 | #include <linux/mmzone.h> |
77 | #include <asm/node.h> | ||
78 | |||
79 | struct i386_node node_devices[MAX_NUMNODES]; | ||
80 | 62 | ||
81 | static int __init topology_init(void) | 63 | static int __init topology_init(void) |
82 | { | 64 | { |
83 | int i; | 65 | int i; |
84 | 66 | ||
85 | for_each_online_node(i) | 67 | for_each_online_node(i) |
86 | arch_register_node(i); | 68 | register_one_node(i); |
87 | 69 | ||
88 | for_each_present_cpu(i) | 70 | for_each_present_cpu(i) |
89 | arch_register_cpu(i); | 71 | arch_register_cpu(i); |
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index dcc14477af1f..78464097470a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/utsname.h> | 28 | #include <linux/utsname.h> |
29 | #include <linux/kprobes.h> | 29 | #include <linux/kprobes.h> |
30 | #include <linux/kexec.h> | 30 | #include <linux/kexec.h> |
31 | #include <linux/unwind.h> | ||
31 | 32 | ||
32 | #ifdef CONFIG_EISA | 33 | #ifdef CONFIG_EISA |
33 | #include <linux/ioport.h> | 34 | #include <linux/ioport.h> |
@@ -47,7 +48,7 @@ | |||
47 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
48 | #include <asm/i387.h> | 49 | #include <asm/i387.h> |
49 | #include <asm/nmi.h> | 50 | #include <asm/nmi.h> |
50 | 51 | #include <asm/unwind.h> | |
51 | #include <asm/smp.h> | 52 | #include <asm/smp.h> |
52 | #include <asm/arch_hooks.h> | 53 | #include <asm/arch_hooks.h> |
53 | #include <asm/kdebug.h> | 54 | #include <asm/kdebug.h> |
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void); | |||
92 | asmlinkage void machine_check(void); | 93 | asmlinkage void machine_check(void); |
93 | 94 | ||
94 | static int kstack_depth_to_print = 24; | 95 | static int kstack_depth_to_print = 24; |
96 | static int call_trace = 1; | ||
95 | ATOMIC_NOTIFIER_HEAD(i386die_chain); | 97 | ATOMIC_NOTIFIER_HEAD(i386die_chain); |
96 | 98 | ||
97 | int register_die_notifier(struct notifier_block *nb) | 99 | int register_die_notifier(struct notifier_block *nb) |
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, | |||
170 | return ebp; | 172 | return ebp; |
171 | } | 173 | } |
172 | 174 | ||
173 | static void show_trace_log_lvl(struct task_struct *task, | 175 | static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) |
176 | { | ||
177 | int n = 0; | ||
178 | int printed = 0; /* nr of entries already printed on current line */ | ||
179 | |||
180 | while (unwind(info) == 0 && UNW_PC(info)) { | ||
181 | ++n; | ||
182 | printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); | ||
183 | if (arch_unw_user_mode(info)) | ||
184 | break; | ||
185 | } | ||
186 | if (printed) | ||
187 | printk("\n"); | ||
188 | return n; | ||
189 | } | ||
190 | |||
191 | static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
174 | unsigned long *stack, char *log_lvl) | 192 | unsigned long *stack, char *log_lvl) |
175 | { | 193 | { |
176 | unsigned long ebp; | 194 | unsigned long ebp; |
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task, | |||
178 | if (!task) | 196 | if (!task) |
179 | task = current; | 197 | task = current; |
180 | 198 | ||
199 | if (call_trace >= 0) { | ||
200 | int unw_ret = 0; | ||
201 | struct unwind_frame_info info; | ||
202 | |||
203 | if (regs) { | ||
204 | if (unwind_init_frame_info(&info, task, regs) == 0) | ||
205 | unw_ret = show_trace_unwind(&info, log_lvl); | ||
206 | } else if (task == current) | ||
207 | unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); | ||
208 | else { | ||
209 | if (unwind_init_blocked(&info, task) == 0) | ||
210 | unw_ret = show_trace_unwind(&info, log_lvl); | ||
211 | } | ||
212 | if (unw_ret > 0) { | ||
213 | if (call_trace > 0) | ||
214 | return; | ||
215 | printk("%sLegacy call trace:\n", log_lvl); | ||
216 | } | ||
217 | } | ||
218 | |||
181 | if (task == current) { | 219 | if (task == current) { |
182 | /* Grab ebp right from our regs */ | 220 | /* Grab ebp right from our regs */ |
183 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); | 221 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); |
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task, | |||
198 | } | 236 | } |
199 | } | 237 | } |
200 | 238 | ||
201 | void show_trace(struct task_struct *task, unsigned long * stack) | 239 | void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) |
202 | { | 240 | { |
203 | show_trace_log_lvl(task, stack, ""); | 241 | show_trace_log_lvl(task, regs, stack, ""); |
204 | } | 242 | } |
205 | 243 | ||
206 | static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, | 244 | static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
207 | char *log_lvl) | 245 | unsigned long *esp, char *log_lvl) |
208 | { | 246 | { |
209 | unsigned long *stack; | 247 | unsigned long *stack; |
210 | int i; | 248 | int i; |
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, | |||
225 | printk("%08lx ", *stack++); | 263 | printk("%08lx ", *stack++); |
226 | } | 264 | } |
227 | printk("\n%sCall Trace:\n", log_lvl); | 265 | printk("\n%sCall Trace:\n", log_lvl); |
228 | show_trace_log_lvl(task, esp, log_lvl); | 266 | show_trace_log_lvl(task, regs, esp, log_lvl); |
229 | } | 267 | } |
230 | 268 | ||
231 | void show_stack(struct task_struct *task, unsigned long *esp) | 269 | void show_stack(struct task_struct *task, unsigned long *esp) |
232 | { | 270 | { |
233 | printk(" "); | 271 | printk(" "); |
234 | show_stack_log_lvl(task, esp, ""); | 272 | show_stack_log_lvl(task, NULL, esp, ""); |
235 | } | 273 | } |
236 | 274 | ||
237 | /* | 275 | /* |
@@ -241,7 +279,7 @@ void dump_stack(void) | |||
241 | { | 279 | { |
242 | unsigned long stack; | 280 | unsigned long stack; |
243 | 281 | ||
244 | show_trace(current, &stack); | 282 | show_trace(current, NULL, &stack); |
245 | } | 283 | } |
246 | 284 | ||
247 | EXPORT_SYMBOL(dump_stack); | 285 | EXPORT_SYMBOL(dump_stack); |
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs) | |||
285 | u8 __user *eip; | 323 | u8 __user *eip; |
286 | 324 | ||
287 | printk("\n" KERN_EMERG "Stack: "); | 325 | printk("\n" KERN_EMERG "Stack: "); |
288 | show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); | 326 | show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); |
289 | 327 | ||
290 | printk(KERN_EMERG "Code: "); | 328 | printk(KERN_EMERG "Code: "); |
291 | 329 | ||
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s) | |||
1215 | return 1; | 1253 | return 1; |
1216 | } | 1254 | } |
1217 | __setup("kstack=", kstack_setup); | 1255 | __setup("kstack=", kstack_setup); |
1256 | |||
1257 | static int __init call_trace_setup(char *s) | ||
1258 | { | ||
1259 | if (strcmp(s, "old") == 0) | ||
1260 | call_trace = -1; | ||
1261 | else if (strcmp(s, "both") == 0) | ||
1262 | call_trace = 0; | ||
1263 | else if (strcmp(s, "new") == 0) | ||
1264 | call_trace = 1; | ||
1265 | return 1; | ||
1266 | } | ||
1267 | __setup("call_trace=", call_trace_setup); | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c new file mode 100644 index 000000000000..7e0d8dab2075 --- /dev/null +++ b/arch/i386/kernel/tsc.c | |||
@@ -0,0 +1,478 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/timer/timer_tsc.c | ||
3 | * which was originally moved from arch/i386/kernel/time.c. | ||
4 | * See comments there for proper credits. | ||
5 | */ | ||
6 | |||
7 | #include <linux/clocksource.h> | ||
8 | #include <linux/workqueue.h> | ||
9 | #include <linux/cpufreq.h> | ||
10 | #include <linux/jiffies.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/dmi.h> | ||
13 | |||
14 | #include <asm/delay.h> | ||
15 | #include <asm/tsc.h> | ||
16 | #include <asm/delay.h> | ||
17 | #include <asm/io.h> | ||
18 | |||
19 | #include "mach_timer.h" | ||
20 | |||
21 | /* | ||
22 | * On some systems the TSC frequency does not | ||
23 | * change with the cpu frequency. So we need | ||
24 | * an extra value to store the TSC freq | ||
25 | */ | ||
26 | unsigned int tsc_khz; | ||
27 | |||
28 | int tsc_disable __cpuinitdata = 0; | ||
29 | |||
30 | #ifdef CONFIG_X86_TSC | ||
31 | static int __init tsc_setup(char *str) | ||
32 | { | ||
33 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | ||
34 | "cannot disable TSC.\n"); | ||
35 | return 1; | ||
36 | } | ||
37 | #else | ||
38 | /* | ||
39 | * disable flag for tsc. Takes effect by clearing the TSC cpu flag | ||
40 | * in cpu/common.c | ||
41 | */ | ||
42 | static int __init tsc_setup(char *str) | ||
43 | { | ||
44 | tsc_disable = 1; | ||
45 | |||
46 | return 1; | ||
47 | } | ||
48 | #endif | ||
49 | |||
50 | __setup("notsc", tsc_setup); | ||
51 | |||
52 | /* | ||
53 | * code to mark and check if the TSC is unstable | ||
54 | * due to cpufreq or due to unsynced TSCs | ||
55 | */ | ||
56 | static int tsc_unstable; | ||
57 | |||
58 | static inline int check_tsc_unstable(void) | ||
59 | { | ||
60 | return tsc_unstable; | ||
61 | } | ||
62 | |||
63 | void mark_tsc_unstable(void) | ||
64 | { | ||
65 | tsc_unstable = 1; | ||
66 | } | ||
67 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
68 | |||
69 | /* Accellerators for sched_clock() | ||
70 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
71 | * basic equation: | ||
72 | * ns = cycles / (freq / ns_per_sec) | ||
73 | * ns = cycles * (ns_per_sec / freq) | ||
74 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
75 | * ns = cycles * (10^6 / cpu_khz) | ||
76 | * | ||
77 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
78 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
79 | * ns = cycles * cyc2ns_scale / SC | ||
80 | * | ||
81 | * And since SC is a constant power of two, we can convert the div | ||
82 | * into a shift. | ||
83 | * | ||
84 | * We can use khz divisor instead of mhz to keep a better percision, since | ||
85 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
86 | * (mathieu.desnoyers@polymtl.ca) | ||
87 | * | ||
88 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
89 | */ | ||
90 | static unsigned long cyc2ns_scale __read_mostly; | ||
91 | |||
92 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
93 | |||
94 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | ||
95 | { | ||
96 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
97 | } | ||
98 | |||
99 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
100 | { | ||
101 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Scheduler clock - returns current time in nanosec units. | ||
106 | */ | ||
107 | unsigned long long sched_clock(void) | ||
108 | { | ||
109 | unsigned long long this_offset; | ||
110 | |||
111 | /* | ||
112 | * in the NUMA case we dont use the TSC as they are not | ||
113 | * synchronized across all CPUs. | ||
114 | */ | ||
115 | #ifndef CONFIG_NUMA | ||
116 | if (!cpu_khz || check_tsc_unstable()) | ||
117 | #endif | ||
118 | /* no locking but a rare wrong value is not a big deal */ | ||
119 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | ||
120 | |||
121 | /* read the Time Stamp Counter: */ | ||
122 | rdtscll(this_offset); | ||
123 | |||
124 | /* return the value in ns */ | ||
125 | return cycles_2_ns(this_offset); | ||
126 | } | ||
127 | |||
128 | static unsigned long calculate_cpu_khz(void) | ||
129 | { | ||
130 | unsigned long long start, end; | ||
131 | unsigned long count; | ||
132 | u64 delta64; | ||
133 | int i; | ||
134 | unsigned long flags; | ||
135 | |||
136 | local_irq_save(flags); | ||
137 | |||
138 | /* run 3 times to ensure the cache is warm */ | ||
139 | for (i = 0; i < 3; i++) { | ||
140 | mach_prepare_counter(); | ||
141 | rdtscll(start); | ||
142 | mach_countup(&count); | ||
143 | rdtscll(end); | ||
144 | } | ||
145 | /* | ||
146 | * Error: ECTCNEVERSET | ||
147 | * The CTC wasn't reliable: we got a hit on the very first read, | ||
148 | * or the CPU was so fast/slow that the quotient wouldn't fit in | ||
149 | * 32 bits.. | ||
150 | */ | ||
151 | if (count <= 1) | ||
152 | goto err; | ||
153 | |||
154 | delta64 = end - start; | ||
155 | |||
156 | /* cpu freq too fast: */ | ||
157 | if (delta64 > (1ULL<<32)) | ||
158 | goto err; | ||
159 | |||
160 | /* cpu freq too slow: */ | ||
161 | if (delta64 <= CALIBRATE_TIME_MSEC) | ||
162 | goto err; | ||
163 | |||
164 | delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ | ||
165 | do_div(delta64,CALIBRATE_TIME_MSEC); | ||
166 | |||
167 | local_irq_restore(flags); | ||
168 | return (unsigned long)delta64; | ||
169 | err: | ||
170 | local_irq_restore(flags); | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | int recalibrate_cpu_khz(void) | ||
175 | { | ||
176 | #ifndef CONFIG_SMP | ||
177 | unsigned long cpu_khz_old = cpu_khz; | ||
178 | |||
179 | if (cpu_has_tsc) { | ||
180 | cpu_khz = calculate_cpu_khz(); | ||
181 | tsc_khz = cpu_khz; | ||
182 | cpu_data[0].loops_per_jiffy = | ||
183 | cpufreq_scale(cpu_data[0].loops_per_jiffy, | ||
184 | cpu_khz_old, cpu_khz); | ||
185 | return 0; | ||
186 | } else | ||
187 | return -ENODEV; | ||
188 | #else | ||
189 | return -ENODEV; | ||
190 | #endif | ||
191 | } | ||
192 | |||
193 | EXPORT_SYMBOL(recalibrate_cpu_khz); | ||
194 | |||
195 | void tsc_init(void) | ||
196 | { | ||
197 | if (!cpu_has_tsc || tsc_disable) | ||
198 | return; | ||
199 | |||
200 | cpu_khz = calculate_cpu_khz(); | ||
201 | tsc_khz = cpu_khz; | ||
202 | |||
203 | if (!cpu_khz) | ||
204 | return; | ||
205 | |||
206 | printk("Detected %lu.%03lu MHz processor.\n", | ||
207 | (unsigned long)cpu_khz / 1000, | ||
208 | (unsigned long)cpu_khz % 1000); | ||
209 | |||
210 | set_cyc2ns_scale(cpu_khz); | ||
211 | use_tsc_delay(); | ||
212 | } | ||
213 | |||
214 | #ifdef CONFIG_CPU_FREQ | ||
215 | |||
216 | static unsigned int cpufreq_delayed_issched = 0; | ||
217 | static unsigned int cpufreq_init = 0; | ||
218 | static struct work_struct cpufreq_delayed_get_work; | ||
219 | |||
220 | static void handle_cpufreq_delayed_get(void *v) | ||
221 | { | ||
222 | unsigned int cpu; | ||
223 | |||
224 | for_each_online_cpu(cpu) | ||
225 | cpufreq_get(cpu); | ||
226 | |||
227 | cpufreq_delayed_issched = 0; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries | ||
232 | * to verify the CPU frequency the timing core thinks the CPU is running | ||
233 | * at is still correct. | ||
234 | */ | ||
235 | static inline void cpufreq_delayed_get(void) | ||
236 | { | ||
237 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
238 | cpufreq_delayed_issched = 1; | ||
239 | printk(KERN_DEBUG "Checking if CPU frequency changed.\n"); | ||
240 | schedule_work(&cpufreq_delayed_get_work); | ||
241 | } | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * if the CPU frequency is scaled, TSC-based delays will need a different | ||
246 | * loops_per_jiffy value to function properly. | ||
247 | */ | ||
248 | static unsigned int ref_freq = 0; | ||
249 | static unsigned long loops_per_jiffy_ref = 0; | ||
250 | static unsigned long cpu_khz_ref = 0; | ||
251 | |||
252 | static int | ||
253 | time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | ||
254 | { | ||
255 | struct cpufreq_freqs *freq = data; | ||
256 | |||
257 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
258 | write_seqlock_irq(&xtime_lock); | ||
259 | |||
260 | if (!ref_freq) { | ||
261 | if (!freq->old){ | ||
262 | ref_freq = freq->new; | ||
263 | goto end; | ||
264 | } | ||
265 | ref_freq = freq->old; | ||
266 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; | ||
267 | cpu_khz_ref = cpu_khz; | ||
268 | } | ||
269 | |||
270 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
271 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
272 | (val == CPUFREQ_RESUMECHANGE)) { | ||
273 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
274 | cpu_data[freq->cpu].loops_per_jiffy = | ||
275 | cpufreq_scale(loops_per_jiffy_ref, | ||
276 | ref_freq, freq->new); | ||
277 | |||
278 | if (cpu_khz) { | ||
279 | |||
280 | if (num_online_cpus() == 1) | ||
281 | cpu_khz = cpufreq_scale(cpu_khz_ref, | ||
282 | ref_freq, freq->new); | ||
283 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | ||
284 | tsc_khz = cpu_khz; | ||
285 | set_cyc2ns_scale(cpu_khz); | ||
286 | /* | ||
287 | * TSC based sched_clock turns | ||
288 | * to junk w/ cpufreq | ||
289 | */ | ||
290 | mark_tsc_unstable(); | ||
291 | } | ||
292 | } | ||
293 | } | ||
294 | end: | ||
295 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
296 | write_sequnlock_irq(&xtime_lock); | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static struct notifier_block time_cpufreq_notifier_block = { | ||
302 | .notifier_call = time_cpufreq_notifier | ||
303 | }; | ||
304 | |||
305 | static int __init cpufreq_tsc(void) | ||
306 | { | ||
307 | int ret; | ||
308 | |||
309 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); | ||
310 | ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
311 | CPUFREQ_TRANSITION_NOTIFIER); | ||
312 | if (!ret) | ||
313 | cpufreq_init = 1; | ||
314 | |||
315 | return ret; | ||
316 | } | ||
317 | |||
318 | core_initcall(cpufreq_tsc); | ||
319 | |||
320 | #endif | ||
321 | |||
322 | /* clock source code */ | ||
323 | |||
324 | static unsigned long current_tsc_khz = 0; | ||
325 | static int tsc_update_callback(void); | ||
326 | |||
327 | static cycle_t read_tsc(void) | ||
328 | { | ||
329 | cycle_t ret; | ||
330 | |||
331 | rdtscll(ret); | ||
332 | |||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static struct clocksource clocksource_tsc = { | ||
337 | .name = "tsc", | ||
338 | .rating = 300, | ||
339 | .read = read_tsc, | ||
340 | .mask = CLOCKSOURCE_MASK(64), | ||
341 | .mult = 0, /* to be set */ | ||
342 | .shift = 22, | ||
343 | .update_callback = tsc_update_callback, | ||
344 | .is_continuous = 1, | ||
345 | }; | ||
346 | |||
347 | static int tsc_update_callback(void) | ||
348 | { | ||
349 | int change = 0; | ||
350 | |||
351 | /* check to see if we should switch to the safe clocksource: */ | ||
352 | if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { | ||
353 | clocksource_tsc.rating = 50; | ||
354 | clocksource_reselect(); | ||
355 | change = 1; | ||
356 | } | ||
357 | |||
358 | /* only update if tsc_khz has changed: */ | ||
359 | if (current_tsc_khz != tsc_khz) { | ||
360 | current_tsc_khz = tsc_khz; | ||
361 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | ||
362 | clocksource_tsc.shift); | ||
363 | change = 1; | ||
364 | } | ||
365 | |||
366 | return change; | ||
367 | } | ||
368 | |||
369 | static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) | ||
370 | { | ||
371 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | ||
372 | d->ident); | ||
373 | mark_tsc_unstable(); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | /* List of systems that have known TSC problems */ | ||
378 | static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | ||
379 | { | ||
380 | .callback = dmi_mark_tsc_unstable, | ||
381 | .ident = "IBM Thinkpad 380XD", | ||
382 | .matches = { | ||
383 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
384 | DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), | ||
385 | }, | ||
386 | }, | ||
387 | {} | ||
388 | }; | ||
389 | |||
390 | #define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */ | ||
391 | static struct timer_list verify_tsc_freq_timer; | ||
392 | |||
393 | /* XXX - Probably should add locking */ | ||
394 | static void verify_tsc_freq(unsigned long unused) | ||
395 | { | ||
396 | static u64 last_tsc; | ||
397 | static unsigned long last_jiffies; | ||
398 | |||
399 | u64 now_tsc, interval_tsc; | ||
400 | unsigned long now_jiffies, interval_jiffies; | ||
401 | |||
402 | |||
403 | if (check_tsc_unstable()) | ||
404 | return; | ||
405 | |||
406 | rdtscll(now_tsc); | ||
407 | now_jiffies = jiffies; | ||
408 | |||
409 | if (!last_jiffies) { | ||
410 | goto out; | ||
411 | } | ||
412 | |||
413 | interval_jiffies = now_jiffies - last_jiffies; | ||
414 | interval_tsc = now_tsc - last_tsc; | ||
415 | interval_tsc *= HZ; | ||
416 | do_div(interval_tsc, cpu_khz*1000); | ||
417 | |||
418 | if (interval_tsc < (interval_jiffies * 3 / 4)) { | ||
419 | printk("TSC appears to be running slowly. " | ||
420 | "Marking it as unstable\n"); | ||
421 | mark_tsc_unstable(); | ||
422 | return; | ||
423 | } | ||
424 | |||
425 | out: | ||
426 | last_tsc = now_tsc; | ||
427 | last_jiffies = now_jiffies; | ||
428 | /* set us up to go off on the next interval: */ | ||
429 | mod_timer(&verify_tsc_freq_timer, | ||
430 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL)); | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
435 | * over all CPUs. | ||
436 | */ | ||
437 | static __init int unsynchronized_tsc(void) | ||
438 | { | ||
439 | /* | ||
440 | * Intel systems are normally all synchronized. | ||
441 | * Exceptions must mark TSC as unstable: | ||
442 | */ | ||
443 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
444 | return 0; | ||
445 | |||
446 | /* assume multi socket systems are not synchronized: */ | ||
447 | return num_possible_cpus() > 1; | ||
448 | } | ||
449 | |||
450 | static int __init init_tsc_clocksource(void) | ||
451 | { | ||
452 | |||
453 | if (cpu_has_tsc && tsc_khz && !tsc_disable) { | ||
454 | /* check blacklist */ | ||
455 | dmi_check_system(bad_tsc_dmi_table); | ||
456 | |||
457 | if (unsynchronized_tsc()) /* mark unstable if unsynced */ | ||
458 | mark_tsc_unstable(); | ||
459 | current_tsc_khz = tsc_khz; | ||
460 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | ||
461 | clocksource_tsc.shift); | ||
462 | /* lower the rating if we already know its unstable: */ | ||
463 | if (check_tsc_unstable()) | ||
464 | clocksource_tsc.rating = 50; | ||
465 | |||
466 | init_timer(&verify_tsc_freq_timer); | ||
467 | verify_tsc_freq_timer.function = verify_tsc_freq; | ||
468 | verify_tsc_freq_timer.expires = | ||
469 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL); | ||
470 | add_timer(&verify_tsc_freq_timer); | ||
471 | |||
472 | return clocksource_register(&clocksource_tsc); | ||
473 | } | ||
474 | |||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | module_init(init_tsc_clocksource); | ||
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7512f39c9f25..2d4f1386e2b1 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -71,6 +71,15 @@ SECTIONS | |||
71 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } | 71 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } |
72 | _edata = .; /* End of data section */ | 72 | _edata = .; /* End of data section */ |
73 | 73 | ||
74 | #ifdef CONFIG_STACK_UNWIND | ||
75 | . = ALIGN(4); | ||
76 | .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { | ||
77 | __start_unwind = .; | ||
78 | *(.eh_frame) | ||
79 | __end_unwind = .; | ||
80 | } | ||
81 | #endif | ||
82 | |||
74 | . = ALIGN(THREAD_SIZE); /* init_task */ | 83 | . = ALIGN(THREAD_SIZE); /* init_task */ |
75 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | 84 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { |
76 | *(.data.init_task) | 85 | *(.data.init_task) |
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 3b62baa6a371..1a36d26e15eb 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S | |||
@@ -42,10 +42,10 @@ __kernel_vsyscall: | |||
42 | /* 7: align return point with nop's to make disassembly easier */ | 42 | /* 7: align return point with nop's to make disassembly easier */ |
43 | .space 7,0x90 | 43 | .space 7,0x90 |
44 | 44 | ||
45 | /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */ | 45 | /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ |
46 | jmp .Lenter_kernel | 46 | jmp .Lenter_kernel |
47 | /* 16: System call normal return point is here! */ | 47 | /* 16: System call normal return point is here! */ |
48 | .globl SYSENTER_RETURN /* Symbol used by entry.S. */ | 48 | .globl SYSENTER_RETURN /* Symbol used by sysenter.c */ |
49 | SYSENTER_RETURN: | 49 | SYSENTER_RETURN: |
50 | pop %ebp | 50 | pop %ebp |
51 | .Lpop_ebp: | 51 | .Lpop_ebp: |
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index 98699ca6e52d..e26975fc68b6 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | SECTIONS | 8 | SECTIONS |
9 | { | 9 | { |
10 | . = VSYSCALL_BASE + SIZEOF_HEADERS; | 10 | . = VDSO_PRELINK + SIZEOF_HEADERS; |
11 | 11 | ||
12 | .hash : { *(.hash) } :text | 12 | .hash : { *(.hash) } :text |
13 | .dynsym : { *(.dynsym) } | 13 | .dynsym : { *(.dynsym) } |
@@ -20,7 +20,7 @@ SECTIONS | |||
20 | For the layouts to match, we need to skip more than enough | 20 | For the layouts to match, we need to skip more than enough |
21 | space for the dynamic symbol table et al. If this amount | 21 | space for the dynamic symbol table et al. If this amount |
22 | is insufficient, ld -shared will barf. Just increase it here. */ | 22 | is insufficient, ld -shared will barf. Just increase it here. */ |
23 | . = VSYSCALL_BASE + 0x400; | 23 | . = VDSO_PRELINK + 0x400; |
24 | 24 | ||
25 | .text : { *(.text) } :text =0x90909090 | 25 | .text : { *(.text) } :text =0x90909090 |
26 | .note : { *(.note.*) } :text :note | 26 | .note : { *(.note.*) } :text :note |