Diffstat (limited to 'arch/i386')
-rw-r--r--  arch/i386/Kconfig | 39
-rw-r--r--  arch/i386/Kconfig.cpu | 2
-rw-r--r--  arch/i386/boot/Makefile | 9
-rw-r--r--  arch/i386/boot/compressed/misc.c | 32
-rw-r--r--  arch/i386/boot/video.S | 19
-rw-r--r--  arch/i386/crypto/aes-i586-asm.S | 29
-rw-r--r--  arch/i386/crypto/aes.c | 20
-rw-r--r--  arch/i386/kernel/Makefile | 8
-rw-r--r--  arch/i386/kernel/alternative.c | 118
-rw-r--r--  arch/i386/kernel/apic.c | 16
-rw-r--r--  arch/i386/kernel/apm.c | 6
-rw-r--r--  arch/i386/kernel/asm-offsets.c | 3
-rw-r--r--  arch/i386/kernel/cpu/amd.c | 16
-rw-r--r--  arch/i386/kernel/cpu/cyrix.c | 2
-rw-r--r--  arch/i386/kernel/cpu/intel.c | 6
-rw-r--r--  arch/i386/kernel/cpu/intel_cacheinfo.c | 113
-rw-r--r--  arch/i386/kernel/crash.c | 9
-rw-r--r--  arch/i386/kernel/entry.S | 263
-rw-r--r--  arch/i386/kernel/hpet.c | 67
-rw-r--r--  arch/i386/kernel/i8253.c | 118
-rw-r--r--  arch/i386/kernel/i8259.c | 2
-rw-r--r--  arch/i386/kernel/io_apic.c | 49
-rw-r--r--  arch/i386/kernel/irq.c | 2
-rw-r--r--  arch/i386/kernel/kprobes.c | 95
-rw-r--r--  arch/i386/kernel/machine_kexec.c | 4
-rw-r--r--  arch/i386/kernel/nmi.c | 72
-rw-r--r--  arch/i386/kernel/numaq.c | 10
-rw-r--r--  arch/i386/kernel/process.c | 8
-rw-r--r--  arch/i386/kernel/setup.c | 1
-rw-r--r--  arch/i386/kernel/smp.c | 12
-rw-r--r--  arch/i386/kernel/smpboot.c | 1
-rw-r--r--  arch/i386/kernel/time.c | 157
-rw-r--r--  arch/i386/kernel/timers/Makefile | 9
-rw-r--r--  arch/i386/kernel/timers/common.c | 172
-rw-r--r--  arch/i386/kernel/timers/timer.c | 75
-rw-r--r--  arch/i386/kernel/timers/timer_cyclone.c | 259
-rw-r--r--  arch/i386/kernel/timers/timer_hpet.c | 217
-rw-r--r--  arch/i386/kernel/timers/timer_none.c | 39
-rw-r--r--  arch/i386/kernel/timers/timer_pit.c | 177
-rw-r--r--  arch/i386/kernel/timers/timer_pm.c | 342
-rw-r--r--  arch/i386/kernel/timers/timer_tsc.c | 617
-rw-r--r--  arch/i386/kernel/traps.c | 70
-rw-r--r--  arch/i386/kernel/tsc.c | 478
-rw-r--r--  arch/i386/kernel/vmlinux.lds.S | 9
-rw-r--r--  arch/i386/lib/delay.c | 65
-rw-r--r--  arch/i386/mm/fault.c | 38
-rw-r--r--  arch/i386/oprofile/nmi_int.c | 4
-rw-r--r--  arch/i386/oprofile/op_model_athlon.c | 1
-rw-r--r--  arch/i386/oprofile/op_model_p4.c | 1
-rw-r--r--  arch/i386/oprofile/op_model_ppro.c | 1
-rw-r--r--  arch/i386/pci/pcbios.c | 6
51 files changed, 1616 insertions(+), 2272 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 1596101cfaf8..47c08bcd9b24 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86_32
14 486, 586, Pentiums, and various instruction-set-compatible chips by 14 486, 586, Pentiums, and various instruction-set-compatible chips by
15 AMD, Cyrix, and others. 15 AMD, Cyrix, and others.
16 16
17config GENERIC_TIME
18 bool
19 default y
20
17config SEMAPHORE_SLEEPERS 21config SEMAPHORE_SLEEPERS
18 bool 22 bool
19 default y 23 default y
@@ -324,6 +328,15 @@ config X86_MCE_P4THERMAL
324 Enabling this feature will cause a message to be printed when the P4 328 Enabling this feature will cause a message to be printed when the P4
325 enters thermal throttling. 329 enters thermal throttling.
326 330
331config VM86
332 default y
333 bool "Enable VM86 support" if EMBEDDED
334 help
335 This option is required by programs like DOSEMU to run 16-bit legacy
336 code on X86 processors. It also may be needed by software like
337 XFree86 to initialize some video cards via BIOS. Disabling this
338 option saves about 6k.
339
327config TOSHIBA 340config TOSHIBA
328 tristate "Toshiba Laptop support" 341 tristate "Toshiba Laptop support"
329 ---help--- 342 ---help---
@@ -721,7 +734,7 @@ config KEXEC
721 help 734 help
722 kexec is a system call that implements the ability to shutdown your 735 kexec is a system call that implements the ability to shutdown your
723 current kernel, and to start another kernel. It is like a reboot 736 current kernel, and to start another kernel. It is like a reboot
724 but it is indepedent of the system firmware. And like a reboot 737 but it is independent of the system firmware. And like a reboot
725 you can start any kernel with it, not just Linux. 738 you can start any kernel with it, not just Linux.
726 739
727 The name comes from the similiarity to the exec system call. 740 The name comes from the similiarity to the exec system call.
@@ -1046,13 +1059,27 @@ config SCx200
1046 tristate "NatSemi SCx200 support" 1059 tristate "NatSemi SCx200 support"
1047 depends on !X86_VOYAGER 1060 depends on !X86_VOYAGER
1048 help 1061 help
1049 This provides basic support for the National Semiconductor SCx200 1062 This provides basic support for National Semiconductor's
1050 processor. Right now this is just a driver for the GPIO pins. 1063 (now AMD's) Geode processors. The driver probes for the
1064 PCI-IDs of several on-chip devices, so its a good dependency
1065 for other scx200_* drivers.
1051 1066
1052 If you don't know what to do here, say N. 1067 If compiled as a module, the driver is named scx200.
1053 1068
1054 This support is also available as a module. If compiled as a 1069config SCx200HR_TIMER
1055 module, it will be called scx200. 1070 tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
1071 depends on SCx200 && GENERIC_TIME
1072 default y
1073 help
1074 This driver provides a clocksource built upon the on-chip
1075 27MHz high-resolution timer. Its also a workaround for
1076 NSC Geode SC-1100's buggy TSC, which loses time when the
1077 processor goes idle (as is done by the scheduler). The
1078 other workaround is idle=poll boot option.
1079
1080config K8_NB
1081 def_bool y
1082 depends on AGP_AMD64
1056 1083
1057source "drivers/pcmcia/Kconfig" 1084source "drivers/pcmcia/Kconfig"
1058 1085
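
The two new Kconfig entries above describe the clocksource infrastructure this series introduces: GENERIC_TIME switches i386 to the shared timekeeping core, and SCx200HR_TIMER exposes the Geode's on-chip 27 MHz counter as a clocksource. As a rough, hedged sketch of what such a GENERIC_TIME clocksource registers (field values, names and the MMIO access below are illustrative, not taken from the real scx200_hrt code):

    #include <linux/clocksource.h>
    #include <linux/init.h>
    #include <linux/module.h>
    #include <asm/io.h>

    static void __iomem *hrt_base;          /* hypothetical mapping of the counter */

    static cycle_t hrt_read(void)
    {
            return readl(hrt_base);         /* free-running 32-bit, 27 MHz counter */
    }

    static struct clocksource cs_hrt = {
            .name   = "scx200_hrt",
            .rating = 250,                  /* preferred over the SC-1100's buggy TSC */
            .read   = hrt_read,
            .mask   = CLOCKSOURCE_MASK(32),
            .shift  = 22,
    };

    static int __init hrt_init(void)
    {
            /* convert 27 MHz ticks to nanoseconds for the generic time core */
            cs_hrt.mult = clocksource_hz2mult(27000000, cs_hrt.shift);
            return clocksource_register(&cs_hrt);
    }
    module_init(hrt_init);
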
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index eb130482ba18..21c9a4e71104 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -41,7 +41,7 @@ config M386
41 - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). 41 - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
42 - "Geode GX/LX" For AMD Geode GX and LX processors. 42 - "Geode GX/LX" For AMD Geode GX and LX processors.
43 - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. 43 - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
44 - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). 44 - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
45 45
46 If you don't know what to do, choose "386". 46 If you don't know what to do, choose "386".
47 47
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 33e55476381b..e97946626064 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
109isoimage: $(BOOTIMAGE) 109isoimage: $(BOOTIMAGE)
110 -rm -rf $(obj)/isoimage 110 -rm -rf $(obj)/isoimage
111 mkdir $(obj)/isoimage 111 mkdir $(obj)/isoimage
112 cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ 112 for i in lib lib64 share end ; do \
113 $(obj)/isoimage 113 if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
114 cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
115 break ; \
116 fi ; \
117 if [ $$i = end ] ; then exit 1 ; fi ; \
118 done
114 cp $(BOOTIMAGE) $(obj)/isoimage/linux 119 cp $(BOOTIMAGE) $(obj)/isoimage/linux
115 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg 120 echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
116 if [ -f '$(FDINITRD)' ] ; then \ 121 if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index f19f3a7492a5..b2ccd543410d 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -24,14 +24,6 @@
24 24
25#undef memset 25#undef memset
26#undef memcpy 26#undef memcpy
27
28/*
29 * Why do we do this? Don't ask me..
30 *
31 * Incomprehensible are the ways of bootloaders.
32 */
33static void* memset(void *, int, size_t);
34static void* memcpy(void *, __const void *, size_t);
35#define memzero(s, n) memset ((s), 0, (n)) 27#define memzero(s, n) memset ((s), 0, (n))
36 28
37typedef unsigned char uch; 29typedef unsigned char uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
93#endif 85#endif
94#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) 86#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
95 87
96extern char input_data[]; 88extern unsigned char input_data[];
97extern int input_len; 89extern int input_len;
98 90
99static long bytes_out = 0; 91static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
103static void *malloc(int size); 95static void *malloc(int size);
104static void free(void *where); 96static void free(void *where);
105 97
98static void *memset(void *s, int c, unsigned n);
99static void *memcpy(void *dest, const void *src, unsigned n);
100
106static void putstr(const char *); 101static void putstr(const char *);
107 102
108extern int end; 103extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
205 outb_p(0xff & (pos >> 1), vidport+1); 200 outb_p(0xff & (pos >> 1), vidport+1);
206} 201}
207 202
208static void* memset(void* s, int c, size_t n) 203static void* memset(void* s, int c, unsigned n)
209{ 204{
210 int i; 205 int i;
211 char *ss = (char*)s; 206 char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
214 return s; 209 return s;
215} 210}
216 211
217static void* memcpy(void* __dest, __const void* __src, 212static void* memcpy(void* dest, const void* src, unsigned n)
218 size_t __n)
219{ 213{
220 int i; 214 int i;
221 char *d = (char *)__dest, *s = (char *)__src; 215 char *d = (char *)dest, *s = (char *)src;
222 216
223 for (i=0;i<__n;i++) d[i] = s[i]; 217 for (i=0;i<n;i++) d[i] = s[i];
224 return __dest; 218 return dest;
225} 219}
226 220
227/* =========================================================================== 221/* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
309#else 303#else
310 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); 304 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
311#endif 305#endif
312 output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ 306 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
313 free_mem_end_ptr = (long)real_mode; 307 free_mem_end_ptr = (long)real_mode;
314} 308}
315 309
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
324#ifdef STANDARD_MEMORY_BIOS_CALL 318#ifdef STANDARD_MEMORY_BIOS_CALL
325 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); 319 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
326#else 320#else
327 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 321 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
328 (3*1024))
329 error("Less than 4MB of memory");
330#endif 322#endif
331 mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START; 323 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
332 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX 324 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
333 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; 325 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
334 low_buffer_size = low_buffer_end - LOW_BUFFER_START; 326 low_buffer_size = low_buffer_end - LOW_BUFFER_START;
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index c9343c3a8082..8c2a6faeeae5 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1929,7 +1929,7 @@ skip10: movb %ah, %al
1929 ret 1929 ret
1930 1930
1931store_edid: 1931store_edid:
1932#ifdef CONFIG_FB_FIRMWARE_EDID 1932#ifdef CONFIG_FIRMWARE_EDID
1933 pushw %es # just save all registers 1933 pushw %es # just save all registers
1934 pushw %ax 1934 pushw %ax
1935 pushw %bx 1935 pushw %bx
@@ -1947,6 +1947,22 @@ store_edid:
1947 rep 1947 rep
1948 stosl 1948 stosl
1949 1949
1950 pushw %es # save ES
1951 xorw %di, %di # Report Capability
1952 pushw %di
1953 popw %es # ES:DI must be 0:0
1954 movw $0x4f15, %ax
1955 xorw %bx, %bx
1956 xorw %cx, %cx
1957 int $0x10
1958 popw %es # restore ES
1959
1960 cmpb $0x00, %ah # call successful
1961 jne no_edid
1962
1963 cmpb $0x4f, %al # function supported
1964 jne no_edid
1965
1950 movw $0x4f15, %ax # do VBE/DDC 1966 movw $0x4f15, %ax # do VBE/DDC
1951 movw $0x01, %bx 1967 movw $0x01, %bx
1952 movw $0x00, %cx 1968 movw $0x00, %cx
@@ -1954,6 +1970,7 @@ store_edid:
1954 movw $0x140, %di 1970 movw $0x140, %di
1955 int $0x10 1971 int $0x10
1956 1972
1973no_edid:
1957 popw %di # restore all registers 1974 popw %di # restore all registers
1958 popw %dx 1975 popw %dx
1959 popw %cx 1976 popw %cx
diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S
index 911b15377f2e..f942f0c8f630 100644
--- a/arch/i386/crypto/aes-i586-asm.S
+++ b/arch/i386/crypto/aes-i586-asm.S
@@ -36,22 +36,19 @@
36.file "aes-i586-asm.S" 36.file "aes-i586-asm.S"
37.text 37.text
38 38
39// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])// 39#include <asm/asm-offsets.h>
40// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
41
42#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
43 40
44// offsets to parameters with one register pushed onto stack 41#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
45
46#define in_blk 8 // input byte array address parameter
47#define out_blk 12 // output byte array address parameter
48#define ctx 16 // AES context structure
49 42
50// offsets in context structure 43/* offsets to parameters with one register pushed onto stack */
44#define tfm 8
45#define out_blk 12
46#define in_blk 16
51 47
52#define ekey 0 // encryption key schedule base address 48/* offsets in crypto_tfm structure */
53#define nrnd 256 // number of rounds 49#define ekey (crypto_tfm_ctx_offset + 0)
54#define dkey 260 // decryption key schedule base address 50#define nrnd (crypto_tfm_ctx_offset + 256)
51#define dkey (crypto_tfm_ctx_offset + 260)
55 52
56// register mapping for encrypt and decrypt subroutines 53// register mapping for encrypt and decrypt subroutines
57 54
@@ -220,6 +217,7 @@
220 do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ 217 do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
221 218
222// AES (Rijndael) Encryption Subroutine 219// AES (Rijndael) Encryption Subroutine
220/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
223 221
224.global aes_enc_blk 222.global aes_enc_blk
225 223
@@ -230,7 +228,7 @@
230 228
231aes_enc_blk: 229aes_enc_blk:
232 push %ebp 230 push %ebp
233 mov ctx(%esp),%ebp // pointer to context 231 mov tfm(%esp),%ebp
234 232
235// CAUTION: the order and the values used in these assigns 233// CAUTION: the order and the values used in these assigns
236// rely on the register mappings 234// rely on the register mappings
@@ -295,6 +293,7 @@ aes_enc_blk:
295 ret 293 ret
296 294
297// AES (Rijndael) Decryption Subroutine 295// AES (Rijndael) Decryption Subroutine
296/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
298 297
299.global aes_dec_blk 298.global aes_dec_blk
300 299
@@ -305,7 +304,7 @@ aes_enc_blk:
305 304
306aes_dec_blk: 305aes_dec_blk:
307 push %ebp 306 push %ebp
308 mov ctx(%esp),%ebp // pointer to context 307 mov tfm(%esp),%ebp
309 308
310// CAUTION: the order and the values used in these assigns 309// CAUTION: the order and the values used in these assigns
311// rely on the register mappings 310// rely on the register mappings
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
index a50397b1d5c7..d3806daa3de3 100644
--- a/arch/i386/crypto/aes.c
+++ b/arch/i386/crypto/aes.c
@@ -45,8 +45,8 @@
45#include <linux/crypto.h> 45#include <linux/crypto.h>
46#include <linux/linkage.h> 46#include <linux/linkage.h>
47 47
48asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx); 48asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
49asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx); 49asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
50 50
51#define AES_MIN_KEY_SIZE 16 51#define AES_MIN_KEY_SIZE 16
52#define AES_MAX_KEY_SIZE 32 52#define AES_MAX_KEY_SIZE 32
@@ -378,12 +378,12 @@ static void gen_tabs(void)
378 k[8*(i)+11] = ss[3]; \ 378 k[8*(i)+11] = ss[3]; \
379} 379}
380 380
381static int 381static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
382aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) 382 unsigned int key_len, u32 *flags)
383{ 383{
384 int i; 384 int i;
385 u32 ss[8]; 385 u32 ss[8];
386 struct aes_ctx *ctx = ctx_arg; 386 struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
387 const __le32 *key = (const __le32 *)in_key; 387 const __le32 *key = (const __le32 *)in_key;
388 388
389 /* encryption schedule */ 389 /* encryption schedule */
@@ -464,16 +464,16 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
464 return 0; 464 return 0;
465} 465}
466 466
467static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src) 467static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
468{ 468{
469 aes_enc_blk(src, dst, ctx); 469 aes_enc_blk(tfm, dst, src);
470} 470}
471static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src) 471
472static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
472{ 473{
473 aes_dec_blk(src, dst, ctx); 474 aes_dec_blk(tfm, dst, src);
474} 475}
475 476
476
477static struct crypto_alg aes_alg = { 477static struct crypto_alg aes_alg = {
478 .cra_name = "aes", 478 .cra_name = "aes",
479 .cra_driver_name = "aes-i586", 479 .cra_driver_name = "aes-i586",
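
With the prototypes changed to take struct crypto_tfm *, the assembly in aes-i586-asm.S no longer receives a raw context pointer; it instead adds crypto_tfm_ctx_offset (exported through asm-offsets.c below) to the tfm pointer. In C terms, the invariant the asm relies on is roughly the following sketch (field layout is schematic):

    #include <linux/crypto.h>

    /*
     * crypto_tfm_ctx() returns the algorithm-private area that follows the
     * generic crypto_tfm header (__crt_ctx).  The i586 AES assembly reaches
     * the same data as (char *)tfm + crypto_tfm_ctx_offset, where the offset
     * is generated from offsetof(struct crypto_tfm, __crt_ctx).
     */
    static struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
    {
            return crypto_tfm_ctx(tfm);
    }
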
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a020af2..5e70c2fb273a 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds
7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ 7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
9 pci-dma.o i386_ksyms.o i387.o bootflag.o \ 9 pci-dma.o i386_ksyms.o i387.o bootflag.o \
10 quirks.o i8237.o topology.o alternative.o 10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
11 11
12obj-y += cpu/ 12obj-y += cpu/
13obj-y += timers/
14obj-y += acpi/ 13obj-y += acpi/
15obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 14obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
16obj-$(CONFIG_MCA) += mca.o 15obj-$(CONFIG_MCA) += mca.o
@@ -37,6 +36,8 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
37obj-$(CONFIG_DOUBLEFAULT) += doublefault.o 36obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
38obj-$(CONFIG_VM86) += vm86.o 37obj-$(CONFIG_VM86) += vm86.o
39obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 38obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
39obj-$(CONFIG_HPET_TIMER) += hpet.o
40obj-$(CONFIG_K8_NB) += k8.o
40 41
41EXTRA_AFLAGS := -traditional 42EXTRA_AFLAGS := -traditional
42 43
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
76$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ 77$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
77 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE 78 $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
78 $(call if_changed,syscall) 79 $(call if_changed,syscall)
80
81k8-y += ../../x86_64/kernel/k8.o
82
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 5cbd6f99fb2a..50eb0e03777e 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -4,27 +4,41 @@
4#include <asm/alternative.h> 4#include <asm/alternative.h>
5#include <asm/sections.h> 5#include <asm/sections.h>
6 6
7#define DEBUG 0 7static int no_replacement = 0;
8#if DEBUG 8static int smp_alt_once = 0;
9# define DPRINTK(fmt, args...) printk(fmt, args) 9static int debug_alternative = 0;
10#else 10
11# define DPRINTK(fmt, args...) 11static int __init noreplacement_setup(char *s)
12#endif 12{
13 no_replacement = 1;
14 return 1;
15}
16static int __init bootonly(char *str)
17{
18 smp_alt_once = 1;
19 return 1;
20}
21static int __init debug_alt(char *str)
22{
23 debug_alternative = 1;
24 return 1;
25}
13 26
27__setup("noreplacement", noreplacement_setup);
28__setup("smp-alt-boot", bootonly);
29__setup("debug-alternative", debug_alt);
30
31#define DPRINTK(fmt, args...) if (debug_alternative) \
32 printk(KERN_DEBUG fmt, args)
33
34#ifdef GENERIC_NOP1
14/* Use inline assembly to define this because the nops are defined 35/* Use inline assembly to define this because the nops are defined
15 as inline assembly strings in the include files and we cannot 36 as inline assembly strings in the include files and we cannot
16 get them easily into strings. */ 37 get them easily into strings. */
17asm("\t.data\nintelnops: " 38asm("\t.data\nintelnops: "
18 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 39 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
19 GENERIC_NOP7 GENERIC_NOP8); 40 GENERIC_NOP7 GENERIC_NOP8);
20asm("\t.data\nk8nops: " 41extern unsigned char intelnops[];
21 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
22 K8_NOP7 K8_NOP8);
23asm("\t.data\nk7nops: "
24 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
25 K7_NOP7 K7_NOP8);
26
27extern unsigned char intelnops[], k8nops[], k7nops[];
28static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 42static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
29 NULL, 43 NULL,
30 intelnops, 44 intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
36 intelnops + 1 + 2 + 3 + 4 + 5 + 6, 50 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
37 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 51 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
38}; 52};
53#endif
54
55#ifdef K8_NOP1
56asm("\t.data\nk8nops: "
57 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
58 K8_NOP7 K8_NOP8);
59extern unsigned char k8nops[];
39static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 60static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
40 NULL, 61 NULL,
41 k8nops, 62 k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
47 k8nops + 1 + 2 + 3 + 4 + 5 + 6, 68 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
48 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 69 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
49}; 70};
71#endif
72
73#ifdef K7_NOP1
74asm("\t.data\nk7nops: "
75 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
76 K7_NOP7 K7_NOP8);
77extern unsigned char k7nops[];
50static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 78static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
51 NULL, 79 NULL,
52 k7nops, 80 k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
58 k7nops + 1 + 2 + 3 + 4 + 5 + 6, 86 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
59 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 87 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
60}; 88};
89#endif
90
91#ifdef CONFIG_X86_64
92
93extern char __vsyscall_0;
94static inline unsigned char** find_nop_table(void)
95{
96 return k8_nops;
97}
98
99#else /* CONFIG_X86_64 */
100
61static struct nop { 101static struct nop {
62 int cpuid; 102 int cpuid;
63 unsigned char **noptable; 103 unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
67 { -1, NULL } 107 { -1, NULL }
68}; 108};
69 109
70
71extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
72extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
73extern u8 *__smp_locks[], *__smp_locks_end[];
74
75extern u8 __smp_alt_begin[], __smp_alt_end[];
76
77
78static unsigned char** find_nop_table(void) 110static unsigned char** find_nop_table(void)
79{ 111{
80 unsigned char **noptable = intel_nops; 112 unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
89 return noptable; 121 return noptable;
90} 122}
91 123
124#endif /* CONFIG_X86_64 */
125
126extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
127extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
128extern u8 *__smp_locks[], *__smp_locks_end[];
129
130extern u8 __smp_alt_begin[], __smp_alt_end[];
131
92/* Replace instructions with better alternatives for this CPU type. 132/* Replace instructions with better alternatives for this CPU type.
93 This runs before SMP is initialized to avoid SMP problems with 133 This runs before SMP is initialized to avoid SMP problems with
94 self modifying code. This implies that assymetric systems where 134 self modifying code. This implies that assymetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
99{ 139{
100 unsigned char **noptable = find_nop_table(); 140 unsigned char **noptable = find_nop_table();
101 struct alt_instr *a; 141 struct alt_instr *a;
142 u8 *instr;
102 int diff, i, k; 143 int diff, i, k;
103 144
104 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); 145 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
106 BUG_ON(a->replacementlen > a->instrlen); 147 BUG_ON(a->replacementlen > a->instrlen);
107 if (!boot_cpu_has(a->cpuid)) 148 if (!boot_cpu_has(a->cpuid))
108 continue; 149 continue;
109 memcpy(a->instr, a->replacement, a->replacementlen); 150 instr = a->instr;
151#ifdef CONFIG_X86_64
152 /* vsyscall code is not mapped yet. resolve it manually. */
153 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
154 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
155 DPRINTK("%s: vsyscall fixup: %p => %p\n",
156 __FUNCTION__, a->instr, instr);
157 }
158#endif
159 memcpy(instr, a->replacement, a->replacementlen);
110 diff = a->instrlen - a->replacementlen; 160 diff = a->instrlen - a->replacementlen;
111 /* Pad the rest with nops */ 161 /* Pad the rest with nops */
112 for (i = a->replacementlen; diff > 0; diff -= k, i += k) { 162 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
186static LIST_HEAD(smp_alt_modules); 236static LIST_HEAD(smp_alt_modules);
187static DEFINE_SPINLOCK(smp_alt); 237static DEFINE_SPINLOCK(smp_alt);
188 238
189static int smp_alt_once = 0;
190static int __init bootonly(char *str)
191{
192 smp_alt_once = 1;
193 return 1;
194}
195__setup("smp-alt-boot", bootonly);
196
197void alternatives_smp_module_add(struct module *mod, char *name, 239void alternatives_smp_module_add(struct module *mod, char *name,
198 void *locks, void *locks_end, 240 void *locks, void *locks_end,
199 void *text, void *text_end) 241 void *text, void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
201 struct smp_alt_module *smp; 243 struct smp_alt_module *smp;
202 unsigned long flags; 244 unsigned long flags;
203 245
246 if (no_replacement)
247 return;
248
204 if (smp_alt_once) { 249 if (smp_alt_once) {
205 if (boot_cpu_has(X86_FEATURE_UP)) 250 if (boot_cpu_has(X86_FEATURE_UP))
206 alternatives_smp_unlock(locks, locks_end, 251 alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
235 struct smp_alt_module *item; 280 struct smp_alt_module *item;
236 unsigned long flags; 281 unsigned long flags;
237 282
238 if (smp_alt_once) 283 if (no_replacement || smp_alt_once)
239 return; 284 return;
240 285
241 spin_lock_irqsave(&smp_alt, flags); 286 spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
256 struct smp_alt_module *mod; 301 struct smp_alt_module *mod;
257 unsigned long flags; 302 unsigned long flags;
258 303
259 if (smp_alt_once) 304 if (no_replacement || smp_alt_once)
260 return; 305 return;
261 BUG_ON(!smp && (num_online_cpus() > 1)); 306 BUG_ON(!smp && (num_online_cpus() > 1));
262 307
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
285 330
286void __init alternative_instructions(void) 331void __init alternative_instructions(void)
287{ 332{
333 if (no_replacement) {
334 printk(KERN_INFO "(SMP-)alternatives turned off\n");
335 free_init_pages("SMP alternatives",
336 (unsigned long)__smp_alt_begin,
337 (unsigned long)__smp_alt_end);
338 return;
339 }
288 apply_alternatives(__alt_instructions, __alt_instructions_end); 340 apply_alternatives(__alt_instructions, __alt_instructions_end);
289 341
290 /* switch to patch-once-at-boottime-only mode and free the 342 /* switch to patch-once-at-boottime-only mode and free the
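
The three new knobs in alternative.c ("noreplacement", "smp-alt-boot", "debug-alternative") are ordinary __setup() boot parameters. For reference, the pattern looks like this; the flag name here is a made-up example, not one added by the patch:

    #include <linux/init.h>

    static int example_flag;                /* hypothetical */

    static int __init example_setup(char *str)
    {
            example_flag = 1;               /* "example-flag" was on the command line */
            return 1;                       /* non-zero: argument consumed */
    }
    __setup("example-flag", example_setup);
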
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 5ab59c12335b..7ce09492fc0c 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -36,6 +36,7 @@
36#include <asm/arch_hooks.h> 36#include <asm/arch_hooks.h>
37#include <asm/hpet.h> 37#include <asm/hpet.h>
38#include <asm/i8253.h> 38#include <asm/i8253.h>
39#include <asm/nmi.h>
39 40
40#include <mach_apic.h> 41#include <mach_apic.h>
41#include <mach_apicdef.h> 42#include <mach_apicdef.h>
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
156 maxlvt = get_maxlvt(); 157 maxlvt = get_maxlvt();
157 158
158 /* 159 /*
159 * Masking an LVT entry on a P6 can trigger a local APIC error 160 * Masking an LVT entry can trigger a local APIC error
160 * if the vector is zero. Mask LVTERR first to prevent this. 161 * if the vector is zero. Mask LVTERR first to prevent this.
161 */ 162 */
162 if (maxlvt >= 3) { 163 if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
1117 unsigned long v; 1118 unsigned long v;
1118 1119
1119 v = apic_read(APIC_LVTT); 1120 v = apic_read(APIC_LVTT);
1120 apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); 1121 /*
1122 * When an illegal vector value (0-15) is written to an LVT
1123 * entry and delivery mode is Fixed, the APIC may signal an
1124 * illegal vector error, with out regard to whether the mask
1125 * bit is set or whether an interrupt is actually seen on input.
1126 *
1127 * Boot sequence might call this function when the LVTT has
1128 * '0' vector value. So make sure vector field is set to
1129 * valid value.
1130 */
1131 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1132 apic_write_around(APIC_LVTT, v);
1121 } 1133 }
1122} 1134}
1123 1135
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 9e819eb68229..7c5729d1fd06 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -764,9 +764,9 @@ static int apm_do_idle(void)
764 int idled = 0; 764 int idled = 0;
765 int polling; 765 int polling;
766 766
767 polling = test_thread_flag(TIF_POLLING_NRFLAG); 767 polling = !!(current_thread_info()->status & TS_POLLING);
768 if (polling) { 768 if (polling) {
769 clear_thread_flag(TIF_POLLING_NRFLAG); 769 current_thread_info()->status &= ~TS_POLLING;
770 smp_mb__after_clear_bit(); 770 smp_mb__after_clear_bit();
771 } 771 }
772 if (!need_resched()) { 772 if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
774 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); 774 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
775 } 775 }
776 if (polling) 776 if (polling)
777 set_thread_flag(TIF_POLLING_NRFLAG); 777 current_thread_info()->status |= TS_POLLING;
778 778
779 if (!idled) 779 if (!idled)
780 return 0; 780 return 0;
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 36d66e2077d0..1c3a809e6421 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -4,6 +4,7 @@
4 * to extract and format the required data. 4 * to extract and format the required data.
5 */ 5 */
6 6
7#include <linux/crypto.h>
7#include <linux/sched.h> 8#include <linux/sched.h>
8#include <linux/signal.h> 9#include <linux/signal.h>
9#include <linux/personality.h> 10#include <linux/personality.h>
@@ -69,4 +70,6 @@ void foo(void)
69 70
70 DEFINE(PAGE_SIZE_asm, PAGE_SIZE); 71 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
71 DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); 72 DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
73
74 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
72} 75}
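
asm-offsets.c never links into the kernel; it is compiled only so that DEFINE()/OFFSET() emit marker lines the build turns into #defines in asm-offsets.h, which the assembly can then include. Roughly (macro bodies paraphrased from the conventional i386 definitions, and the numeric value shown is layout-dependent, not a real constant):

    #define DEFINE(sym, val) \
            asm volatile("\n->" #sym " %0 " #val : : "i" (val))
    #define OFFSET(sym, str, mem) \
            DEFINE(sym, offsetof(struct str, mem))

    /* The new line ...                                                  */
    OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
    /* ... therefore ends up in asm-offsets.h as something like:         */
    /* #define crypto_tfm_ctx_offset 72   (value depends on the layout)  */
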
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 786d1a57048b..fd0457c9c827 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c)
224 224
225#ifdef CONFIG_X86_HT 225#ifdef CONFIG_X86_HT
226 /* 226 /*
227 * On a AMD dual core setup the lower bits of the APIC id 227 * On a AMD multi core setup the lower bits of the APIC id
228 * distingush the cores. Assumes number of cores is a power 228 * distingush the cores.
229 * of two.
230 */ 229 */
231 if (c->x86_max_cores > 1) { 230 if (c->x86_max_cores > 1) {
232 int cpu = smp_processor_id(); 231 int cpu = smp_processor_id();
233 unsigned bits = 0; 232 unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
234 while ((1 << bits) < c->x86_max_cores) 233
235 bits++; 234 if (bits == 0) {
235 while ((1 << bits) < c->x86_max_cores)
236 bits++;
237 }
236 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); 238 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
237 phys_proc_id[cpu] >>= bits; 239 phys_proc_id[cpu] >>= bits;
238 printk(KERN_INFO "CPU %d(%d) -> Core %d\n", 240 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c)
240 } 242 }
241#endif 243#endif
242 244
245 if (cpuid_eax(0x80000000) >= 0x80000006)
246 num_cache_leaves = 3;
243} 247}
244 248
245static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) 249static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
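
The amd.c change reads the core-id width from CPUID 0x80000008 ECX[15:12] instead of deriving it from x86_max_cores, keeping the power-of-two loop only as a fallback when that field reads zero. A short worked decode with an illustrative register value:

    unsigned ecx   = cpuid_ecx(0x80000008);    /* e.g. 0x00001001 */
    unsigned bits  = (ecx >> 12) & 0xf;        /* ApicIdCoreIdSize -> 1 */
    unsigned cores = (ecx & 0xff) + 1;         /* NC field -> 2 cores per package */

    /* cpu_core_id  = low 'bits' bits of the initial APIC id,
     * phys_proc_id = the remaining high bits                  */
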
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index fc32c8028e24..f03b7f94c304 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -354,7 +354,7 @@ static void __init init_nsc(struct cpuinfo_x86 *c)
354 * This function only handles the GX processor, and kicks every 354 * This function only handles the GX processor, and kicks every
355 * thing else to the Cyrix init function above - that should 355 * thing else to the Cyrix init function above - that should
356 * cover any processors that might have been branded differently 356 * cover any processors that might have been branded differently
357 * after NSC aquired Cyrix. 357 * after NSC acquired Cyrix.
358 * 358 *
359 * If this breaks your GX1 horribly, please e-mail 359 * If this breaks your GX1 horribly, please e-mail
360 * info-linux@ldcmail.amd.com to tell us. 360 * info-linux@ldcmail.amd.com to tell us.
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5386b29bb5a5..10afc645c540 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
122 122
123 select_idle_routine(c); 123 select_idle_routine(c);
124 l2 = init_intel_cacheinfo(c); 124 l2 = init_intel_cacheinfo(c);
125 if (c->cpuid_level > 9 ) {
126 unsigned eax = cpuid_eax(10);
127 /* Check for version and the number of counters */
128 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
129 set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
130 }
125 131
126 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ 132 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
127 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) 133 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
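
The intel.c hunk probes CPUID leaf 10 (0xA): EAX[7:0] is the architectural performance-monitoring version and EAX[15:8] the number of general-purpose counters; X86_FEATURE_ARCH_PERFMON is set only when the version is non-zero and more than one counter is reported. Decoding an illustrative EAX value:

    unsigned eax      = cpuid_eax(10);         /* e.g. 0x07280202 on a Core-class CPU */
    unsigned version  = eax & 0xff;            /* -> 2 */
    unsigned counters = (eax >> 8) & 0xff;     /* -> 2 */

    if (version && counters > 1)
            set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
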
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index c8547a6fa7e6..6c37b4fd8ce2 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,6 +4,7 @@
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen : CPUID4 emulation on AMD.
7 */ 8 */
8 9
9#include <linux/init.h> 10#include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
130 cpumask_t shared_cpu_map; 131 cpumask_t shared_cpu_map;
131}; 132};
132 133
133static unsigned short num_cache_leaves; 134unsigned short num_cache_leaves;
135
136/* AMD doesn't have CPUID4. Emulate it here to report the same
137 information to the user. This makes some assumptions about the machine:
138 No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
139
140 In theory the TLBs could be reported as fake type (they are in "dummy").
141 Maybe later */
142union l1_cache {
143 struct {
144 unsigned line_size : 8;
145 unsigned lines_per_tag : 8;
146 unsigned assoc : 8;
147 unsigned size_in_kb : 8;
148 };
149 unsigned val;
150};
151
152union l2_cache {
153 struct {
154 unsigned line_size : 8;
155 unsigned lines_per_tag : 4;
156 unsigned assoc : 4;
157 unsigned size_in_kb : 16;
158 };
159 unsigned val;
160};
161
162static unsigned short assocs[] = {
163 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
164 [8] = 16,
165 [0xf] = 0xffff // ??
166 };
167static unsigned char levels[] = { 1, 1, 2 };
168static unsigned char types[] = { 1, 2, 3 };
169
170static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
171 union _cpuid4_leaf_ebx *ebx,
172 union _cpuid4_leaf_ecx *ecx)
173{
174 unsigned dummy;
175 unsigned line_size, lines_per_tag, assoc, size_in_kb;
176 union l1_cache l1i, l1d;
177 union l2_cache l2;
178
179 eax->full = 0;
180 ebx->full = 0;
181 ecx->full = 0;
182
183 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
184 cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
185
186 if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
187 return;
188
189 eax->split.is_self_initializing = 1;
190 eax->split.type = types[leaf];
191 eax->split.level = levels[leaf];
192 eax->split.num_threads_sharing = 0;
193 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
194
195 if (leaf <= 1) {
196 union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
197 assoc = l1->assoc;
198 line_size = l1->line_size;
199 lines_per_tag = l1->lines_per_tag;
200 size_in_kb = l1->size_in_kb;
201 } else {
202 assoc = l2.assoc;
203 line_size = l2.line_size;
204 lines_per_tag = l2.lines_per_tag;
205 /* cpu_data has errata corrections for K7 applied */
206 size_in_kb = current_cpu_data.x86_cache_size;
207 }
208
209 if (assoc == 0xf)
210 eax->split.is_fully_associative = 1;
211 ebx->split.coherency_line_size = line_size - 1;
212 ebx->split.ways_of_associativity = assocs[assoc] - 1;
213 ebx->split.physical_line_partition = lines_per_tag - 1;
214 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
215 (ebx->split.ways_of_associativity + 1) - 1;
216}
134 217
135static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 218static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
136{ 219{
137 unsigned int eax, ebx, ecx, edx; 220 union _cpuid4_leaf_eax eax;
138 union _cpuid4_leaf_eax cache_eax; 221 union _cpuid4_leaf_ebx ebx;
222 union _cpuid4_leaf_ecx ecx;
223 unsigned edx;
139 224
140 cpuid_count(4, index, &eax, &ebx, &ecx, &edx); 225 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
141 cache_eax.full = eax; 226 amd_cpuid4(index, &eax, &ebx, &ecx);
142 if (cache_eax.split.type == CACHE_TYPE_NULL) 227 else
228 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
229 if (eax.split.type == CACHE_TYPE_NULL)
143 return -EIO; /* better error ? */ 230 return -EIO; /* better error ? */
144 231
145 this_leaf->eax.full = eax; 232 this_leaf->eax = eax;
146 this_leaf->ebx.full = ebx; 233 this_leaf->ebx = ebx;
147 this_leaf->ecx.full = ecx; 234 this_leaf->ecx = ecx;
148 this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * 235 this_leaf->size = (ecx.split.number_of_sets + 1) *
149 (this_leaf->ebx.split.coherency_line_size + 1) * 236 (ebx.split.coherency_line_size + 1) *
150 (this_leaf->ebx.split.physical_line_partition + 1) * 237 (ebx.split.physical_line_partition + 1) *
151 (this_leaf->ebx.split.ways_of_associativity + 1); 238 (ebx.split.ways_of_associativity + 1);
152 return 0; 239 return 0;
153} 240}
154 241
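
amd_cpuid4() above synthesizes CPUID-4 style cache descriptors from AMD's 0x80000005/0x80000006 leaves so the shared sysfs cache-topology code keeps working. The derived set count follows the usual size = sets * line_size * ways relation; for example, with an illustrative 512 KB, 16-way L2 using 64-byte lines:

    unsigned size_in_kb = 512, line_size = 64, ways = 16;   /* example values */
    unsigned sets = (size_in_kb * 1024) / line_size / ways; /* = 512 */

    /* CPUID 4 uses "minus one" encodings, hence: */
    ecx->split.number_of_sets        = sets - 1;             /* 511 */
    ebx->split.ways_of_associativity = ways - 1;
    ebx->split.coherency_line_size   = line_size - 1;
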
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 21dc1bbb8067..48f0f62f781c 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
120 return 1; 120 return 1;
121} 121}
122 122
123/*
124 * By using the NMI code instead of a vector we just sneak thru the
125 * word generator coming out with just what we want. AND it does
126 * not matter if clustered_apic_mode is set or not.
127 */
128static void smp_send_nmi_allbutself(void) 123static void smp_send_nmi_allbutself(void)
129{ 124{
130 send_IPI_allbutself(APIC_DM_NMI); 125 send_IPI_allbutself(NMI_VECTOR);
131} 126}
132 127
133static void nmi_shootdown_cpus(void) 128static void nmi_shootdown_cpus(void)
@@ -163,7 +158,7 @@ static void nmi_shootdown_cpus(void)
163void machine_crash_shutdown(struct pt_regs *regs) 158void machine_crash_shutdown(struct pt_regs *regs)
164{ 159{
165 /* This function is only called after the system 160 /* This function is only called after the system
166 * has paniced or is otherwise in a critical state. 161 * has panicked or is otherwise in a critical state.
167 * The minimum amount of code to allow a kexec'd kernel 162 * The minimum amount of code to allow a kexec'd kernel
168 * to run successfully needs to happen here. 163 * to run successfully needs to happen here.
169 * 164 *
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index cfc683f153b9..e6e4506e749a 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -48,6 +48,7 @@
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/page.h> 49#include <asm/page.h>
50#include <asm/desc.h> 50#include <asm/desc.h>
51#include <asm/dwarf2.h>
51#include "irq_vectors.h" 52#include "irq_vectors.h"
52 53
53#define nr_syscalls ((syscall_table_size)/4) 54#define nr_syscalls ((syscall_table_size)/4)
@@ -85,31 +86,67 @@ VM_MASK = 0x00020000
85#define SAVE_ALL \ 86#define SAVE_ALL \
86 cld; \ 87 cld; \
87 pushl %es; \ 88 pushl %es; \
89 CFI_ADJUST_CFA_OFFSET 4;\
90 /*CFI_REL_OFFSET es, 0;*/\
88 pushl %ds; \ 91 pushl %ds; \
92 CFI_ADJUST_CFA_OFFSET 4;\
93 /*CFI_REL_OFFSET ds, 0;*/\
89 pushl %eax; \ 94 pushl %eax; \
95 CFI_ADJUST_CFA_OFFSET 4;\
96 CFI_REL_OFFSET eax, 0;\
90 pushl %ebp; \ 97 pushl %ebp; \
98 CFI_ADJUST_CFA_OFFSET 4;\
99 CFI_REL_OFFSET ebp, 0;\
91 pushl %edi; \ 100 pushl %edi; \
101 CFI_ADJUST_CFA_OFFSET 4;\
102 CFI_REL_OFFSET edi, 0;\
92 pushl %esi; \ 103 pushl %esi; \
104 CFI_ADJUST_CFA_OFFSET 4;\
105 CFI_REL_OFFSET esi, 0;\
93 pushl %edx; \ 106 pushl %edx; \
107 CFI_ADJUST_CFA_OFFSET 4;\
108 CFI_REL_OFFSET edx, 0;\
94 pushl %ecx; \ 109 pushl %ecx; \
110 CFI_ADJUST_CFA_OFFSET 4;\
111 CFI_REL_OFFSET ecx, 0;\
95 pushl %ebx; \ 112 pushl %ebx; \
113 CFI_ADJUST_CFA_OFFSET 4;\
114 CFI_REL_OFFSET ebx, 0;\
96 movl $(__USER_DS), %edx; \ 115 movl $(__USER_DS), %edx; \
97 movl %edx, %ds; \ 116 movl %edx, %ds; \
98 movl %edx, %es; 117 movl %edx, %es;
99 118
100#define RESTORE_INT_REGS \ 119#define RESTORE_INT_REGS \
101 popl %ebx; \ 120 popl %ebx; \
121 CFI_ADJUST_CFA_OFFSET -4;\
122 CFI_RESTORE ebx;\
102 popl %ecx; \ 123 popl %ecx; \
124 CFI_ADJUST_CFA_OFFSET -4;\
125 CFI_RESTORE ecx;\
103 popl %edx; \ 126 popl %edx; \
127 CFI_ADJUST_CFA_OFFSET -4;\
128 CFI_RESTORE edx;\
104 popl %esi; \ 129 popl %esi; \
130 CFI_ADJUST_CFA_OFFSET -4;\
131 CFI_RESTORE esi;\
105 popl %edi; \ 132 popl %edi; \
133 CFI_ADJUST_CFA_OFFSET -4;\
134 CFI_RESTORE edi;\
106 popl %ebp; \ 135 popl %ebp; \
107 popl %eax 136 CFI_ADJUST_CFA_OFFSET -4;\
137 CFI_RESTORE ebp;\
138 popl %eax; \
139 CFI_ADJUST_CFA_OFFSET -4;\
140 CFI_RESTORE eax
108 141
109#define RESTORE_REGS \ 142#define RESTORE_REGS \
110 RESTORE_INT_REGS; \ 143 RESTORE_INT_REGS; \
1111: popl %ds; \ 1441: popl %ds; \
145 CFI_ADJUST_CFA_OFFSET -4;\
146 /*CFI_RESTORE ds;*/\
1122: popl %es; \ 1472: popl %es; \
148 CFI_ADJUST_CFA_OFFSET -4;\
149 /*CFI_RESTORE es;*/\
113.section .fixup,"ax"; \ 150.section .fixup,"ax"; \
1143: movl $0,(%esp); \ 1513: movl $0,(%esp); \
115 jmp 1b; \ 152 jmp 1b; \
@@ -122,13 +159,43 @@ VM_MASK = 0x00020000
122 .long 2b,4b; \ 159 .long 2b,4b; \
123.previous 160.previous
124 161
162#define RING0_INT_FRAME \
163 CFI_STARTPROC simple;\
164 CFI_DEF_CFA esp, 3*4;\
165 /*CFI_OFFSET cs, -2*4;*/\
166 CFI_OFFSET eip, -3*4
167
168#define RING0_EC_FRAME \
169 CFI_STARTPROC simple;\
170 CFI_DEF_CFA esp, 4*4;\
171 /*CFI_OFFSET cs, -2*4;*/\
172 CFI_OFFSET eip, -3*4
173
174#define RING0_PTREGS_FRAME \
175 CFI_STARTPROC simple;\
176 CFI_DEF_CFA esp, OLDESP-EBX;\
177 /*CFI_OFFSET cs, CS-OLDESP;*/\
178 CFI_OFFSET eip, EIP-OLDESP;\
179 /*CFI_OFFSET es, ES-OLDESP;*/\
180 /*CFI_OFFSET ds, DS-OLDESP;*/\
181 CFI_OFFSET eax, EAX-OLDESP;\
182 CFI_OFFSET ebp, EBP-OLDESP;\
183 CFI_OFFSET edi, EDI-OLDESP;\
184 CFI_OFFSET esi, ESI-OLDESP;\
185 CFI_OFFSET edx, EDX-OLDESP;\
186 CFI_OFFSET ecx, ECX-OLDESP;\
187 CFI_OFFSET ebx, EBX-OLDESP
125 188
126ENTRY(ret_from_fork) 189ENTRY(ret_from_fork)
190 CFI_STARTPROC
127 pushl %eax 191 pushl %eax
192 CFI_ADJUST_CFA_OFFSET -4
128 call schedule_tail 193 call schedule_tail
129 GET_THREAD_INFO(%ebp) 194 GET_THREAD_INFO(%ebp)
130 popl %eax 195 popl %eax
196 CFI_ADJUST_CFA_OFFSET -4
131 jmp syscall_exit 197 jmp syscall_exit
198 CFI_ENDPROC
132 199
133/* 200/*
134 * Return to user mode is not as complex as all this looks, 201 * Return to user mode is not as complex as all this looks,
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork)
139 206
140 # userspace resumption stub bypassing syscall exit tracing 207 # userspace resumption stub bypassing syscall exit tracing
141 ALIGN 208 ALIGN
209 RING0_PTREGS_FRAME
142ret_from_exception: 210ret_from_exception:
143 preempt_stop 211 preempt_stop
144ret_from_intr: 212ret_from_intr:
@@ -171,20 +239,33 @@ need_resched:
171 call preempt_schedule_irq 239 call preempt_schedule_irq
172 jmp need_resched 240 jmp need_resched
173#endif 241#endif
242 CFI_ENDPROC
174 243
175/* SYSENTER_RETURN points to after the "sysenter" instruction in 244/* SYSENTER_RETURN points to after the "sysenter" instruction in
176 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ 245 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
177 246
178 # sysenter call handler stub 247 # sysenter call handler stub
179ENTRY(sysenter_entry) 248ENTRY(sysenter_entry)
249 CFI_STARTPROC simple
250 CFI_DEF_CFA esp, 0
251 CFI_REGISTER esp, ebp
180 movl TSS_sysenter_esp0(%esp),%esp 252 movl TSS_sysenter_esp0(%esp),%esp
181sysenter_past_esp: 253sysenter_past_esp:
182 sti 254 sti
183 pushl $(__USER_DS) 255 pushl $(__USER_DS)
256 CFI_ADJUST_CFA_OFFSET 4
257 /*CFI_REL_OFFSET ss, 0*/
184 pushl %ebp 258 pushl %ebp
259 CFI_ADJUST_CFA_OFFSET 4
260 CFI_REL_OFFSET esp, 0
185 pushfl 261 pushfl
262 CFI_ADJUST_CFA_OFFSET 4
186 pushl $(__USER_CS) 263 pushl $(__USER_CS)
264 CFI_ADJUST_CFA_OFFSET 4
265 /*CFI_REL_OFFSET cs, 0*/
187 pushl $SYSENTER_RETURN 266 pushl $SYSENTER_RETURN
267 CFI_ADJUST_CFA_OFFSET 4
268 CFI_REL_OFFSET eip, 0
188 269
189/* 270/*
190 * Load the potential sixth argument from user stack. 271 * Load the potential sixth argument from user stack.
@@ -199,6 +280,7 @@ sysenter_past_esp:
199.previous 280.previous
200 281
201 pushl %eax 282 pushl %eax
283 CFI_ADJUST_CFA_OFFSET 4
202 SAVE_ALL 284 SAVE_ALL
203 GET_THREAD_INFO(%ebp) 285 GET_THREAD_INFO(%ebp)
204 286
@@ -219,11 +301,14 @@ sysenter_past_esp:
219 xorl %ebp,%ebp 301 xorl %ebp,%ebp
220 sti 302 sti
221 sysexit 303 sysexit
304 CFI_ENDPROC
222 305
223 306
224 # system call handler stub 307 # system call handler stub
225ENTRY(system_call) 308ENTRY(system_call)
309 RING0_INT_FRAME # can't unwind into user space anyway
226 pushl %eax # save orig_eax 310 pushl %eax # save orig_eax
311 CFI_ADJUST_CFA_OFFSET 4
227 SAVE_ALL 312 SAVE_ALL
228 GET_THREAD_INFO(%ebp) 313 GET_THREAD_INFO(%ebp)
229 testl $TF_MASK,EFLAGS(%esp) 314 testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +341,12 @@ restore_all:
256 movb CS(%esp), %al 341 movb CS(%esp), %al
257 andl $(VM_MASK | (4 << 8) | 3), %eax 342 andl $(VM_MASK | (4 << 8) | 3), %eax
258 cmpl $((4 << 8) | 3), %eax 343 cmpl $((4 << 8) | 3), %eax
344 CFI_REMEMBER_STATE
259 je ldt_ss # returning to user-space with LDT SS 345 je ldt_ss # returning to user-space with LDT SS
260restore_nocheck: 346restore_nocheck:
261 RESTORE_REGS 347 RESTORE_REGS
262 addl $4, %esp 348 addl $4, %esp
349 CFI_ADJUST_CFA_OFFSET -4
2631: iret 3501: iret
264.section .fixup,"ax" 351.section .fixup,"ax"
265iret_exc: 352iret_exc:
@@ -273,6 +360,7 @@ iret_exc:
273 .long 1b,iret_exc 360 .long 1b,iret_exc
274.previous 361.previous
275 362
363 CFI_RESTORE_STATE
276ldt_ss: 364ldt_ss:
277 larl OLDSS(%esp), %eax 365 larl OLDSS(%esp), %eax
278 jnz restore_nocheck 366 jnz restore_nocheck
@@ -285,11 +373,13 @@ ldt_ss:
285 * CPUs, which we can try to work around to make 373 * CPUs, which we can try to work around to make
286 * dosemu and wine happy. */ 374 * dosemu and wine happy. */
287 subl $8, %esp # reserve space for switch16 pointer 375 subl $8, %esp # reserve space for switch16 pointer
376 CFI_ADJUST_CFA_OFFSET 8
288 cli 377 cli
289 movl %esp, %eax 378 movl %esp, %eax
290 /* Set up the 16bit stack frame with switch32 pointer on top, 379 /* Set up the 16bit stack frame with switch32 pointer on top,
291 * and a switch16 pointer on top of the current frame. */ 380 * and a switch16 pointer on top of the current frame. */
292 call setup_x86_bogus_stack 381 call setup_x86_bogus_stack
382 CFI_ADJUST_CFA_OFFSET -8 # frame has moved
293 RESTORE_REGS 383 RESTORE_REGS
294 lss 20+4(%esp), %esp # switch to 16bit stack 384 lss 20+4(%esp), %esp # switch to 16bit stack
2951: iret 3851: iret
@@ -297,9 +387,11 @@ ldt_ss:
297 .align 4 387 .align 4
298 .long 1b,iret_exc 388 .long 1b,iret_exc
299.previous 389.previous
390 CFI_ENDPROC
300 391
301 # perform work that needs to be done immediately before resumption 392 # perform work that needs to be done immediately before resumption
302 ALIGN 393 ALIGN
394 RING0_PTREGS_FRAME # can't unwind into user space anyway
303work_pending: 395work_pending:
304 testb $_TIF_NEED_RESCHED, %cl 396 testb $_TIF_NEED_RESCHED, %cl
305 jz work_notifysig 397 jz work_notifysig
@@ -329,8 +421,10 @@ work_notifysig: # deal with pending signals and
329work_notifysig_v86: 421work_notifysig_v86:
330#ifdef CONFIG_VM86 422#ifdef CONFIG_VM86
331 pushl %ecx # save ti_flags for do_notify_resume 423 pushl %ecx # save ti_flags for do_notify_resume
424 CFI_ADJUST_CFA_OFFSET 4
332 call save_v86_state # %eax contains pt_regs pointer 425 call save_v86_state # %eax contains pt_regs pointer
333 popl %ecx 426 popl %ecx
427 CFI_ADJUST_CFA_OFFSET -4
334 movl %eax, %esp 428 movl %eax, %esp
335 xorl %edx, %edx 429 xorl %edx, %edx
336 call do_notify_resume 430 call do_notify_resume
@@ -363,19 +457,21 @@ syscall_exit_work:
363 movl $1, %edx 457 movl $1, %edx
364 call do_syscall_trace 458 call do_syscall_trace
365 jmp resume_userspace 459 jmp resume_userspace
460 CFI_ENDPROC
366 461
367 ALIGN 462 RING0_INT_FRAME # can't unwind into user space anyway
368syscall_fault: 463syscall_fault:
369 pushl %eax # save orig_eax 464 pushl %eax # save orig_eax
465 CFI_ADJUST_CFA_OFFSET 4
370 SAVE_ALL 466 SAVE_ALL
371 GET_THREAD_INFO(%ebp) 467 GET_THREAD_INFO(%ebp)
372 movl $-EFAULT,EAX(%esp) 468 movl $-EFAULT,EAX(%esp)
373 jmp resume_userspace 469 jmp resume_userspace
374 470
375 ALIGN
376syscall_badsys: 471syscall_badsys:
377 movl $-ENOSYS,EAX(%esp) 472 movl $-ENOSYS,EAX(%esp)
378 jmp resume_userspace 473 jmp resume_userspace
474 CFI_ENDPROC
379 475
380#define FIXUP_ESPFIX_STACK \ 476#define FIXUP_ESPFIX_STACK \
381 movl %esp, %eax; \ 477 movl %esp, %eax; \
@@ -387,16 +483,21 @@ syscall_badsys:
387 movl %eax, %esp; 483 movl %eax, %esp;
388#define UNWIND_ESPFIX_STACK \ 484#define UNWIND_ESPFIX_STACK \
389 pushl %eax; \ 485 pushl %eax; \
486 CFI_ADJUST_CFA_OFFSET 4; \
390 movl %ss, %eax; \ 487 movl %ss, %eax; \
391 /* see if on 16bit stack */ \ 488 /* see if on 16bit stack */ \
392 cmpw $__ESPFIX_SS, %ax; \ 489 cmpw $__ESPFIX_SS, %ax; \
393 jne 28f; \ 490 je 28f; \
394 movl $__KERNEL_DS, %edx; \ 49127: popl %eax; \
395 movl %edx, %ds; \ 492 CFI_ADJUST_CFA_OFFSET -4; \
396 movl %edx, %es; \ 493.section .fixup,"ax"; \
49428: movl $__KERNEL_DS, %eax; \
495 movl %eax, %ds; \
496 movl %eax, %es; \
397 /* switch to 32bit stack */ \ 497 /* switch to 32bit stack */ \
398 FIXUP_ESPFIX_STACK \ 498 FIXUP_ESPFIX_STACK; \
39928: popl %eax; 499 jmp 27b; \
500.previous
400 501
401/* 502/*
402 * Build the entry stubs and pointer table with 503 * Build the entry stubs and pointer table with
@@ -408,9 +509,14 @@ ENTRY(interrupt)
408 509
409vector=0 510vector=0
410ENTRY(irq_entries_start) 511ENTRY(irq_entries_start)
512 RING0_INT_FRAME
411.rept NR_IRQS 513.rept NR_IRQS
412 ALIGN 514 ALIGN
515 .if vector
516 CFI_ADJUST_CFA_OFFSET -4
517 .endif
4131: pushl $vector-256 5181: pushl $vector-256
519 CFI_ADJUST_CFA_OFFSET 4
414 jmp common_interrupt 520 jmp common_interrupt
415.data 521.data
416 .long 1b 522 .long 1b
@@ -424,60 +530,99 @@ common_interrupt:
424 movl %esp,%eax 530 movl %esp,%eax
425 call do_IRQ 531 call do_IRQ
426 jmp ret_from_intr 532 jmp ret_from_intr
533 CFI_ENDPROC
427 534
428#define BUILD_INTERRUPT(name, nr) \ 535#define BUILD_INTERRUPT(name, nr) \
429ENTRY(name) \ 536ENTRY(name) \
537 RING0_INT_FRAME; \
430 pushl $nr-256; \ 538 pushl $nr-256; \
431 SAVE_ALL \ 539 CFI_ADJUST_CFA_OFFSET 4; \
540 SAVE_ALL; \
432 movl %esp,%eax; \ 541 movl %esp,%eax; \
433 call smp_/**/name; \ 542 call smp_/**/name; \
434 jmp ret_from_intr; 543 jmp ret_from_intr; \
544 CFI_ENDPROC
435 545
436/* The include is where all of the SMP etc. interrupts come from */ 546/* The include is where all of the SMP etc. interrupts come from */
437#include "entry_arch.h" 547#include "entry_arch.h"
438 548
439ENTRY(divide_error) 549ENTRY(divide_error)
550 RING0_INT_FRAME
440 pushl $0 # no error code 551 pushl $0 # no error code
552 CFI_ADJUST_CFA_OFFSET 4
441 pushl $do_divide_error 553 pushl $do_divide_error
554 CFI_ADJUST_CFA_OFFSET 4
442 ALIGN 555 ALIGN
443error_code: 556error_code:
444 pushl %ds 557 pushl %ds
558 CFI_ADJUST_CFA_OFFSET 4
559 /*CFI_REL_OFFSET ds, 0*/
445 pushl %eax 560 pushl %eax
561 CFI_ADJUST_CFA_OFFSET 4
562 CFI_REL_OFFSET eax, 0
446 xorl %eax, %eax 563 xorl %eax, %eax
447 pushl %ebp 564 pushl %ebp
565 CFI_ADJUST_CFA_OFFSET 4
566 CFI_REL_OFFSET ebp, 0
448 pushl %edi 567 pushl %edi
568 CFI_ADJUST_CFA_OFFSET 4
569 CFI_REL_OFFSET edi, 0
449 pushl %esi 570 pushl %esi
571 CFI_ADJUST_CFA_OFFSET 4
572 CFI_REL_OFFSET esi, 0
450 pushl %edx 573 pushl %edx
574 CFI_ADJUST_CFA_OFFSET 4
575 CFI_REL_OFFSET edx, 0
451 decl %eax # eax = -1 576 decl %eax # eax = -1
452 pushl %ecx 577 pushl %ecx
578 CFI_ADJUST_CFA_OFFSET 4
579 CFI_REL_OFFSET ecx, 0
453 pushl %ebx 580 pushl %ebx
581 CFI_ADJUST_CFA_OFFSET 4
582 CFI_REL_OFFSET ebx, 0
454 cld 583 cld
455 pushl %es 584 pushl %es
585 CFI_ADJUST_CFA_OFFSET 4
586 /*CFI_REL_OFFSET es, 0*/
456 UNWIND_ESPFIX_STACK 587 UNWIND_ESPFIX_STACK
457 popl %ecx 588 popl %ecx
589 CFI_ADJUST_CFA_OFFSET -4
590 /*CFI_REGISTER es, ecx*/
458 movl ES(%esp), %edi # get the function address 591 movl ES(%esp), %edi # get the function address
459 movl ORIG_EAX(%esp), %edx # get the error code 592 movl ORIG_EAX(%esp), %edx # get the error code
460 movl %eax, ORIG_EAX(%esp) 593 movl %eax, ORIG_EAX(%esp)
461 movl %ecx, ES(%esp) 594 movl %ecx, ES(%esp)
595 /*CFI_REL_OFFSET es, ES*/
462 movl $(__USER_DS), %ecx 596 movl $(__USER_DS), %ecx
463 movl %ecx, %ds 597 movl %ecx, %ds
464 movl %ecx, %es 598 movl %ecx, %es
465 movl %esp,%eax # pt_regs pointer 599 movl %esp,%eax # pt_regs pointer
466 call *%edi 600 call *%edi
467 jmp ret_from_exception 601 jmp ret_from_exception
602 CFI_ENDPROC
468 603
469ENTRY(coprocessor_error) 604ENTRY(coprocessor_error)
605 RING0_INT_FRAME
470 pushl $0 606 pushl $0
607 CFI_ADJUST_CFA_OFFSET 4
471 pushl $do_coprocessor_error 608 pushl $do_coprocessor_error
609 CFI_ADJUST_CFA_OFFSET 4
472 jmp error_code 610 jmp error_code
611 CFI_ENDPROC
473 612
474ENTRY(simd_coprocessor_error) 613ENTRY(simd_coprocessor_error)
614 RING0_INT_FRAME
475 pushl $0 615 pushl $0
616 CFI_ADJUST_CFA_OFFSET 4
476 pushl $do_simd_coprocessor_error 617 pushl $do_simd_coprocessor_error
618 CFI_ADJUST_CFA_OFFSET 4
477 jmp error_code 619 jmp error_code
620 CFI_ENDPROC
478 621
479ENTRY(device_not_available) 622ENTRY(device_not_available)
623 RING0_INT_FRAME
480 pushl $-1 # mark this as an int 624 pushl $-1 # mark this as an int
625 CFI_ADJUST_CFA_OFFSET 4
481 SAVE_ALL 626 SAVE_ALL
482 movl %cr0, %eax 627 movl %cr0, %eax
483 testl $0x4, %eax # EM (math emulation bit) 628 testl $0x4, %eax # EM (math emulation bit)
@@ -487,9 +632,12 @@ ENTRY(device_not_available)
487 jmp ret_from_exception 632 jmp ret_from_exception
488device_not_available_emulate: 633device_not_available_emulate:
489 pushl $0 # temporary storage for ORIG_EIP 634 pushl $0 # temporary storage for ORIG_EIP
635 CFI_ADJUST_CFA_OFFSET 4
490 call math_emulate 636 call math_emulate
491 addl $4, %esp 637 addl $4, %esp
638 CFI_ADJUST_CFA_OFFSET -4
492 jmp ret_from_exception 639 jmp ret_from_exception
640 CFI_ENDPROC
493 641
494/* 642/*
495 * Debug traps and NMI can happen at the one SYSENTER instruction 643 * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +662,19 @@ label: \
514 pushl $sysenter_past_esp 662 pushl $sysenter_past_esp
515 663
516KPROBE_ENTRY(debug) 664KPROBE_ENTRY(debug)
665 RING0_INT_FRAME
517 cmpl $sysenter_entry,(%esp) 666 cmpl $sysenter_entry,(%esp)
518 jne debug_stack_correct 667 jne debug_stack_correct
519 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) 668 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
520debug_stack_correct: 669debug_stack_correct:
521 pushl $-1 # mark this as an int 670 pushl $-1 # mark this as an int
671 CFI_ADJUST_CFA_OFFSET 4
522 SAVE_ALL 672 SAVE_ALL
523 xorl %edx,%edx # error code 0 673 xorl %edx,%edx # error code 0
524 movl %esp,%eax # pt_regs pointer 674 movl %esp,%eax # pt_regs pointer
525 call do_debug 675 call do_debug
526 jmp ret_from_exception 676 jmp ret_from_exception
677 CFI_ENDPROC
527 .previous .text 678 .previous .text
528/* 679/*
529 * NMI is doubly nasty. It can happen _while_ we're handling 680 * NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +685,18 @@ debug_stack_correct:
534 * fault happened on the sysenter path. 685 * fault happened on the sysenter path.
535 */ 686 */
536ENTRY(nmi) 687ENTRY(nmi)
688 RING0_INT_FRAME
537 pushl %eax 689 pushl %eax
690 CFI_ADJUST_CFA_OFFSET 4
538 movl %ss, %eax 691 movl %ss, %eax
539 cmpw $__ESPFIX_SS, %ax 692 cmpw $__ESPFIX_SS, %ax
540 popl %eax 693 popl %eax
694 CFI_ADJUST_CFA_OFFSET -4
541 je nmi_16bit_stack 695 je nmi_16bit_stack
542 cmpl $sysenter_entry,(%esp) 696 cmpl $sysenter_entry,(%esp)
543 je nmi_stack_fixup 697 je nmi_stack_fixup
544 pushl %eax 698 pushl %eax
699 CFI_ADJUST_CFA_OFFSET 4
545 movl %esp,%eax 700 movl %esp,%eax
546 /* Do not access memory above the end of our stack page, 701 /* Do not access memory above the end of our stack page,
547 * it might not exist. 702 * it might not exist.
@@ -549,16 +704,19 @@ ENTRY(nmi)
549 andl $(THREAD_SIZE-1),%eax 704 andl $(THREAD_SIZE-1),%eax
550 cmpl $(THREAD_SIZE-20),%eax 705 cmpl $(THREAD_SIZE-20),%eax
551 popl %eax 706 popl %eax
707 CFI_ADJUST_CFA_OFFSET -4
552 jae nmi_stack_correct 708 jae nmi_stack_correct
553 cmpl $sysenter_entry,12(%esp) 709 cmpl $sysenter_entry,12(%esp)
554 je nmi_debug_stack_check 710 je nmi_debug_stack_check
555nmi_stack_correct: 711nmi_stack_correct:
556 pushl %eax 712 pushl %eax
713 CFI_ADJUST_CFA_OFFSET 4
557 SAVE_ALL 714 SAVE_ALL
558 xorl %edx,%edx # zero error code 715 xorl %edx,%edx # zero error code
559 movl %esp,%eax # pt_regs pointer 716 movl %esp,%eax # pt_regs pointer
560 call do_nmi 717 call do_nmi
561 jmp restore_all 718 jmp restore_all
719 CFI_ENDPROC
562 720
563nmi_stack_fixup: 721nmi_stack_fixup:
564 FIX_STACK(12,nmi_stack_correct, 1) 722 FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +732,177 @@ nmi_debug_stack_check:
574 jmp nmi_stack_correct 732 jmp nmi_stack_correct
575 733
576nmi_16bit_stack: 734nmi_16bit_stack:
735 RING0_INT_FRAME
577 /* create the pointer to lss back */ 736 /* create the pointer to lss back */
578 pushl %ss 737 pushl %ss
738 CFI_ADJUST_CFA_OFFSET 4
579 pushl %esp 739 pushl %esp
740 CFI_ADJUST_CFA_OFFSET 4
580 movzwl %sp, %esp 741 movzwl %sp, %esp
581 addw $4, (%esp) 742 addw $4, (%esp)
582 /* copy the iret frame of 12 bytes */ 743 /* copy the iret frame of 12 bytes */
583 .rept 3 744 .rept 3
584 pushl 16(%esp) 745 pushl 16(%esp)
746 CFI_ADJUST_CFA_OFFSET 4
585 .endr 747 .endr
586 pushl %eax 748 pushl %eax
749 CFI_ADJUST_CFA_OFFSET 4
587 SAVE_ALL 750 SAVE_ALL
588 FIXUP_ESPFIX_STACK # %eax == %esp 751 FIXUP_ESPFIX_STACK # %eax == %esp
752 CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
589 xorl %edx,%edx # zero error code 753 xorl %edx,%edx # zero error code
590 call do_nmi 754 call do_nmi
591 RESTORE_REGS 755 RESTORE_REGS
592 lss 12+4(%esp), %esp # back to 16bit stack 756 lss 12+4(%esp), %esp # back to 16bit stack
5931: iret 7571: iret
758 CFI_ENDPROC
594.section __ex_table,"a" 759.section __ex_table,"a"
595 .align 4 760 .align 4
596 .long 1b,iret_exc 761 .long 1b,iret_exc
597.previous 762.previous
598 763
599KPROBE_ENTRY(int3) 764KPROBE_ENTRY(int3)
765 RING0_INT_FRAME
600 pushl $-1 # mark this as an int 766 pushl $-1 # mark this as an int
767 CFI_ADJUST_CFA_OFFSET 4
601 SAVE_ALL 768 SAVE_ALL
602 xorl %edx,%edx # zero error code 769 xorl %edx,%edx # zero error code
603 movl %esp,%eax # pt_regs pointer 770 movl %esp,%eax # pt_regs pointer
604 call do_int3 771 call do_int3
605 jmp ret_from_exception 772 jmp ret_from_exception
773 CFI_ENDPROC
606 .previous .text 774 .previous .text
607 775
608ENTRY(overflow) 776ENTRY(overflow)
777 RING0_INT_FRAME
609 pushl $0 778 pushl $0
779 CFI_ADJUST_CFA_OFFSET 4
610 pushl $do_overflow 780 pushl $do_overflow
781 CFI_ADJUST_CFA_OFFSET 4
611 jmp error_code 782 jmp error_code
783 CFI_ENDPROC
612 784
613ENTRY(bounds) 785ENTRY(bounds)
786 RING0_INT_FRAME
614 pushl $0 787 pushl $0
788 CFI_ADJUST_CFA_OFFSET 4
615 pushl $do_bounds 789 pushl $do_bounds
790 CFI_ADJUST_CFA_OFFSET 4
616 jmp error_code 791 jmp error_code
792 CFI_ENDPROC
617 793
618ENTRY(invalid_op) 794ENTRY(invalid_op)
795 RING0_INT_FRAME
619 pushl $0 796 pushl $0
797 CFI_ADJUST_CFA_OFFSET 4
620 pushl $do_invalid_op 798 pushl $do_invalid_op
799 CFI_ADJUST_CFA_OFFSET 4
621 jmp error_code 800 jmp error_code
801 CFI_ENDPROC
622 802
623ENTRY(coprocessor_segment_overrun) 803ENTRY(coprocessor_segment_overrun)
804 RING0_INT_FRAME
624 pushl $0 805 pushl $0
806 CFI_ADJUST_CFA_OFFSET 4
625 pushl $do_coprocessor_segment_overrun 807 pushl $do_coprocessor_segment_overrun
808 CFI_ADJUST_CFA_OFFSET 4
626 jmp error_code 809 jmp error_code
810 CFI_ENDPROC
627 811
628ENTRY(invalid_TSS) 812ENTRY(invalid_TSS)
813 RING0_EC_FRAME
629 pushl $do_invalid_TSS 814 pushl $do_invalid_TSS
815 CFI_ADJUST_CFA_OFFSET 4
630 jmp error_code 816 jmp error_code
817 CFI_ENDPROC
631 818
632ENTRY(segment_not_present) 819ENTRY(segment_not_present)
820 RING0_EC_FRAME
633 pushl $do_segment_not_present 821 pushl $do_segment_not_present
822 CFI_ADJUST_CFA_OFFSET 4
634 jmp error_code 823 jmp error_code
824 CFI_ENDPROC
635 825
636ENTRY(stack_segment) 826ENTRY(stack_segment)
827 RING0_EC_FRAME
637 pushl $do_stack_segment 828 pushl $do_stack_segment
829 CFI_ADJUST_CFA_OFFSET 4
638 jmp error_code 830 jmp error_code
831 CFI_ENDPROC
639 832
640KPROBE_ENTRY(general_protection) 833KPROBE_ENTRY(general_protection)
834 RING0_EC_FRAME
641 pushl $do_general_protection 835 pushl $do_general_protection
836 CFI_ADJUST_CFA_OFFSET 4
642 jmp error_code 837 jmp error_code
838 CFI_ENDPROC
643 .previous .text 839 .previous .text
644 840
645ENTRY(alignment_check) 841ENTRY(alignment_check)
842 RING0_EC_FRAME
646 pushl $do_alignment_check 843 pushl $do_alignment_check
844 CFI_ADJUST_CFA_OFFSET 4
647 jmp error_code 845 jmp error_code
846 CFI_ENDPROC
648 847
649KPROBE_ENTRY(page_fault) 848KPROBE_ENTRY(page_fault)
849 RING0_EC_FRAME
650 pushl $do_page_fault 850 pushl $do_page_fault
851 CFI_ADJUST_CFA_OFFSET 4
651 jmp error_code 852 jmp error_code
853 CFI_ENDPROC
652 .previous .text 854 .previous .text
653 855
654#ifdef CONFIG_X86_MCE 856#ifdef CONFIG_X86_MCE
655ENTRY(machine_check) 857ENTRY(machine_check)
858 RING0_INT_FRAME
656 pushl $0 859 pushl $0
860 CFI_ADJUST_CFA_OFFSET 4
657 pushl machine_check_vector 861 pushl machine_check_vector
862 CFI_ADJUST_CFA_OFFSET 4
658 jmp error_code 863 jmp error_code
864 CFI_ENDPROC
659#endif 865#endif
660 866
661ENTRY(spurious_interrupt_bug) 867ENTRY(spurious_interrupt_bug)
868 RING0_INT_FRAME
662 pushl $0 869 pushl $0
870 CFI_ADJUST_CFA_OFFSET 4
663 pushl $do_spurious_interrupt_bug 871 pushl $do_spurious_interrupt_bug
872 CFI_ADJUST_CFA_OFFSET 4
664 jmp error_code 873 jmp error_code
874 CFI_ENDPROC
875
876#ifdef CONFIG_STACK_UNWIND
877ENTRY(arch_unwind_init_running)
878 CFI_STARTPROC
879 movl 4(%esp), %edx
880 movl (%esp), %ecx
881 leal 4(%esp), %eax
882 movl %ebx, EBX(%edx)
883 xorl %ebx, %ebx
884 movl %ebx, ECX(%edx)
885 movl %ebx, EDX(%edx)
886 movl %esi, ESI(%edx)
887 movl %edi, EDI(%edx)
888 movl %ebp, EBP(%edx)
889 movl %ebx, EAX(%edx)
890 movl $__USER_DS, DS(%edx)
891 movl $__USER_DS, ES(%edx)
892 movl %ebx, ORIG_EAX(%edx)
893 movl %ecx, EIP(%edx)
894 movl 12(%esp), %ecx
895 movl $__KERNEL_CS, CS(%edx)
896 movl %ebx, EFLAGS(%edx)
897 movl %eax, OLDESP(%edx)
898 movl 8(%esp), %eax
899 movl %ecx, 8(%esp)
900 movl EBX(%edx), %ebx
901 movl $__KERNEL_DS, OLDSS(%edx)
902 jmpl *%eax
903 CFI_ENDPROC
904ENDPROC(arch_unwind_init_running)
905#endif
665 906
666.section .rodata,"a" 907.section .rodata,"a"
667#include "syscall_table.S" 908#include "syscall_table.S"
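The error_code path above saves all registers, puts the pt_regs pointer in %eax and the saved error code in %edx, and then calls the handler address that each ENTRY stub pushed. A minimal sketch of the C-side contract this implies; the fastcall/regparm spelling of the prototype is an assumption based on how the i386 do_* trap handlers are usually declared, not something shown in this diff:

    /* Sketch: what `call *%edi` ends up invoking.  With the regparm
     * (fastcall) convention the first two arguments arrive in %eax and
     * %edx, exactly the pt_regs pointer and error code that error_code
     * sets up above. */
    fastcall void do_general_protection(struct pt_regs *regs, long error_code);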
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
new file mode 100644
index 000000000000..c6737c35815d
--- /dev/null
+++ b/arch/i386/kernel/hpet.c
@@ -0,0 +1,67 @@
1#include <linux/clocksource.h>
2#include <linux/errno.h>
3#include <linux/hpet.h>
4#include <linux/init.h>
5
6#include <asm/hpet.h>
7#include <asm/io.h>
8
9#define HPET_MASK CLOCKSOURCE_MASK(32)
10#define HPET_SHIFT 22
11
12/* FSEC = 10^-15 NSEC = 10^-9 */
13#define FSEC_PER_NSEC 1000000
14
15static void *hpet_ptr;
16
17static cycle_t read_hpet(void)
18{
19 return (cycle_t)readl(hpet_ptr);
20}
21
22static struct clocksource clocksource_hpet = {
23 .name = "hpet",
24 .rating = 250,
25 .read = read_hpet,
26 .mask = HPET_MASK,
27 .mult = 0, /* set below */
28 .shift = HPET_SHIFT,
29 .is_continuous = 1,
30};
31
32static int __init init_hpet_clocksource(void)
33{
34 unsigned long hpet_period;
35 void __iomem* hpet_base;
36 u64 tmp;
37
38 if (!hpet_address)
39 return -ENODEV;
40
41 /* calculate the hpet address: */
42 hpet_base =
43 (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
44 hpet_ptr = hpet_base + HPET_COUNTER;
45
46 /* calculate the frequency: */
47 hpet_period = readl(hpet_base + HPET_PERIOD);
48
49 /*
 50 * hpet period is in femtoseconds per cycle
 51 * so we need to convert this to ns/cyc units
 52 * approximated by mult/2^shift
53 *
54 * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
55 * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
56 * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
57 * (fsec/cyc << shift)/1000000 = mult
58 * (hpet_period << shift)/FSEC_PER_NSEC = mult
59 */
60 tmp = (u64)hpet_period << HPET_SHIFT;
61 do_div(tmp, FSEC_PER_NSEC);
62 clocksource_hpet.mult = (u32)tmp;
63
64 return clocksource_register(&clocksource_hpet);
65}
66
67module_init(init_hpet_clocksource);
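As a concrete check of the fsec-to-ns conversion spelled out in the comment above, here is a small user-space sketch; the 69841279 fs period (a 14.31818 MHz HPET) is a hypothetical value, not one read from hardware:

    #include <stdint.h>
    #include <stdio.h>

    #define HPET_SHIFT    22
    #define FSEC_PER_NSEC 1000000ULL

    int main(void)
    {
            uint64_t hpet_period = 69841279;   /* fs per cycle, hypothetical */
            uint64_t mult = (hpet_period << HPET_SHIFT) / FSEC_PER_NSEC;

            /* elapsed ns is later computed as (cycles * mult) >> HPET_SHIFT */
            printf("mult = %llu (~%.3f ns/cycle)\n",
                   (unsigned long long)mult,
                   (double)mult / (1 << HPET_SHIFT));
            return 0;
    }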
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
new file mode 100644
index 000000000000..477b24daff53
--- /dev/null
+++ b/arch/i386/kernel/i8253.c
@@ -0,0 +1,118 @@
1/*
2 * i8253.c 8253/PIT functions
3 *
4 */
5#include <linux/clocksource.h>
6#include <linux/spinlock.h>
7#include <linux/jiffies.h>
8#include <linux/sysdev.h>
9#include <linux/module.h>
10#include <linux/init.h>
11
12#include <asm/smp.h>
13#include <asm/delay.h>
14#include <asm/i8253.h>
15#include <asm/io.h>
16
17#include "io_ports.h"
18
19DEFINE_SPINLOCK(i8253_lock);
20EXPORT_SYMBOL(i8253_lock);
21
22void setup_pit_timer(void)
23{
24 unsigned long flags;
25
26 spin_lock_irqsave(&i8253_lock, flags);
27 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
28 udelay(10);
29 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
30 udelay(10);
31 outb(LATCH >> 8 , PIT_CH0); /* MSB */
32 spin_unlock_irqrestore(&i8253_lock, flags);
33}
34
35/*
 36 * Since the PIT overflows every tick, it's not very useful
37 * to just read by itself. So use jiffies to emulate a free
38 * running counter:
39 */
40static cycle_t pit_read(void)
41{
42 unsigned long flags;
43 int count;
44 u32 jifs;
45 static int old_count;
46 static u32 old_jifs;
47
48 spin_lock_irqsave(&i8253_lock, flags);
49 /*
50 * Although our caller may have the read side of xtime_lock,
51 * this is now a seqlock, and we are cheating in this routine
52 * by having side effects on state that we cannot undo if
53 * there is a collision on the seqlock and our caller has to
54 * retry. (Namely, old_jifs and old_count.) So we must treat
55 * jiffies as volatile despite the lock. We read jiffies
56 * before latching the timer count to guarantee that although
57 * the jiffies value might be older than the count (that is,
58 * the counter may underflow between the last point where
59 * jiffies was incremented and the point where we latch the
60 * count), it cannot be newer.
61 */
62 jifs = jiffies;
63 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
64 count = inb_p(PIT_CH0); /* read the latched count */
65 count |= inb_p(PIT_CH0) << 8;
66
67 /* VIA686a test code... reset the latch if count > max + 1 */
68 if (count > LATCH) {
69 outb_p(0x34, PIT_MODE);
70 outb_p(LATCH & 0xff, PIT_CH0);
71 outb(LATCH >> 8, PIT_CH0);
72 count = LATCH - 1;
73 }
74
75 /*
76 * It's possible for count to appear to go the wrong way for a
77 * couple of reasons:
78 *
79 * 1. The timer counter underflows, but we haven't handled the
80 * resulting interrupt and incremented jiffies yet.
81 * 2. Hardware problem with the timer, not giving us continuous time,
82 * the counter does small "jumps" upwards on some Pentium systems,
83 * (see c't 95/10 page 335 for Neptun bug.)
84 *
85 * Previous attempts to handle these cases intelligently were
86 * buggy, so we just do the simple thing now.
87 */
88 if (count > old_count && jifs == old_jifs) {
89 count = old_count;
90 }
91 old_count = count;
92 old_jifs = jifs;
93
94 spin_unlock_irqrestore(&i8253_lock, flags);
95
96 count = (LATCH - 1) - count;
97
98 return (cycle_t)(jifs * LATCH) + count;
99}
100
101static struct clocksource clocksource_pit = {
102 .name = "pit",
103 .rating = 110,
104 .read = pit_read,
105 .mask = CLOCKSOURCE_MASK(32),
106 .mult = 0,
107 .shift = 20,
108};
109
110static int __init init_pit_clocksource(void)
111{
112 if (num_possible_cpus() > 4) /* PIT does not scale! */
113 return 0;
114
115 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
116 return clocksource_register(&clocksource_pit);
117}
118module_init(init_pit_clocksource);
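init_pit_clocksource() above derives mult from CLOCK_TICK_RATE with clocksource_hz2mult(). A rough user-space sketch of that conversion (the round-to-nearest step is an assumption based on the usual hz-to-mult derivation):

    #include <stdint.h>

    /* Sketch: mult such that (cycles * mult) >> shift yields nanoseconds
     * for a counter running at `hz` cycles per second. */
    static uint32_t hz2mult(uint32_t hz, uint32_t shift)
    {
            uint64_t tmp = 1000000000ULL << shift;   /* NSEC_PER_SEC << shift */

            tmp += hz / 2;                           /* round to nearest */
            return (uint32_t)(tmp / hz);
    }

For the PIT's 1193182 Hz input clock and a shift of 20 this works out to roughly 8.8e8, i.e. about 838 ns represented per PIT count, which matches 1/1193182 s.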
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index b7636b96e104..c1a42feba286 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -175,7 +175,7 @@ static void mask_and_ack_8259A(unsigned int irq)
175 * Lightweight spurious IRQ detection. We do not want 175 * Lightweight spurious IRQ detection. We do not want
176 * to overdo spurious IRQ handling - it's usually a sign 176 * to overdo spurious IRQ handling - it's usually a sign
177 * of hardware problems, so we only do the checks we can 177 * of hardware problems, so we only do the checks we can
178 * do without slowing down good hardware unnecesserily. 178 * do without slowing down good hardware unnecessarily.
179 * 179 *
180 * Note that IRQ7 and IRQ15 (the two spurious IRQs 180 * Note that IRQ7 and IRQ15 (the two spurious IRQs
181 * usually resulting from the 8259A-1|2 PICs) occur 181 * usually resulting from the 8259A-1|2 PICs) occur
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index a62df3e764c5..72ae414e4d49 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -38,6 +38,7 @@
38#include <asm/desc.h> 38#include <asm/desc.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40#include <asm/i8259.h> 40#include <asm/i8259.h>
41#include <asm/nmi.h>
41 42
42#include <mach_apic.h> 43#include <mach_apic.h>
43 44
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
50static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 51static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
51 52
52static DEFINE_SPINLOCK(ioapic_lock); 53static DEFINE_SPINLOCK(ioapic_lock);
54static DEFINE_SPINLOCK(vector_lock);
53 55
54int timer_over_8254 __initdata = 1; 56int timer_over_8254 __initdata = 1;
55 57
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
1161int assign_irq_vector(int irq) 1163int assign_irq_vector(int irq)
1162{ 1164{
1163 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 1165 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
1166 unsigned long flags;
1167 int vector;
1168
1169 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
1164 1170
1165 BUG_ON(irq >= NR_IRQ_VECTORS); 1171 spin_lock_irqsave(&vector_lock, flags);
1166 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 1172
1173 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
1174 spin_unlock_irqrestore(&vector_lock, flags);
1167 return IO_APIC_VECTOR(irq); 1175 return IO_APIC_VECTOR(irq);
1176 }
1168next: 1177next:
1169 current_vector += 8; 1178 current_vector += 8;
1170 if (current_vector == SYSCALL_VECTOR) 1179 if (current_vector == SYSCALL_VECTOR)
@@ -1172,16 +1181,21 @@ next:
1172 1181
1173 if (current_vector >= FIRST_SYSTEM_VECTOR) { 1182 if (current_vector >= FIRST_SYSTEM_VECTOR) {
1174 offset++; 1183 offset++;
1175 if (!(offset%8)) 1184 if (!(offset%8)) {
1185 spin_unlock_irqrestore(&vector_lock, flags);
1176 return -ENOSPC; 1186 return -ENOSPC;
1187 }
1177 current_vector = FIRST_DEVICE_VECTOR + offset; 1188 current_vector = FIRST_DEVICE_VECTOR + offset;
1178 } 1189 }
1179 1190
1180 vector_irq[current_vector] = irq; 1191 vector = current_vector;
1192 vector_irq[vector] = irq;
1181 if (irq != AUTO_ASSIGN) 1193 if (irq != AUTO_ASSIGN)
1182 IO_APIC_VECTOR(irq) = current_vector; 1194 IO_APIC_VECTOR(irq) = vector;
1183 1195
1184 return current_vector; 1196 spin_unlock_irqrestore(&vector_lock, flags);
1197
1198 return vector;
1185} 1199}
1186 1200
1187static struct hw_interrupt_type ioapic_level_type; 1201static struct hw_interrupt_type ioapic_level_type;
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
1193 1207
1194static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) 1208static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1195{ 1209{
1196 if (use_pci_vector() && !platform_legacy_irq(irq)) { 1210 unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
1197 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1211
1198 trigger == IOAPIC_LEVEL) 1212 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1199 irq_desc[vector].handler = &ioapic_level_type; 1213 trigger == IOAPIC_LEVEL)
1200 else 1214 irq_desc[idx].handler = &ioapic_level_type;
1201 irq_desc[vector].handler = &ioapic_edge_type; 1215 else
1202 set_intr_gate(vector, interrupt[vector]); 1216 irq_desc[idx].handler = &ioapic_edge_type;
1203 } else { 1217 set_intr_gate(vector, interrupt[idx]);
1204 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1205 trigger == IOAPIC_LEVEL)
1206 irq_desc[irq].handler = &ioapic_level_type;
1207 else
1208 irq_desc[irq].handler = &ioapic_edge_type;
1209 set_intr_gate(vector, interrupt[irq]);
1210 }
1211} 1218}
1212 1219
1213static void __init setup_IO_APIC_irqs(void) 1220static void __init setup_IO_APIC_irqs(void)
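The assign_irq_vector() change above moves the static current_vector/offset state under the new vector_lock and makes every return path, including the early "already assigned" and -ENOSPC exits, drop the lock. A generic sketch of that allocate-under-spinlock pattern with hypothetical names (not the kernel's own helpers):

    static DEFINE_SPINLOCK(alloc_lock);
    static int next_slot;

    /* Sketch: hand out slots from shared static state; every exit path
     * must release the lock it took, as assign_irq_vector() now does. */
    static int assign_slot(int *table, int size, int key)
    {
            unsigned long flags;
            int slot;

            spin_lock_irqsave(&alloc_lock, flags);
            if (table[key] >= 0) {                  /* already assigned */
                    slot = table[key];
                    spin_unlock_irqrestore(&alloc_lock, flags);
                    return slot;
            }
            if (next_slot >= size) {                /* out of slots */
                    spin_unlock_irqrestore(&alloc_lock, flags);
                    return -1;
            }
            slot = next_slot++;
            table[key] = slot;
            spin_unlock_irqrestore(&alloc_lock, flags);
            return slot;
    }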
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 49ce4c31b713..061533e0cb5e 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
227 if (i == 0) { 227 if (i == 0) {
228 seq_printf(p, " "); 228 seq_printf(p, " ");
229 for_each_online_cpu(j) 229 for_each_online_cpu(j)
230 seq_printf(p, "CPU%d ",j); 230 seq_printf(p, "CPU%-8d",j);
231 seq_putc(p, '\n'); 231 seq_putc(p, '\n');
232 } 232 }
233 233
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 395a9a6dff88..727e419ad78a 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to)
57/* 57/*
58 * returns non-zero if opcodes can be boosted. 58 * returns non-zero if opcodes can be boosted.
59 */ 59 */
60static __always_inline int can_boost(kprobe_opcode_t opcode) 60static __always_inline int can_boost(kprobe_opcode_t *opcodes)
61{ 61{
62 switch (opcode & 0xf0 ) { 62#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
63 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
64 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
65 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
66 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
67 << (row % 32))
68 /*
69 * Undefined/reserved opcodes, conditional jump, Opcode Extension
 70 * Groups, and some special opcodes cannot be boosted.
71 */
72 static const unsigned long twobyte_is_boostable[256 / 32] = {
73 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
74 /* ------------------------------- */
75 W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
76 W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
77 W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
78 W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
79 W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
80 W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
81 W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
82 W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
83 W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
84 W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
85 W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
86 W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
87 W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
88 W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
89 W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
90 W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
91 /* ------------------------------- */
92 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
93 };
94#undef W
95 kprobe_opcode_t opcode;
96 kprobe_opcode_t *orig_opcodes = opcodes;
97retry:
98 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
99 return 0;
100 opcode = *(opcodes++);
101
102 /* 2nd-byte opcode */
103 if (opcode == 0x0f) {
104 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
105 return 0;
106 return test_bit(*opcodes, twobyte_is_boostable);
107 }
108
109 switch (opcode & 0xf0) {
110 case 0x60:
111 if (0x63 < opcode && opcode < 0x67)
112 goto retry; /* prefixes */
113 /* can't boost Address-size override and bound */
114 return (opcode != 0x62 && opcode != 0x67);
63 case 0x70: 115 case 0x70:
64 return 0; /* can't boost conditional jump */ 116 return 0; /* can't boost conditional jump */
65 case 0x90:
66 /* can't boost call and pushf */
67 return opcode != 0x9a && opcode != 0x9c;
68 case 0xc0: 117 case 0xc0:
69 /* can't boost undefined opcodes and soft-interruptions */ 118 /* can't boost software-interruptions */
70 return (0xc1 < opcode && opcode < 0xc6) || 119 return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
71 (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
72 case 0xd0: 120 case 0xd0:
73 /* can boost AA* and XLAT */ 121 /* can boost AA* and XLAT */
74 return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); 122 return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
75 case 0xe0: 123 case 0xe0:
76 /* can boost in/out and (may be) jmps */ 124 /* can boost in/out and absolute jmps */
77 return (0xe3 < opcode && opcode != 0xe8); 125 return ((opcode & 0x04) || opcode == 0xea);
78 case 0xf0: 126 case 0xf0:
127 if ((opcode & 0x0c) == 0 && opcode != 0xf1)
128 goto retry; /* lock/rep(ne) prefix */
 79 /* clear and set flags can be boosted */ 129 /* clear and set flags can be boosted */
80 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); 130 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
81 default: 131 default:
82 /* currently, can't boost 2 bytes opcodes */ 132 if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
83 return opcode != 0x0f; 133 goto retry; /* prefixes */
134 /* can't boost CS override and call */
135 return (opcode != 0x2e && opcode != 0x9a);
84 } 136 }
85} 137}
86 138
87
88/* 139/*
89 * returns non-zero if opcode modifies the interrupt flag. 140 * returns non-zero if opcode modifies the interrupt flag.
90 */ 141 */
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
109 160
110 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 161 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
111 p->opcode = *p->addr; 162 p->opcode = *p->addr;
112 if (can_boost(p->opcode)) { 163 if (can_boost(p->addr)) {
113 p->ainsn.boostable = 0; 164 p->ainsn.boostable = 0;
114 } else { 165 } else {
115 p->ainsn.boostable = -1; 166 p->ainsn.boostable = -1;
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
208 struct kprobe_ctlblk *kcb; 259 struct kprobe_ctlblk *kcb;
209#ifdef CONFIG_PREEMPT 260#ifdef CONFIG_PREEMPT
210 unsigned pre_preempt_count = preempt_count(); 261 unsigned pre_preempt_count = preempt_count();
211#endif /* CONFIG_PREEMPT */ 262#else
263 unsigned pre_preempt_count = 1;
264#endif
212 265
213 addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); 266 addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
214 267
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
285 /* handler has already set things up, so skip ss setup */ 338 /* handler has already set things up, so skip ss setup */
286 return 1; 339 return 1;
287 340
288 if (p->ainsn.boostable == 1 && 341ss_probe:
289#ifdef CONFIG_PREEMPT 342 if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){
290 !(pre_preempt_count) && /*
291 * This enables booster when the direct
292 * execution path aren't preempted.
293 */
294#endif /* CONFIG_PREEMPT */
295 !p->post_handler && !p->break_handler ) {
296 /* Boost up -- we can execute copied instructions directly */ 343 /* Boost up -- we can execute copied instructions directly */
297 reset_current_kprobe(); 344 reset_current_kprobe();
298 regs->eip = (unsigned long)p->ainsn.insn; 345 regs->eip = (unsigned long)p->ainsn.insn;
299 preempt_enable_no_resched(); 346 preempt_enable_no_resched();
300 return 1; 347 return 1;
301 } 348 }
302
303ss_probe:
304 prepare_singlestep(p, regs); 349 prepare_singlestep(p, regs);
305 kcb->kprobe_status = KPROBE_HIT_SS; 350 kcb->kprobe_status = KPROBE_HIT_SS;
306 return 1; 351 return 1;
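The W() macro in can_boost() above packs sixteen per-opcode flags into each half of a 32-bit word, ORing two rows per word, so the 256 two-byte opcodes fit in eight unsigned longs on i386; test_bit() then indexes the table directly by the second opcode byte. A small user-space sketch of that lookup, assuming 32-bit longs:

    /* Sketch: equivalent of test_bit(opcode, twobyte_is_boostable) for a
     * bitmap stored as 32-bit words: word = opcode / 32, bit = opcode % 32. */
    static int opcode_flag(const unsigned long *table, unsigned char opcode)
    {
            return (table[opcode / 32] >> (opcode % 32)) & 1;
    }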
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index f73d7374a2ba..511abe52a94e 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
133 unsigned long start_address, 133 unsigned long start_address,
134 unsigned int has_pae) ATTRIB_NORET; 134 unsigned int has_pae) ATTRIB_NORET;
135 135
136const extern unsigned char relocate_new_kernel[]; 136extern const unsigned char relocate_new_kernel[];
137extern void relocate_new_kernel_end(void); 137extern void relocate_new_kernel_end(void);
138const extern unsigned int relocate_new_kernel_size; 138extern const unsigned int relocate_new_kernel_size;
139 139
140/* 140/*
141 * A architecture hook called to validate the 141 * A architecture hook called to validate the
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index d43b498ec745..a76e93146585 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -14,21 +14,17 @@
14 */ 14 */
15 15
16#include <linux/config.h> 16#include <linux/config.h>
17#include <linux/mm.h>
18#include <linux/delay.h> 17#include <linux/delay.h>
19#include <linux/bootmem.h>
20#include <linux/smp_lock.h>
21#include <linux/interrupt.h> 18#include <linux/interrupt.h>
22#include <linux/mc146818rtc.h>
23#include <linux/kernel_stat.h>
24#include <linux/module.h> 19#include <linux/module.h>
25#include <linux/nmi.h> 20#include <linux/nmi.h>
26#include <linux/sysdev.h> 21#include <linux/sysdev.h>
27#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/percpu.h>
28 24
29#include <asm/smp.h> 25#include <asm/smp.h>
30#include <asm/div64.h>
31#include <asm/nmi.h> 26#include <asm/nmi.h>
27#include <asm/intel_arch_perfmon.h>
32 28
33#include "mach_traps.h" 29#include "mach_traps.h"
34 30
@@ -100,6 +96,9 @@ int nmi_active;
100 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ 96 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
101 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) 97 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
102 98
99#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
100#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
101
103#ifdef CONFIG_SMP 102#ifdef CONFIG_SMP
104/* The performance counters used by NMI_LOCAL_APIC don't trigger when 103/* The performance counters used by NMI_LOCAL_APIC don't trigger when
105 * the CPU is idle. To make sure the NMI watchdog really ticks on all 104 * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
212 211
213__setup("nmi_watchdog=", setup_nmi_watchdog); 212__setup("nmi_watchdog=", setup_nmi_watchdog);
214 213
214static void disable_intel_arch_watchdog(void);
215
215static void disable_lapic_nmi_watchdog(void) 216static void disable_lapic_nmi_watchdog(void)
216{ 217{
217 if (nmi_active <= 0) 218 if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
221 wrmsr(MSR_K7_EVNTSEL0, 0, 0); 222 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
222 break; 223 break;
223 case X86_VENDOR_INTEL: 224 case X86_VENDOR_INTEL:
225 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
226 disable_intel_arch_watchdog();
227 break;
228 }
224 switch (boot_cpu_data.x86) { 229 switch (boot_cpu_data.x86) {
225 case 6: 230 case 6:
226 if (boot_cpu_data.x86_model > 0xd) 231 if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
449 return 1; 454 return 1;
450} 455}
451 456
457static void disable_intel_arch_watchdog(void)
458{
459 unsigned ebx;
460
461 /*
462 * Check whether the Architectural PerfMon supports
463 * Unhalted Core Cycles Event or not.
 464 * NOTE: Corresponding bit = 0 in ebx indicates event present.
465 */
466 ebx = cpuid_ebx(10);
467 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
468 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
469}
470
471static int setup_intel_arch_watchdog(void)
472{
473 unsigned int evntsel;
474 unsigned ebx;
475
476 /*
477 * Check whether the Architectural PerfMon supports
478 * Unhalted Core Cycles Event or not.
 479 * NOTE: Corresponding bit = 0 in ebx indicates event present.
480 */
481 ebx = cpuid_ebx(10);
482 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
483 return 0;
484
485 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
486
487 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
488 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
489
490 evntsel = ARCH_PERFMON_EVENTSEL_INT
491 | ARCH_PERFMON_EVENTSEL_OS
492 | ARCH_PERFMON_EVENTSEL_USR
493 | ARCH_PERFMON_NMI_EVENT_SEL
494 | ARCH_PERFMON_NMI_EVENT_UMASK;
495
496 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
497 write_watchdog_counter("INTEL_ARCH_PERFCTR0");
498 apic_write(APIC_LVTPC, APIC_DM_NMI);
499 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
500 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
501 return 1;
502}
503
452void setup_apic_nmi_watchdog (void) 504void setup_apic_nmi_watchdog (void)
453{ 505{
454 switch (boot_cpu_data.x86_vendor) { 506 switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
458 setup_k7_watchdog(); 510 setup_k7_watchdog();
459 break; 511 break;
460 case X86_VENDOR_INTEL: 512 case X86_VENDOR_INTEL:
513 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
514 if (!setup_intel_arch_watchdog())
515 return;
516 break;
517 }
461 switch (boot_cpu_data.x86) { 518 switch (boot_cpu_data.x86) {
462 case 6: 519 case 6:
463 if (boot_cpu_data.x86_model > 0xd) 520 if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
561 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 618 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
562 apic_write(APIC_LVTPC, APIC_DM_NMI); 619 apic_write(APIC_LVTPC, APIC_DM_NMI);
563 } 620 }
564 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { 621 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
622 nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
565 /* Only P6 based Pentium M need to re-unmask 623 /* Only P6 based Pentium M need to re-unmask
566 * the apic vector but it doesn't hurt 624 * the apic vector but it doesn't hurt
567 * other P6 variant */ 625 * other P6 variant */
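Both disable_intel_arch_watchdog() and setup_intel_arch_watchdog() above key off one bit of CPUID leaf 0xA's EBX, where a clear bit means the architectural "unhalted core cycles" event is implemented. A minimal sketch of that inverted test; the bit-0 position is an assumption matching the usual ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT definition:

    /* Sketch: in CPUID.0AH:EBX a clear bit means the corresponding
     * architectural event is present, so the availability test is inverted. */
    static int unhalted_core_cycles_available(unsigned int cpuid_0a_ebx)
    {
            return !(cpuid_0a_ebx & (1 << 0));
    }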
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index 5f5b075f860a..0caf14652bad 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void)
79 return 1; 79 return 1;
80} 80}
81 81
82static int __init numaq_dsc_disable(void) 82static int __init numaq_tsc_disable(void)
83{ 83{
84 printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); 84 if (num_online_nodes() > 1) {
85 tsc_disable = 1; 85 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
86 tsc_disable = 1;
87 }
86 return 0; 88 return 0;
87} 89}
88core_initcall(numaq_dsc_disable); 90arch_initcall(numaq_tsc_disable);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6259afea46d1..6946b06e2784 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -102,7 +102,7 @@ void default_idle(void)
102 local_irq_enable(); 102 local_irq_enable();
103 103
104 if (!hlt_counter && boot_cpu_data.hlt_works_ok) { 104 if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
105 clear_thread_flag(TIF_POLLING_NRFLAG); 105 current_thread_info()->status &= ~TS_POLLING;
106 smp_mb__after_clear_bit(); 106 smp_mb__after_clear_bit();
107 while (!need_resched()) { 107 while (!need_resched()) {
108 local_irq_disable(); 108 local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
111 else 111 else
112 local_irq_enable(); 112 local_irq_enable();
113 } 113 }
114 set_thread_flag(TIF_POLLING_NRFLAG); 114 current_thread_info()->status |= TS_POLLING;
115 } else { 115 } else {
116 while (!need_resched()) 116 while (!need_resched())
117 cpu_relax(); 117 cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
174{ 174{
175 int cpu = smp_processor_id(); 175 int cpu = smp_processor_id();
176 176
177 set_thread_flag(TIF_POLLING_NRFLAG); 177 current_thread_info()->status |= TS_POLLING;
178 178
179 /* endless idle loop with no priority at all */ 179 /* endless idle loop with no priority at all */
180 while (1) { 180 while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
312 cr3 = read_cr3(); 312 cr3 = read_cr3();
313 cr4 = read_cr4_safe(); 313 cr4 = read_cr4_safe();
314 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); 314 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
315 show_trace(NULL, &regs->esp); 315 show_trace(NULL, regs, &regs->esp);
316} 316}
317 317
318/* 318/*
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 6bef9273733e..4a65040cc624 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1575,6 +1575,7 @@ void __init setup_arch(char **cmdline_p)
1575 conswitchp = &dummy_con; 1575 conswitchp = &dummy_con;
1576#endif 1576#endif
1577#endif 1577#endif
1578 tsc_init();
1578} 1579}
1579 1580
1580static __init int add_pcspkr(void) 1581static __init int add_pcspkr(void)
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index d134e9643a58..c10789d7a9d3 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
114 114
115static inline int __prepare_ICR (unsigned int shortcut, int vector) 115static inline int __prepare_ICR (unsigned int shortcut, int vector)
116{ 116{
117 return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; 117 unsigned int icr = shortcut | APIC_DEST_LOGICAL;
118
119 switch (vector) {
120 default:
121 icr |= APIC_DM_FIXED | vector;
122 break;
123 case NMI_VECTOR:
124 icr |= APIC_DM_NMI;
125 break;
126 }
127 return icr;
118} 128}
119 129
120static inline int __prepare_ICR2 (unsigned int mask) 130static inline int __prepare_ICR2 (unsigned int mask)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index bd0ca5c9f053..bce5470ecb42 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -52,6 +52,7 @@
52#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
53#include <asm/desc.h> 53#include <asm/desc.h>
54#include <asm/arch_hooks.h> 54#include <asm/arch_hooks.h>
55#include <asm/nmi.h>
55 56
56#include <mach_apic.h> 57#include <mach_apic.h>
57#include <mach_wakecpu.h> 58#include <mach_wakecpu.h>
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 9d3074759856..5f43d0410122 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies;
82DEFINE_SPINLOCK(rtc_lock); 82DEFINE_SPINLOCK(rtc_lock);
83EXPORT_SYMBOL(rtc_lock); 83EXPORT_SYMBOL(rtc_lock);
84 84
85#include <asm/i8253.h>
86
87DEFINE_SPINLOCK(i8253_lock);
88EXPORT_SYMBOL(i8253_lock);
89
90struct timer_opts *cur_timer __read_mostly = &timer_none;
91
92/* 85/*
93 * This is a special lock that is owned by the CPU and holds the index 86 * This is a special lock that is owned by the CPU and holds the index
94 * register we are working with. It is required for NMI access to the 87 * register we are working with. It is required for NMI access to the
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
118} 111}
119EXPORT_SYMBOL(rtc_cmos_write); 112EXPORT_SYMBOL(rtc_cmos_write);
120 113
121/*
122 * This version of gettimeofday has microsecond resolution
123 * and better than microsecond precision on fast x86 machines with TSC.
124 */
125void do_gettimeofday(struct timeval *tv)
126{
127 unsigned long seq;
128 unsigned long usec, sec;
129 unsigned long max_ntp_tick;
130
131 do {
132 unsigned long lost;
133
134 seq = read_seqbegin(&xtime_lock);
135
136 usec = cur_timer->get_offset();
137 lost = jiffies - wall_jiffies;
138
139 /*
140 * If time_adjust is negative then NTP is slowing the clock
141 * so make sure not to go into next possible interval.
142 * Better to lose some accuracy than have time go backwards..
143 */
144 if (unlikely(time_adjust < 0)) {
145 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
146 usec = min(usec, max_ntp_tick);
147
148 if (lost)
149 usec += lost * max_ntp_tick;
150 }
151 else if (unlikely(lost))
152 usec += lost * (USEC_PER_SEC / HZ);
153
154 sec = xtime.tv_sec;
155 usec += (xtime.tv_nsec / 1000);
156 } while (read_seqretry(&xtime_lock, seq));
157
158 while (usec >= 1000000) {
159 usec -= 1000000;
160 sec++;
161 }
162
163 tv->tv_sec = sec;
164 tv->tv_usec = usec;
165}
166
167EXPORT_SYMBOL(do_gettimeofday);
168
169int do_settimeofday(struct timespec *tv)
170{
171 time_t wtm_sec, sec = tv->tv_sec;
172 long wtm_nsec, nsec = tv->tv_nsec;
173
174 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
175 return -EINVAL;
176
177 write_seqlock_irq(&xtime_lock);
178 /*
179 * This is revolting. We need to set "xtime" correctly. However, the
180 * value in this location is the value at the most recent update of
181 * wall time. Discover what correction gettimeofday() would have
182 * made, and then undo it!
183 */
184 nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
185 nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
186
187 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
188 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
189
190 set_normalized_timespec(&xtime, sec, nsec);
191 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
192
193 ntp_clear();
194 write_sequnlock_irq(&xtime_lock);
195 clock_was_set();
196 return 0;
197}
198
199EXPORT_SYMBOL(do_settimeofday);
200
201static int set_rtc_mmss(unsigned long nowtime) 114static int set_rtc_mmss(unsigned long nowtime)
202{ 115{
203 int retval; 116 int retval;
204 117 unsigned long flags;
205 WARN_ON(irqs_disabled());
206 118
207 /* gets recalled with irq locally disabled */ 119 /* gets recalled with irq locally disabled */
208 spin_lock_irq(&rtc_lock); 120 /* XXX - does irqsave resolve this? -johnstul */
121 spin_lock_irqsave(&rtc_lock, flags);
209 if (efi_enabled) 122 if (efi_enabled)
210 retval = efi_set_rtc_mmss(nowtime); 123 retval = efi_set_rtc_mmss(nowtime);
211 else 124 else
212 retval = mach_set_rtc_mmss(nowtime); 125 retval = mach_set_rtc_mmss(nowtime);
213 spin_unlock_irq(&rtc_lock); 126 spin_unlock_irqrestore(&rtc_lock, flags);
214 127
215 return retval; 128 return retval;
216} 129}
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime)
218 131
219int timer_ack; 132int timer_ack;
220 133
221/* monotonic_clock(): returns # of nanoseconds passed since time_init()
222 * Note: This function is required to return accurate
223 * time even in the absence of multiple timer ticks.
224 */
225unsigned long long monotonic_clock(void)
226{
227 return cur_timer->monotonic_clock();
228}
229EXPORT_SYMBOL(monotonic_clock);
230
231#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) 134#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
232unsigned long profile_pc(struct pt_regs *regs) 135unsigned long profile_pc(struct pt_regs *regs)
233{ 136{
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc);
242#endif 145#endif
243 146
244/* 147/*
245 * timer_interrupt() needs to keep up the real-time clock, 148 * This is the same as the above, except we _also_ save the current
246 * as well as call the "do_timer()" routine every clocktick 149 * Time Stamp Counter value at the time of the timer interrupt, so that
150 * we later on can estimate the time of day more exactly.
247 */ 151 */
248static inline void do_timer_interrupt(int irq, struct pt_regs *regs) 152irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
249{ 153{
154 /*
155 * Here we are in the timer irq handler. We just have irqs locally
156 * disabled but we don't know if the timer_bh is running on the other
 157 * CPU. We need to avoid an SMP race with it. NOTE: we don't need
158 * the irq version of write_lock because as just said we have irq
159 * locally disabled. -arca
160 */
161 write_seqlock(&xtime_lock);
162
250#ifdef CONFIG_X86_IO_APIC 163#ifdef CONFIG_X86_IO_APIC
251 if (timer_ack) { 164 if (timer_ack) {
252 /* 165 /*
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
279 irq = inb_p( 0x61 ); /* read the current state */ 192 irq = inb_p( 0x61 ); /* read the current state */
280 outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ 193 outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
281 } 194 }
282}
283
284/*
285 * This is the same as the above, except we _also_ save the current
286 * Time Stamp Counter value at the time of the timer interrupt, so that
287 * we later on can estimate the time of day more exactly.
288 */
289irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
290{
291 /*
292 * Here we are in the timer irq handler. We just have irqs locally
293 * disabled but we don't know if the timer_bh is running on the other
294 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
295 * the irq version of write_lock because as just said we have irq
296 * locally disabled. -arca
297 */
298 write_seqlock(&xtime_lock);
299
300 cur_timer->mark_offset();
301
302 do_timer_interrupt(irq, regs);
303 195
304 write_sequnlock(&xtime_lock); 196 write_sequnlock(&xtime_lock);
305 197
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void)
380 272
381static long clock_cmos_diff, sleep_start; 273static long clock_cmos_diff, sleep_start;
382 274
383static struct timer_opts *last_timer;
384static int timer_suspend(struct sys_device *dev, pm_message_t state) 275static int timer_suspend(struct sys_device *dev, pm_message_t state)
385{ 276{
386 /* 277 /*
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
389 clock_cmos_diff = -get_cmos_time(); 280 clock_cmos_diff = -get_cmos_time();
390 clock_cmos_diff += get_seconds(); 281 clock_cmos_diff += get_seconds();
391 sleep_start = get_cmos_time(); 282 sleep_start = get_cmos_time();
392 last_timer = cur_timer;
393 cur_timer = &timer_none;
394 if (last_timer->suspend)
395 last_timer->suspend(state);
396 return 0; 283 return 0;
397} 284}
398 285
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev)
415 jiffies_64 += sleep_length; 302 jiffies_64 += sleep_length;
416 wall_jiffies += sleep_length; 303 wall_jiffies += sleep_length;
417 write_sequnlock_irqrestore(&xtime_lock, flags); 304 write_sequnlock_irqrestore(&xtime_lock, flags);
418 if (last_timer->resume)
419 last_timer->resume();
420 cur_timer = last_timer;
421 last_timer = NULL;
422 touch_softlockup_watchdog(); 305 touch_softlockup_watchdog();
423 return 0; 306 return 0;
424} 307}
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void)
460 printk("Using HPET for base-timer\n"); 343 printk("Using HPET for base-timer\n");
461 } 344 }
462 345
463 cur_timer = select_timer();
464 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
465
466 time_init_hook(); 346 time_init_hook();
467} 347}
468#endif 348#endif
@@ -484,8 +364,5 @@ void __init time_init(void)
484 set_normalized_timespec(&wall_to_monotonic, 364 set_normalized_timespec(&wall_to_monotonic,
485 -xtime.tv_sec, -xtime.tv_nsec); 365 -xtime.tv_sec, -xtime.tv_nsec);
486 366
487 cur_timer = select_timer();
488 printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
489
490 time_init_hook(); 367 time_init_hook();
491} 368}
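One small but deliberate change above: set_rtc_mmss() now takes rtc_lock with spin_lock_irqsave() instead of spin_lock_irq(), so unlocking restores the caller's interrupt state rather than unconditionally re-enabling IRQs; as the XXX comment notes, the function can now be reached with interrupts already disabled. A minimal sketch of the difference, using a hypothetical lock:

    static DEFINE_SPINLOCK(example_lock);

    /* Sketch: irqsave/irqrestore preserve whatever IRQ state the caller had.
     * spin_lock_irq()/spin_unlock_irq() would force interrupts back on at
     * unlock time, even if the caller entered with them disabled. */
    static void touch_hw_state(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&example_lock, flags);
            /* ... program the device ... */
            spin_unlock_irqrestore(&example_lock, flags);
    }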
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
index 8fa12be658dd..000000000000
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1#
2# Makefile for x86 timers
3#
4
5obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
6
7obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
8obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
9obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
index 8163fe0cf1f0..000000000000
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/*
2 * Common functions used across the timers go here
3 */
4
5#include <linux/init.h>
6#include <linux/timex.h>
7#include <linux/errno.h>
8#include <linux/jiffies.h>
9#include <linux/module.h>
10
11#include <asm/io.h>
12#include <asm/timer.h>
13#include <asm/hpet.h>
14
15#include "mach_timer.h"
16
17/* ------ Calibrate the TSC -------
18 * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
19 * Too much 64-bit arithmetic here to do this cleanly in C, and for
20 * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
21 * output busy loop as low as possible. We avoid reading the CTC registers
22 * directly because of the awkward 8-bit access mechanism of the 82C54
23 * device.
24 */
25
26#define CALIBRATE_TIME (5 * 1000020/HZ)
27
28unsigned long calibrate_tsc(void)
29{
30 mach_prepare_counter();
31
32 {
33 unsigned long startlow, starthigh;
34 unsigned long endlow, endhigh;
35 unsigned long count;
36
37 rdtsc(startlow,starthigh);
38 mach_countup(&count);
39 rdtsc(endlow,endhigh);
40
41
42 /* Error: ECTCNEVERSET */
43 if (count <= 1)
44 goto bad_ctc;
45
46 /* 64-bit subtract - gcc just messes up with long longs */
47 __asm__("subl %2,%0\n\t"
48 "sbbl %3,%1"
49 :"=a" (endlow), "=d" (endhigh)
50 :"g" (startlow), "g" (starthigh),
51 "0" (endlow), "1" (endhigh));
52
53 /* Error: ECPUTOOFAST */
54 if (endhigh)
55 goto bad_ctc;
56
57 /* Error: ECPUTOOSLOW */
58 if (endlow <= CALIBRATE_TIME)
59 goto bad_ctc;
60
61 __asm__("divl %2"
62 :"=a" (endlow), "=d" (endhigh)
63 :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
64
65 return endlow;
66 }
67
68 /*
69 * The CTC wasn't reliable: we got a hit on the very first read,
70 * or the CPU was so fast/slow that the quotient wouldn't fit in
71 * 32 bits..
72 */
73bad_ctc:
74 return 0;
75}
76
77#ifdef CONFIG_HPET_TIMER
78/* ------ Calibrate the TSC using HPET -------
79 * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
80 * Second output is parameter 1 (when non NULL)
81 * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
82 * calibrate_tsc() calibrates the processor TSC by comparing
83 * it to the HPET timer of known frequency.
84 * Too much 64-bit arithmetic here to do this cleanly in C
85 */
86#define CALIBRATE_CNT_HPET (5 * hpet_tick)
87#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
88
89unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
90{
91 unsigned long tsc_startlow, tsc_starthigh;
92 unsigned long tsc_endlow, tsc_endhigh;
93 unsigned long hpet_start, hpet_end;
94 unsigned long result, remain;
95
96 hpet_start = hpet_readl(HPET_COUNTER);
97 rdtsc(tsc_startlow, tsc_starthigh);
98 do {
99 hpet_end = hpet_readl(HPET_COUNTER);
100 } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
101 rdtsc(tsc_endlow, tsc_endhigh);
102
103 /* 64-bit subtract - gcc just messes up with long longs */
104 __asm__("subl %2,%0\n\t"
105 "sbbl %3,%1"
106 :"=a" (tsc_endlow), "=d" (tsc_endhigh)
107 :"g" (tsc_startlow), "g" (tsc_starthigh),
108 "0" (tsc_endlow), "1" (tsc_endhigh));
109
110 /* Error: ECPUTOOFAST */
111 if (tsc_endhigh)
112 goto bad_calibration;
113
114 /* Error: ECPUTOOSLOW */
115 if (tsc_endlow <= CALIBRATE_TIME_HPET)
116 goto bad_calibration;
117
118 ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
119 if (remain > (tsc_endlow >> 1))
120 result++; /* rounding the result */
121
122 if (tsc_hpet_quotient_ptr) {
123 unsigned long tsc_hpet_quotient;
124
125 ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
126 CALIBRATE_CNT_HPET);
127 if (remain > (tsc_endlow >> 1))
128 tsc_hpet_quotient++; /* rounding the result */
129 *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
130 }
131
132 return result;
133bad_calibration:
134 /*
135 * the CPU was so fast/slow that the quotient wouldn't fit in
136 * 32 bits..
137 */
138 return 0;
139}
140#endif
141
142
143unsigned long read_timer_tsc(void)
144{
145 unsigned long retval;
146 rdtscl(retval);
147 return retval;
148}
149
150
151/* calculate cpu_khz */
152void init_cpu_khz(void)
153{
154 if (cpu_has_tsc) {
155 unsigned long tsc_quotient = calibrate_tsc();
156 if (tsc_quotient) {
157 /* report CPU clock rate in Hz.
158 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
159 * clock/second. Our precision is about 100 ppm.
160 */
161 { unsigned long eax=0, edx=1000;
162 __asm__("divl %2"
163 :"=a" (cpu_khz), "=d" (edx)
164 :"r" (tsc_quotient),
165 "0" (eax), "1" (edx));
166 printk("Detected %u.%03u MHz processor.\n",
167 cpu_khz / 1000, cpu_khz % 1000);
168 }
169 }
170 }
171}
172
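The divl in init_cpu_khz() above divides the 64-bit value 1000*2^32 by tsc_quotient; since calibrate_tsc() returns 2^32 / (TSC clocks per usec), the quotient is TSC clocks per millisecond, i.e. cpu_khz. A short C sketch of the same arithmetic without the inline assembly (the caller still has to reject a zero quotient, as the removed code did):

    #include <stdint.h>

    /* Sketch: tsc_quotient = 2^32 / (TSC clocks per usec); dividing
     * 1000 * 2^32 by it yields TSC clocks per millisecond, i.e. kHz. */
    static uint32_t tsc_quotient_to_khz(uint32_t tsc_quotient)
    {
            return (uint32_t)((1000ULL << 32) / tsc_quotient);
    }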
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
index 7e39ed8e33f8..000000000000
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/string.h>
4#include <asm/timer.h>
5
6#ifdef CONFIG_HPET_TIMER
7/*
8 * HPET memory read is slower than tsc reads, but is more dependable as it
9 * always runs at constant frequency and reduces complexity due to
10 * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
11 * timer_pit when HPET is active. So, we default to timer_tsc.
12 */
13#endif
14/* list of timers, ordered by preference, NULL terminated */
15static struct init_timer_opts* __initdata timers[] = {
16#ifdef CONFIG_X86_CYCLONE_TIMER
17 &timer_cyclone_init,
18#endif
19#ifdef CONFIG_HPET_TIMER
20 &timer_hpet_init,
21#endif
22#ifdef CONFIG_X86_PM_TIMER
23 &timer_pmtmr_init,
24#endif
25 &timer_tsc_init,
26 &timer_pit_init,
27 NULL,
28};
29
30static char clock_override[10] __initdata;
31
32static int __init clock_setup(char* str)
33{
34 if (str)
35 strlcpy(clock_override, str, sizeof(clock_override));
36 return 1;
37}
38__setup("clock=", clock_setup);
39
40
41/* The chosen timesource has been found to be bad.
42 * Fall back to a known good timesource (the PIT)
43 */
44void clock_fallback(void)
45{
46 cur_timer = &timer_pit;
47}
48
49/* iterates through the list of timers, returning the first
50 * one that initializes successfully.
51 */
52struct timer_opts* __init select_timer(void)
53{
54 int i = 0;
55
56 /* find most preferred working timer */
57 while (timers[i]) {
58 if (timers[i]->init)
59 if (timers[i]->init(clock_override) == 0)
60 return timers[i]->opts;
61 ++i;
62 }
63
64 panic("select_timer: Cannot find a suitable timer\n");
65 return NULL;
66}
67
68int read_current_timer(unsigned long *timer_val)
69{
70 if (cur_timer->read_timer) {
71 *timer_val = cur_timer->read_timer();
72 return 0;
73 }
74 return -1;
75}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
index 13892a65c941..000000000000
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
1/* Cyclone-timer:
2 * This code implements timer_ops for the cyclone counter found
3 * on IBM x440, x360, and other Summit based systems.
4 *
5 * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
6 */
7
8
9#include <linux/spinlock.h>
10#include <linux/init.h>
11#include <linux/timex.h>
12#include <linux/errno.h>
13#include <linux/string.h>
14#include <linux/jiffies.h>
15
16#include <asm/timer.h>
17#include <asm/io.h>
18#include <asm/pgtable.h>
19#include <asm/fixmap.h>
20#include <asm/i8253.h>
21
22#include "io_ports.h"
23
24/* Number of usecs that the last interrupt was delayed */
25static int delay_at_last_interrupt;
26
27#define CYCLONE_CBAR_ADDR 0xFEB00CD0
28#define CYCLONE_PMCC_OFFSET 0x51A0
29#define CYCLONE_MPMC_OFFSET 0x51D0
30#define CYCLONE_MPCS_OFFSET 0x51A8
31#define CYCLONE_TIMER_FREQ 100000000
32#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
33int use_cyclone = 0;
34
35static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
36static u32 last_cyclone_low;
37static u32 last_cyclone_high;
38static unsigned long long monotonic_base;
39static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
40
41/* helper macro to atomically read both cyclone counter registers */
42#define read_cyclone_counter(low,high) \
43 do{ \
44 high = cyclone_timer[1]; low = cyclone_timer[0]; \
45 } while (high != cyclone_timer[1]);
46
47
48static void mark_offset_cyclone(void)
49{
50 unsigned long lost, delay;
51 unsigned long delta = last_cyclone_low;
52 int count;
53 unsigned long long this_offset, last_offset;
54
55 write_seqlock(&monotonic_lock);
56 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
57
58 spin_lock(&i8253_lock);
59 read_cyclone_counter(last_cyclone_low,last_cyclone_high);
60
61 /* read values for delay_at_last_interrupt */
62 outb_p(0x00, 0x43); /* latch the count ASAP */
63
64 count = inb_p(0x40); /* read the latched count */
65 count |= inb(0x40) << 8;
66
67 /*
68 * VIA686a test code... reset the latch if count > max + 1
69 * from timer_pit.c - cjb
70 */
71 if (count > LATCH) {
72 outb_p(0x34, PIT_MODE);
73 outb_p(LATCH & 0xff, PIT_CH0);
74 outb(LATCH >> 8, PIT_CH0);
75 count = LATCH - 1;
76 }
77 spin_unlock(&i8253_lock);
78
79 /* lost tick compensation */
80 delta = last_cyclone_low - delta;
81 delta /= (CYCLONE_TIMER_FREQ/1000000);
82 delta += delay_at_last_interrupt;
83 lost = delta/(1000000/HZ);
84 delay = delta%(1000000/HZ);
85 if (lost >= 2)
86 jiffies_64 += lost-1;
87
88 /* update the monotonic base value */
89 this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
90 monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
91 write_sequnlock(&monotonic_lock);
92
93 /* calculate delay_at_last_interrupt */
94 count = ((LATCH-1) - count) * TICK_SIZE;
95 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
96
97
98 /* catch corner case where tick rollover occured
99 * between cyclone and pit reads (as noted when
100 * usec delta is > 90% # of usecs/tick)
101 */
102 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
103 jiffies_64++;
104}
105
106static unsigned long get_offset_cyclone(void)
107{
108 u32 offset;
109
110 if(!cyclone_timer)
111 return delay_at_last_interrupt;
112
113 /* Read the cyclone timer */
114 offset = cyclone_timer[0];
115
116 /* .. relative to previous jiffy */
117 offset = offset - last_cyclone_low;
118
119 /* convert cyclone ticks to microseconds */
120 /* XXX slow, can we speed this up? */
121 offset = offset/(CYCLONE_TIMER_FREQ/1000000);
122
123 /* our adjusted time offset in microseconds */
124 return delay_at_last_interrupt + offset;
125}
126
127static unsigned long long monotonic_clock_cyclone(void)
128{
129 u32 now_low, now_high;
130 unsigned long long last_offset, this_offset, base;
131 unsigned long long ret;
132 unsigned seq;
133
134 /* atomically read monotonic base & last_offset */
135 do {
136 seq = read_seqbegin(&monotonic_lock);
137 last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
138 base = monotonic_base;
139 } while (read_seqretry(&monotonic_lock, seq));
140
141
142 /* Read the cyclone counter */
143 read_cyclone_counter(now_low,now_high);
144 this_offset = ((unsigned long long)now_high<<32)|now_low;
145
146 /* convert to nanoseconds */
147 ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
148 return ret * (1000000000 / CYCLONE_TIMER_FREQ);
149}
150
151static int __init init_cyclone(char* override)
152{
153 u32* reg;
154 u32 base; /* saved cyclone base address */
155 u32 pageaddr; /* page that contains cyclone_timer register */
156 u32 offset; /* offset from pageaddr to cyclone_timer register */
157 int i;
158
159 /* check clock override */
160 if (override[0] && strncmp(override,"cyclone",7))
161 return -ENODEV;
162
163 /*make sure we're on a summit box*/
164 if(!use_cyclone) return -ENODEV;
165
166 printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
167
168 /* find base address */
169 pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
170 offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
171 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
172 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
173 if(!reg){
174 printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
175 return -ENODEV;
176 }
177 base = *reg;
178 if(!base){
179 printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
180 return -ENODEV;
181 }
182
183 /* setup PMCC */
184 pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
185 offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
186 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
187 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
188 if(!reg){
189 printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
190 return -ENODEV;
191 }
192 reg[0] = 0x00000001;
193
194 /* setup MPCS */
195 pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
196 offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
197 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
198 reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
199 if(!reg){
200 printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
201 return -ENODEV;
202 }
203 reg[0] = 0x00000001;
204
205 /* map in cyclone_timer */
206 pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
207 offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
208 set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
209 cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
210 if(!cyclone_timer){
211 printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
212 return -ENODEV;
213 }
214
215	/* quick test to make sure it's ticking */
216 for(i=0; i<3; i++){
217 u32 old = cyclone_timer[0];
218 int stall = 100;
219 while(stall--) barrier();
220 if(cyclone_timer[0] == old){
221 printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
222 cyclone_timer = 0;
223 return -ENODEV;
224 }
225 }
226
227 init_cpu_khz();
228
229 /* Everything looks good! */
230 return 0;
231}
232
233
234static void delay_cyclone(unsigned long loops)
235{
236 unsigned long bclock, now;
237 if(!cyclone_timer)
238 return;
239 bclock = cyclone_timer[0];
240 do {
241 rep_nop();
242 now = cyclone_timer[0];
243 } while ((now-bclock) < loops);
244}
245/************************************************************/
246
247/* cyclone timer_opts struct */
248static struct timer_opts timer_cyclone = {
249 .name = "cyclone",
250 .mark_offset = mark_offset_cyclone,
251 .get_offset = get_offset_cyclone,
252 .monotonic_clock = monotonic_clock_cyclone,
253 .delay = delay_cyclone,
254};
255
256struct init_timer_opts __initdata timer_cyclone_init = {
257 .init = init_cyclone,
258 .opts = &timer_cyclone,
259};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
index 17a6fe7166e7..000000000000
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/init.h>
8#include <linux/timex.h>
9#include <linux/errno.h>
10#include <linux/string.h>
11#include <linux/jiffies.h>
12
13#include <asm/timer.h>
14#include <asm/io.h>
15#include <asm/processor.h>
16
17#include "io_ports.h"
18#include "mach_timer.h"
19#include <asm/hpet.h>
20
21static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
22static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
23static unsigned long hpet_last; /* hpet counter value at last tick*/
24static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
25static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
26static unsigned long long monotonic_base;
27static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
28
29/* convert from cycles(64bits) => nanoseconds (64bits)
30 * basic equation:
31 * ns = cycles / (freq / ns_per_sec)
32 * ns = cycles * (ns_per_sec / freq)
33 * ns = cycles * (10^9 / (cpu_khz * 10^3))
34 * ns = cycles * (10^6 / cpu_khz)
35 *
36 * Then we use scaling math (suggested by george@mvista.com) to get:
37 * ns = cycles * (10^6 * SC / cpu_khz) / SC
38 * ns = cycles * cyc2ns_scale / SC
39 *
40 * And since SC is a constant power of two, we can convert the div
41 * into a shift.
42 *
 43 * We can use a khz divisor instead of mhz to keep better precision, since
44 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
45 * (mathieu.desnoyers@polymtl.ca)
46 *
47 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
48 */
49static unsigned long cyc2ns_scale __read_mostly;
50#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
51
52static inline void set_cyc2ns_scale(unsigned long cpu_khz)
53{
54 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
55}
56
57static inline unsigned long long cycles_2_ns(unsigned long long cyc)
58{
59 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
60}
61
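The set_cyc2ns_scale()/cycles_2_ns() pair above is plain fixed-point arithmetic. A minimal standalone sketch of the same math (hypothetical demo_* names, and a 2 GHz CPU assumed only for the numbers):

    #include <stdio.h>

    #define CYC2NS_SCALE_FACTOR 10                       /* 2^10, as above */

    static unsigned long cyc2ns_scale;

    static void demo_set_cyc2ns_scale(unsigned long cpu_khz)
    {
            /* scale = 10^6 * 2^10 / cpu_khz; fits easily in 32 bits */
            cyc2ns_scale = (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
    }

    static unsigned long long demo_cycles_2_ns(unsigned long long cyc)
    {
            /* ns = cyc * (10^6 / cpu_khz), done as one multiply and one shift */
            return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
    }

    int main(void)
    {
            demo_set_cyc2ns_scale(2000000);              /* assume a 2 GHz CPU */
            /* 2 * 10^9 cycles is one second, so this prints 1000000000 */
            printf("%llu\n", demo_cycles_2_ns(2000000000ULL));
            return 0;
    }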
62static unsigned long long monotonic_clock_hpet(void)
63{
64 unsigned long long last_offset, this_offset, base;
65 unsigned seq;
66
67 /* atomically read monotonic base & last_offset */
68 do {
69 seq = read_seqbegin(&monotonic_lock);
70 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
71 base = monotonic_base;
72 } while (read_seqretry(&monotonic_lock, seq));
73
74 /* Read the Time Stamp Counter */
75 rdtscll(this_offset);
76
77 /* return the value in ns */
78 return base + cycles_2_ns(this_offset - last_offset);
79}
80
81static unsigned long get_offset_hpet(void)
82{
83 register unsigned long eax, edx;
84
85 eax = hpet_readl(HPET_COUNTER);
86 eax -= hpet_last; /* hpet delta */
87 eax = min(hpet_tick, eax);
88 /*
89 * Time offset = (hpet delta) * ( usecs per HPET clock )
90 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
91 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
92 *
93 * Where,
94 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
95 *
96 * Using a mull instead of a divl saves some cycles in critical path.
97 */
98 ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
99
100 /* our adjusted time offset in microseconds */
101 return edx;
102}
103
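The ASM_MUL64_REG step in get_offset_hpet() is a 32x32->64 multiply that keeps only the high half, i.e. delta * quotient / 2^32. A portable sketch with a hypothetical helper and a made-up quotient (the real value is derived from hpet_tick at init time; 14.318 MHz is just a typical HPET rate used for the numbers):

    #include <stdio.h>
    #include <stdint.h>

    /* usecs = (hpet delta) * (quotient pre-scaled by 2^32) >> 32 */
    static uint32_t hpet_delta_to_usecs(uint32_t delta, uint32_t usec_quotient)
    {
            return (uint32_t)(((uint64_t)delta * usec_quotient) >> 32);
    }

    int main(void)
    {
            /* ~0.0698 usecs per HPET clock at 14.318 MHz, so the 2^32-scaled
             * quotient is roughly 300 million */
            uint32_t quotient = 300000000u;

            /* 100000 HPET clocks is about 6984 usecs */
            printf("%u usecs\n", (unsigned)hpet_delta_to_usecs(100000, quotient));
            return 0;
    }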
104static void mark_offset_hpet(void)
105{
106 unsigned long long this_offset, last_offset;
107 unsigned long offset;
108
109 write_seqlock(&monotonic_lock);
110 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
111 rdtsc(last_tsc_low, last_tsc_high);
112
113 if (hpet_use_timer)
114 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
115 else
116 offset = hpet_readl(HPET_COUNTER);
117 if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
118 int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
119 jiffies_64 += lost_ticks;
120 }
121 hpet_last = offset;
122
123 /* update the monotonic base value */
124 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
125 monotonic_base += cycles_2_ns(this_offset - last_offset);
126 write_sequnlock(&monotonic_lock);
127}
128
129static void delay_hpet(unsigned long loops)
130{
131 unsigned long hpet_start, hpet_end;
132 unsigned long eax;
133
134 /* loops is the number of cpu cycles. Convert it to hpet clocks */
135 ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
136
137 hpet_start = hpet_readl(HPET_COUNTER);
138 do {
139 rep_nop();
140 hpet_end = hpet_readl(HPET_COUNTER);
141 } while ((hpet_end - hpet_start) < (loops));
142}
143
144static struct timer_opts timer_hpet;
145
146static int __init init_hpet(char* override)
147{
148 unsigned long result, remain;
149
150 /* check clock override */
151 if (override[0] && strncmp(override,"hpet",4))
152 return -ENODEV;
153
154 if (!is_hpet_enabled())
155 return -ENODEV;
156
157 printk("Using HPET for gettimeofday\n");
158 if (cpu_has_tsc) {
159 unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
160 if (tsc_quotient) {
161 /* report CPU clock rate in Hz.
162 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
163 * clock/second. Our precision is about 100 ppm.
164 */
165 { unsigned long eax=0, edx=1000;
166 ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
167 eax, edx);
168 printk("Detected %u.%03u MHz processor.\n",
169 cpu_khz / 1000, cpu_khz % 1000);
170 }
171 set_cyc2ns_scale(cpu_khz);
172 }
173 /* set this only when cpu_has_tsc */
174 timer_hpet.read_timer = read_timer_tsc;
175 }
176
177 /*
178 * Math to calculate hpet to usec multiplier
179 * Look for the comments at get_offset_hpet()
180 */
181 ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
182 if (remain > (hpet_tick >> 1))
183 result++; /* rounding the result */
184 hpet_usec_quotient = result;
185
186 return 0;
187}
188
189static int hpet_resume(void)
190{
191 write_seqlock(&monotonic_lock);
192 /* Assume this is the last mark offset time */
193 rdtsc(last_tsc_low, last_tsc_high);
194
195 if (hpet_use_timer)
196 hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
197 else
198 hpet_last = hpet_readl(HPET_COUNTER);
199 write_sequnlock(&monotonic_lock);
200 return 0;
201}
202/************************************************************/
203
204/* hpet timer_opts struct */
205static struct timer_opts timer_hpet __read_mostly = {
206 .name = "hpet",
207 .mark_offset = mark_offset_hpet,
208 .get_offset = get_offset_hpet,
209 .monotonic_clock = monotonic_clock_hpet,
210 .delay = delay_hpet,
211 .resume = hpet_resume,
212};
213
214struct init_timer_opts __initdata timer_hpet_init = {
215 .init = init_hpet,
216 .opts = &timer_hpet,
217};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
index 4ea2f414dbbd..000000000000
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
1#include <linux/init.h>
2#include <asm/timer.h>
3
4static void mark_offset_none(void)
5{
6 /* nothing needed */
7}
8
9static unsigned long get_offset_none(void)
10{
11 return 0;
12}
13
14static unsigned long long monotonic_clock_none(void)
15{
16 return 0;
17}
18
19static void delay_none(unsigned long loops)
20{
21 int d0;
22 __asm__ __volatile__(
23 "\tjmp 1f\n"
24 ".align 16\n"
25 "1:\tjmp 2f\n"
26 ".align 16\n"
27 "2:\tdecl %0\n\tjns 2b"
28 :"=&a" (d0)
29 :"0" (loops));
30}
31
32/* none timer_opts struct */
33struct timer_opts timer_none = {
34 .name = "none",
35 .mark_offset = mark_offset_none,
36 .get_offset = get_offset_none,
37 .monotonic_clock = monotonic_clock_none,
38 .delay = delay_none,
39};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
index b9b6bd56b9ba..000000000000
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,177 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 */
5
6#include <linux/spinlock.h>
7#include <linux/module.h>
8#include <linux/device.h>
9#include <linux/sysdev.h>
10#include <linux/timex.h>
11#include <asm/delay.h>
12#include <asm/mpspec.h>
13#include <asm/timer.h>
14#include <asm/smp.h>
15#include <asm/io.h>
16#include <asm/arch_hooks.h>
17#include <asm/i8253.h>
18
19#include "do_timer.h"
20#include "io_ports.h"
21
22static int count_p; /* counter in get_offset_pit() */
23
24static int __init init_pit(char* override)
25{
26 /* check clock override */
27 if (override[0] && strncmp(override,"pit",3))
28 printk(KERN_ERR "Warning: clock= override failed. Defaulting "
29 "to PIT\n");
30 init_cpu_khz();
31 count_p = LATCH;
32 return 0;
33}
34
35static void mark_offset_pit(void)
36{
37 /* nothing needed */
38}
39
40static unsigned long long monotonic_clock_pit(void)
41{
42 return 0;
43}
44
45static void delay_pit(unsigned long loops)
46{
47 int d0;
48 __asm__ __volatile__(
49 "\tjmp 1f\n"
50 ".align 16\n"
51 "1:\tjmp 2f\n"
52 ".align 16\n"
53 "2:\tdecl %0\n\tjns 2b"
54 :"=&a" (d0)
55 :"0" (loops));
56}
57
58
59/* This function must be called with xtime_lock held.
60 * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
61 *
62 * However, the pc-audio speaker driver changes the divisor so that
63 * it gets interrupted rather more often - it loads 64 into the
64 * counter rather than 11932! This has an adverse impact on
65 * do_gettimeoffset() -- it stops working! What is also not
66 * good is that the interval that our timer function gets called
67 * is no longer 10.0002 ms, but 9.9767 ms. To get around this
68 * would require using a different timing source. Maybe someone
69 * could use the RTC - I know that this can interrupt at frequencies
70 * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
71 * it so that at startup, the timer code in sched.c would select
72 * using either the RTC or the 8253 timer. The decision would be
73 * based on whether there was any other device around that needed
74 * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
75 * and then do some jiggery to have a version of do_timer that
76 * advanced the clock by 1/1024 s. Every time that reached over 1/100
77 * of a second, then do all the old code. If the time was kept correct
78 * then do_gettimeoffset could just return 0 - there is no low order
79 * divider that can be accessed.
80 *
81 * Ideally, you would be able to use the RTC for the speaker driver,
82 * but it appears that the speaker driver really needs interrupt more
83 * often than every 120 us or so.
84 *
85 * Anyway, this needs more thought.... pjsg (1993-08-28)
86 *
87 * If you are really that interested, you should be reading
88 * comp.protocols.time.ntp!
89 */
90
91static unsigned long get_offset_pit(void)
92{
93 int count;
94 unsigned long flags;
95 static unsigned long jiffies_p = 0;
96
97 /*
98 * cache volatile jiffies temporarily; we have xtime_lock.
99 */
100 unsigned long jiffies_t;
101
102 spin_lock_irqsave(&i8253_lock, flags);
103 /* timer count may underflow right here */
104 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
105
106 count = inb_p(PIT_CH0); /* read the latched count */
107
108 /*
109 * We do this guaranteed double memory access instead of a _p
110 * postfix in the previous port access. Wheee, hackady hack
111 */
112 jiffies_t = jiffies;
113
114 count |= inb_p(PIT_CH0) << 8;
115
116 /* VIA686a test code... reset the latch if count > max + 1 */
117 if (count > LATCH) {
118 outb_p(0x34, PIT_MODE);
119 outb_p(LATCH & 0xff, PIT_CH0);
120 outb(LATCH >> 8, PIT_CH0);
121 count = LATCH - 1;
122 }
123
124 /*
125 * avoiding timer inconsistencies (they are rare, but they happen)...
126 * there are two kinds of problems that must be avoided here:
127 * 1. the timer counter underflows
128 * 2. hardware problem with the timer, not giving us continuous time,
129 * the counter does small "jumps" upwards on some Pentium systems,
130 * (see c't 95/10 page 335 for Neptun bug.)
131 */
132
133 if( jiffies_t == jiffies_p ) {
134 if( count > count_p ) {
135 /* the nutcase */
136 count = do_timer_overflow(count);
137 }
138 } else
139 jiffies_p = jiffies_t;
140
141 count_p = count;
142
143 spin_unlock_irqrestore(&i8253_lock, flags);
144
145 count = ((LATCH-1) - count) * TICK_SIZE;
146 count = (count + LATCH/2) / LATCH;
147
148 return count;
149}
150
151
152/* pit timer_opts struct */
153struct timer_opts timer_pit = {
154 .name = "pit",
155 .mark_offset = mark_offset_pit,
156 .get_offset = get_offset_pit,
157 .monotonic_clock = monotonic_clock_pit,
158 .delay = delay_pit,
159};
160
161struct init_timer_opts __initdata timer_pit_init = {
162 .init = init_pit,
163 .opts = &timer_pit,
164};
165
166void setup_pit_timer(void)
167{
168 unsigned long flags;
169
170 spin_lock_irqsave(&i8253_lock, flags);
171 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
172 udelay(10);
173 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
174 udelay(10);
175 outb(LATCH >> 8 , PIT_CH0); /* MSB */
176 spin_unlock_irqrestore(&i8253_lock, flags);
177}
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
index 144e94a04933..000000000000
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,342 +0,0 @@
1/*
2 * (C) Dominik Brodowski <linux@brodo.de> 2003
3 *
4 * Driver to use the Power Management Timer (PMTMR) available in some
5 * southbridges as primary timing source for the Linux kernel.
6 *
7 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
8 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
9 *
10 * This file is licensed under the GPL v2.
11 */
12
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/device.h>
17#include <linux/init.h>
18#include <linux/pci.h>
19#include <asm/types.h>
20#include <asm/timer.h>
21#include <asm/smp.h>
22#include <asm/io.h>
23#include <asm/arch_hooks.h>
24
25#include <linux/timex.h>
26#include "mach_timer.h"
27
28/* Number of PMTMR ticks expected during calibration run */
29#define PMTMR_TICKS_PER_SEC 3579545
30#define PMTMR_EXPECTED_RATE \
31 ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
32
33
34/* The I/O port the PMTMR resides at.
35 * The location is detected during setup_arch(),
36 * in arch/i386/acpi/boot.c */
37u32 pmtmr_ioport = 0;
38
39
40/* value of the Power timer at last timer interrupt */
41static u32 offset_tick;
42static u32 offset_delay;
43
44static unsigned long long monotonic_base;
45static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
46
47#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
48
49static int pmtmr_need_workaround __read_mostly = 1;
50
51/*helper function to safely read acpi pm timesource*/
52static inline u32 read_pmtmr(void)
53{
54 if (pmtmr_need_workaround) {
55 u32 v1, v2, v3;
56
 57		/* It has been reported that on various broken chipsets
 58		 * (ICH4, PIIX4 and PIIX4E) the ACPI PM time source is not
 59		 * latched, so you must read it multiple times to ensure
 60		 * a safe value is read.
 61		 */
62 do {
63 v1 = inl(pmtmr_ioport);
64 v2 = inl(pmtmr_ioport);
65 v3 = inl(pmtmr_ioport);
66 } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
67 || (v3 > v1 && v3 < v2));
68
69 /* mask the output to 24 bits */
70 return v2 & ACPI_PM_MASK;
71 }
72
73 return inl(pmtmr_ioport) & ACPI_PM_MASK;
74}
75
76
77/*
78 * Some boards have the PMTMR running way too fast. We check
79 * the PMTMR rate against PIT channel 2 to catch these cases.
80 */
81static int verify_pmtmr_rate(void)
82{
83 u32 value1, value2;
84 unsigned long count, delta;
85
86 mach_prepare_counter();
87 value1 = read_pmtmr();
88 mach_countup(&count);
89 value2 = read_pmtmr();
90 delta = (value2 - value1) & ACPI_PM_MASK;
91
92 /* Check that the PMTMR delta is within 5% of what we expect */
93 if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
94 delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
95 printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
96 return -1;
97 }
98
99 return 0;
100}
101
102
103static int init_pmtmr(char* override)
104{
105 u32 value1, value2;
106 unsigned int i;
107
108 if (override[0] && strncmp(override,"pmtmr",5))
109 return -ENODEV;
110
111 if (!pmtmr_ioport)
112 return -ENODEV;
113
114 /* we use the TSC for delay_pmtmr, so make sure it exists */
115 if (!cpu_has_tsc)
116 return -ENODEV;
117
118 /* "verify" this timing source */
119 value1 = read_pmtmr();
120 for (i = 0; i < 10000; i++) {
121 value2 = read_pmtmr();
122 if (value2 == value1)
123 continue;
124 if (value2 > value1)
125 goto pm_good;
126 if ((value2 < value1) && ((value2) < 0xFFF))
127 goto pm_good;
128		printk(KERN_INFO "PM-Timer had inconsistent results: 0x%x, 0x%x - aborting.\n", value1, value2);
129 return -EINVAL;
130 }
131	printk(KERN_INFO "PM-Timer had no reasonable result: 0x%x - aborting.\n", value1);
132 return -ENODEV;
133
134pm_good:
135 if (verify_pmtmr_rate() != 0)
136 return -ENODEV;
137
138 init_cpu_khz();
139 return 0;
140}
141
142static inline u32 cyc2us(u32 cycles)
143{
144 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
145 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
146 *
147 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
148 * easily be multiplied with 286 (=0x11E) without having to fear
149 * u32 overflows.
150 */
151 cycles *= 286;
152 return (cycles >> 10);
153}
154
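A quick numeric check of the 286/1024 approximation that cyc2us() relies on, using the 35796-tick worst case quoted in the comment (a sketch, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            unsigned int ticks = 35796;                  /* max delta per tick at HZ = 100 */
            unsigned int approx = (ticks * 286) >> 10;   /* what cyc2us() computes */
            double exact = ticks / 3.579545;             /* true microseconds */

            /* 35796 * 286 = 10237656, comfortably inside a u32, so no overflow */
            printf("approx=%u us, exact=%.1f us\n", approx, exact);
            return 0;
    }

The two values differ by well under 0.1%, consistent with the small error the comment cites.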
155/*
156 * this gets called during each timer interrupt
157 * - Called while holding the writer xtime_lock
158 */
159static void mark_offset_pmtmr(void)
160{
161 u32 lost, delta, last_offset;
162 static int first_run = 1;
163 last_offset = offset_tick;
164
165 write_seqlock(&monotonic_lock);
166
167 offset_tick = read_pmtmr();
168
169 /* calculate tick interval */
170 delta = (offset_tick - last_offset) & ACPI_PM_MASK;
171
172 /* convert to usecs */
173 delta = cyc2us(delta);
174
175 /* update the monotonic base value */
176 monotonic_base += delta * NSEC_PER_USEC;
177 write_sequnlock(&monotonic_lock);
178
179 /* convert to ticks */
180 delta += offset_delay;
181 lost = delta / (USEC_PER_SEC / HZ);
182 offset_delay = delta % (USEC_PER_SEC / HZ);
183
184
185 /* compensate for lost ticks */
186 if (lost >= 2)
187 jiffies_64 += lost - 1;
188
189 /* don't calculate delay for first run,
190	 or if we've got less than a tick */
191 if (first_run || (lost < 1)) {
192 first_run = 0;
193 offset_delay = 0;
194 }
195}
196
197static int pmtmr_resume(void)
198{
199 write_seqlock(&monotonic_lock);
200 /* Assume this is the last mark offset time */
201 offset_tick = read_pmtmr();
202 write_sequnlock(&monotonic_lock);
203 return 0;
204}
205
206static unsigned long long monotonic_clock_pmtmr(void)
207{
208 u32 last_offset, this_offset;
209 unsigned long long base, ret;
210 unsigned seq;
211
212
213 /* atomically read monotonic base & last_offset */
214 do {
215 seq = read_seqbegin(&monotonic_lock);
216 last_offset = offset_tick;
217 base = monotonic_base;
218 } while (read_seqretry(&monotonic_lock, seq));
219
220 /* Read the pmtmr */
221 this_offset = read_pmtmr();
222
223 /* convert to nanoseconds */
224 ret = (this_offset - last_offset) & ACPI_PM_MASK;
225 ret = base + (cyc2us(ret) * NSEC_PER_USEC);
226 return ret;
227}
228
229static void delay_pmtmr(unsigned long loops)
230{
231 unsigned long bclock, now;
232
233 rdtscl(bclock);
234 do
235 {
236 rep_nop();
237 rdtscl(now);
238 } while ((now-bclock) < loops);
239}
240
241
242/*
243 * get the offset (in microseconds) from the last call to mark_offset()
244 * - Called holding a reader xtime_lock
245 */
246static unsigned long get_offset_pmtmr(void)
247{
248 u32 now, offset, delta = 0;
249
250 offset = offset_tick;
251 now = read_pmtmr();
252 delta = (now - offset)&ACPI_PM_MASK;
253
254 return (unsigned long) offset_delay + cyc2us(delta);
255}
256
257
258/* acpi timer_opts struct */
259static struct timer_opts timer_pmtmr = {
260 .name = "pmtmr",
261 .mark_offset = mark_offset_pmtmr,
262 .get_offset = get_offset_pmtmr,
263 .monotonic_clock = monotonic_clock_pmtmr,
264 .delay = delay_pmtmr,
265 .read_timer = read_timer_tsc,
266 .resume = pmtmr_resume,
267};
268
269struct init_timer_opts __initdata timer_pmtmr_init = {
270 .init = init_pmtmr,
271 .opts = &timer_pmtmr,
272};
273
274#ifdef CONFIG_PCI
275/*
276 * PIIX4 Errata:
277 *
278 * The power management timer may return improper results when read.
279 * Although the timer value settles properly after incrementing,
280 * while incrementing there is a 3 ns window every 69.8 ns where the
281 * timer value is indeterminate (a 4.2% chance that the data will be
282 * incorrect when read). As a result, the ACPI free running count up
283 * timer specification is violated due to erroneous reads.
284 */
285static int __init pmtmr_bug_check(void)
286{
287 static struct pci_device_id gray_list[] __initdata = {
288		/* these chipsets may have the bug. */
289 { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
290 PCI_DEVICE_ID_INTEL_82801DB_0) },
291 { },
292 };
293 struct pci_dev *dev;
294 int pmtmr_has_bug = 0;
295 u8 rev;
296
297 if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
298 return 0;
299
300 dev = pci_get_device(PCI_VENDOR_ID_INTEL,
301 PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
302 if (dev) {
303 pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
304 /* the bug has been fixed in PIIX4M */
305 if (rev < 3) {
306 printk(KERN_WARNING "* Found PM-Timer Bug on this "
307 "chipset. Due to workarounds for a bug,\n"
308 "* this time source is slow. Consider trying "
309 "other time sources (clock=)\n");
310 pmtmr_has_bug = 1;
311 }
312 pci_dev_put(dev);
313 }
314
315 if (pci_dev_present(gray_list)) {
316 printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
317 " to workarounds for a bug,\n"
318 "* this time source is slow. If you are sure your timer"
319 " does not have\n"
320 "* this bug, please use \"pmtmr_good\" to disable the "
321 "workaround\n");
322 pmtmr_has_bug = 1;
323 }
324
325 if (!pmtmr_has_bug)
326 pmtmr_need_workaround = 0;
327
328 return 0;
329}
330device_initcall(pmtmr_bug_check);
331#endif
332
333static int __init pmtr_good_setup(char *__str)
334{
335 pmtmr_need_workaround = 0;
336 return 1;
337}
338__setup("pmtmr_good", pmtr_good_setup);
339
340MODULE_LICENSE("GPL");
341MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
342MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
index f1187ddb0d0f..000000000000
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,617 +0,0 @@
1/*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 *
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 * failing to inline.
8 */
9
10#include <linux/spinlock.h>
11#include <linux/init.h>
12#include <linux/timex.h>
13#include <linux/errno.h>
14#include <linux/cpufreq.h>
15#include <linux/string.h>
16#include <linux/jiffies.h>
17
18#include <asm/timer.h>
19#include <asm/io.h>
 20/* processor.h for the tsc_disable flag */
21#include <asm/processor.h>
22
23#include "io_ports.h"
24#include "mach_timer.h"
25
26#include <asm/hpet.h>
27#include <asm/i8253.h>
28
29#ifdef CONFIG_HPET_TIMER
30static unsigned long hpet_usec_quotient;
31static unsigned long hpet_last;
32static struct timer_opts timer_tsc;
33#endif
34
35static inline void cpufreq_delayed_get(void);
36
37int tsc_disable __devinitdata = 0;
38
39static int use_tsc;
40/* Number of usecs that the last interrupt was delayed */
41static int delay_at_last_interrupt;
42
43static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
45static unsigned long long monotonic_base;
46static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47
48/* Avoid compensating for lost ticks before TSCs are synched */
49static int detect_lost_ticks;
50static int __init start_lost_tick_compensation(void)
51{
52 detect_lost_ticks = 1;
53 return 0;
54}
55late_initcall(start_lost_tick_compensation);
56
57/* convert from cycles(64bits) => nanoseconds (64bits)
58 * basic equation:
59 * ns = cycles / (freq / ns_per_sec)
60 * ns = cycles * (ns_per_sec / freq)
61 * ns = cycles * (10^9 / (cpu_khz * 10^3))
62 * ns = cycles * (10^6 / cpu_khz)
63 *
64 * Then we use scaling math (suggested by george@mvista.com) to get:
65 * ns = cycles * (10^6 * SC / cpu_khz) / SC
66 * ns = cycles * cyc2ns_scale / SC
67 *
68 * And since SC is a constant power of two, we can convert the div
69 * into a shift.
70 *
 71 * We can use a khz divisor instead of mhz to keep better precision, since
72 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
73 * (mathieu.desnoyers@polymtl.ca)
74 *
75 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
76 */
77static unsigned long cyc2ns_scale __read_mostly;
78#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
79
80static inline void set_cyc2ns_scale(unsigned long cpu_khz)
81{
82 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
83}
84
85static inline unsigned long long cycles_2_ns(unsigned long long cyc)
86{
87 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
88}
89
90static int count2; /* counter for mark_offset_tsc() */
91
92/* Cached *multiplier* to convert TSC counts to microseconds.
93 * (see the equation below).
94 * Equal to 2^32 * (1 / (clocks per usec) ).
95 * Initialized in time_init.
96 */
97static unsigned long fast_gettimeoffset_quotient;
98
99static unsigned long get_offset_tsc(void)
100{
101 register unsigned long eax, edx;
102
103 /* Read the Time Stamp Counter */
104
105 rdtsc(eax,edx);
106
107 /* .. relative to previous jiffy (32 bits is enough) */
108 eax -= last_tsc_low; /* tsc_low delta */
109
110 /*
111 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
112 * = (tsc_low delta) * (usecs_per_clock)
113 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
114 *
115 * Using a mull instead of a divl saves up to 31 clock cycles
116 * in the critical path.
117 */
118
119 __asm__("mull %2"
120 :"=a" (eax), "=d" (edx)
121 :"rm" (fast_gettimeoffset_quotient),
122 "0" (eax));
123
124 /* our adjusted time offset in microseconds */
125 return delay_at_last_interrupt + edx;
126}
127
128static unsigned long long monotonic_clock_tsc(void)
129{
130 unsigned long long last_offset, this_offset, base;
131 unsigned seq;
132
133 /* atomically read monotonic base & last_offset */
134 do {
135 seq = read_seqbegin(&monotonic_lock);
136 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
137 base = monotonic_base;
138 } while (read_seqretry(&monotonic_lock, seq));
139
140 /* Read the Time Stamp Counter */
141 rdtscll(this_offset);
142
143 /* return the value in ns */
144 return base + cycles_2_ns(this_offset - last_offset);
145}
146
147/*
148 * Scheduler clock - returns current time in nanosec units.
149 */
150unsigned long long sched_clock(void)
151{
152 unsigned long long this_offset;
153
154 /*
155	 * In the NUMA case we don't use the TSC as they are not
156 * synchronized across all CPUs.
157 */
158#ifndef CONFIG_NUMA
159 if (!use_tsc)
160#endif
161 /* no locking but a rare wrong value is not a big deal */
162 return jiffies_64 * (1000000000 / HZ);
163
164 /* Read the Time Stamp Counter */
165 rdtscll(this_offset);
166
167 /* return the value in ns */
168 return cycles_2_ns(this_offset);
169}
170
171static void delay_tsc(unsigned long loops)
172{
173 unsigned long bclock, now;
174
175 rdtscl(bclock);
176 do
177 {
178 rep_nop();
179 rdtscl(now);
180 } while ((now-bclock) < loops);
181}
182
183#ifdef CONFIG_HPET_TIMER
184static void mark_offset_tsc_hpet(void)
185{
186 unsigned long long this_offset, last_offset;
187 unsigned long offset, temp, hpet_current;
188
189 write_seqlock(&monotonic_lock);
190 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
191 /*
192 * It is important that these two operations happen almost at
193 * the same time. We do the RDTSC stuff first, since it's
194 * faster. To avoid any inconsistencies, we need interrupts
195 * disabled locally.
196 */
197 /*
198 * Interrupts are just disabled locally since the timer irq
199 * has the SA_INTERRUPT flag set. -arca
200 */
201 /* read Pentium cycle counter */
202
203 hpet_current = hpet_readl(HPET_COUNTER);
204 rdtsc(last_tsc_low, last_tsc_high);
205
206 /* lost tick compensation */
207 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
208 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
209 && detect_lost_ticks) {
210 int lost_ticks = (offset - hpet_last) / hpet_tick;
211 jiffies_64 += lost_ticks;
212 }
213 hpet_last = hpet_current;
214
215 /* update the monotonic base value */
216 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
217 monotonic_base += cycles_2_ns(this_offset - last_offset);
218 write_sequnlock(&monotonic_lock);
219
220 /* calculate delay_at_last_interrupt */
221 /*
222 * Time offset = (hpet delta) * ( usecs per HPET clock )
223 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
224 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
225 * Where,
226 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
227 */
228 delay_at_last_interrupt = hpet_current - offset;
229 ASM_MUL64_REG(temp, delay_at_last_interrupt,
230 hpet_usec_quotient, delay_at_last_interrupt);
231}
232#endif
233
234
235#ifdef CONFIG_CPU_FREQ
236#include <linux/workqueue.h>
237
238static unsigned int cpufreq_delayed_issched = 0;
239static unsigned int cpufreq_init = 0;
240static struct work_struct cpufreq_delayed_get_work;
241
242static void handle_cpufreq_delayed_get(void *v)
243{
244 unsigned int cpu;
245 for_each_online_cpu(cpu) {
246 cpufreq_get(cpu);
247 }
248 cpufreq_delayed_issched = 0;
249}
250
251/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
252 * to verify the CPU frequency the timing core thinks the CPU is running
253 * at is still correct.
254 */
255static inline void cpufreq_delayed_get(void)
256{
257 if (cpufreq_init && !cpufreq_delayed_issched) {
258 cpufreq_delayed_issched = 1;
259 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
260 schedule_work(&cpufreq_delayed_get_work);
261 }
262}
263
264/* If the CPU frequency is scaled, TSC-based delays will need a different
265 * loops_per_jiffy value to function properly.
266 */
267
268static unsigned int ref_freq = 0;
269static unsigned long loops_per_jiffy_ref = 0;
270
271#ifndef CONFIG_SMP
272static unsigned long fast_gettimeoffset_ref = 0;
273static unsigned int cpu_khz_ref = 0;
274#endif
275
276static int
277time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
278 void *data)
279{
280 struct cpufreq_freqs *freq = data;
281
282 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
283 write_seqlock_irq(&xtime_lock);
284 if (!ref_freq) {
285 if (!freq->old){
286 ref_freq = freq->new;
287 goto end;
288 }
289 ref_freq = freq->old;
290 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
291#ifndef CONFIG_SMP
292 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
293 cpu_khz_ref = cpu_khz;
294#endif
295 }
296
297 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
298 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
299 (val == CPUFREQ_RESUMECHANGE)) {
300 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
301 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
302#ifndef CONFIG_SMP
303 if (cpu_khz)
304 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
305 if (use_tsc) {
306 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
307 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
308 set_cyc2ns_scale(cpu_khz);
309 }
310 }
311#endif
312 }
313
314end:
315 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
316 write_sequnlock_irq(&xtime_lock);
317
318 return 0;
319}
320
321static struct notifier_block time_cpufreq_notifier_block = {
322 .notifier_call = time_cpufreq_notifier
323};
324
325
326static int __init cpufreq_tsc(void)
327{
328 int ret;
329 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
330 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
331 CPUFREQ_TRANSITION_NOTIFIER);
332 if (!ret)
333 cpufreq_init = 1;
334 return ret;
335}
336core_initcall(cpufreq_tsc);
337
338#else /* CONFIG_CPU_FREQ */
339static inline void cpufreq_delayed_get(void) { return; }
340#endif
341
342int recalibrate_cpu_khz(void)
343{
344#ifndef CONFIG_SMP
345 unsigned int cpu_khz_old = cpu_khz;
346
347 if (cpu_has_tsc) {
348 local_irq_disable();
349 init_cpu_khz();
350 local_irq_enable();
351 cpu_data[0].loops_per_jiffy =
352 cpufreq_scale(cpu_data[0].loops_per_jiffy,
353 cpu_khz_old,
354 cpu_khz);
355 return 0;
356 } else
357 return -ENODEV;
358#else
359 return -ENODEV;
360#endif
361}
362EXPORT_SYMBOL(recalibrate_cpu_khz);
363
364static void mark_offset_tsc(void)
365{
366 unsigned long lost,delay;
367 unsigned long delta = last_tsc_low;
368 int count;
369 int countmp;
370 static int count1 = 0;
371 unsigned long long this_offset, last_offset;
372 static int lost_count = 0;
373
374 write_seqlock(&monotonic_lock);
375 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
376 /*
377 * It is important that these two operations happen almost at
378 * the same time. We do the RDTSC stuff first, since it's
379 * faster. To avoid any inconsistencies, we need interrupts
380 * disabled locally.
381 */
382
383 /*
384 * Interrupts are just disabled locally since the timer irq
385 * has the SA_INTERRUPT flag set. -arca
386 */
387
388 /* read Pentium cycle counter */
389
390 rdtsc(last_tsc_low, last_tsc_high);
391
392 spin_lock(&i8253_lock);
393 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
394
395 count = inb_p(PIT_CH0); /* read the latched count */
396 count |= inb(PIT_CH0) << 8;
397
398 /*
399 * VIA686a test code... reset the latch if count > max + 1
400 * from timer_pit.c - cjb
401 */
402 if (count > LATCH) {
403 outb_p(0x34, PIT_MODE);
404 outb_p(LATCH & 0xff, PIT_CH0);
405 outb(LATCH >> 8, PIT_CH0);
406 count = LATCH - 1;
407 }
408
409 spin_unlock(&i8253_lock);
410
411 if (pit_latch_buggy) {
412		/* get the center value of the last 3 timer latch reads */
413 if ((count2 >= count && count >= count1)
414 || (count1 >= count && count >= count2)) {
415 count2 = count1; count1 = count;
416 } else if ((count1 >= count2 && count2 >= count)
417 || (count >= count2 && count2 >= count1)) {
418 countmp = count;count = count2;
419 count2 = count1;count1 = countmp;
420 } else {
421 count2 = count1; count1 = count; count = count1;
422 }
423 }
424
425 /* lost tick compensation */
426 delta = last_tsc_low - delta;
427 {
428 register unsigned long eax, edx;
429 eax = delta;
430 __asm__("mull %2"
431 :"=a" (eax), "=d" (edx)
432 :"rm" (fast_gettimeoffset_quotient),
433 "0" (eax));
434 delta = edx;
435 }
436 delta += delay_at_last_interrupt;
437 lost = delta/(1000000/HZ);
438 delay = delta%(1000000/HZ);
439 if (lost >= 2 && detect_lost_ticks) {
440 jiffies_64 += lost-1;
441
442 /* sanity check to ensure we're not always losing ticks */
443 if (lost_count++ > 100) {
444 printk(KERN_WARNING "Losing too many ticks!\n");
445 printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
446 printk(KERN_WARNING "Possible reasons for this are:\n");
447 printk(KERN_WARNING " You're running with Speedstep,\n");
448 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
449 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
450 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
451
452 clock_fallback();
453 }
454 /* ... but give the TSC a fair chance */
455 if (lost_count > 25)
456 cpufreq_delayed_get();
457 } else
458 lost_count = 0;
459 /* update the monotonic base value */
460 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
461 monotonic_base += cycles_2_ns(this_offset - last_offset);
462 write_sequnlock(&monotonic_lock);
463
464 /* calculate delay_at_last_interrupt */
465 count = ((LATCH-1) - count) * TICK_SIZE;
466 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
467
468	/* catch corner case where tick rollover occurred
469 * between tsc and pit reads (as noted when
470 * usec delta is > 90% # of usecs/tick)
471 */
472 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
473 jiffies_64++;
474}
475
476static int __init init_tsc(char* override)
477{
478
479 /* check clock override */
480 if (override[0] && strncmp(override,"tsc",3)) {
481#ifdef CONFIG_HPET_TIMER
482 if (is_hpet_enabled()) {
483 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
484 } else
485#endif
486 {
487 return -ENODEV;
488 }
489 }
490
491 /*
492 * If we have APM enabled or the CPU clock speed is variable
493 * (CPU stops clock on HLT or slows clock to save power)
494 * then the TSC timestamps may diverge by up to 1 jiffy from
495 * 'real time' but nothing will break.
496 * The most frequent case is that the CPU is "woken" from a halt
497 * state by the timer interrupt itself, so we get 0 error. In the
498 * rare cases where a driver would "wake" the CPU and request a
499 * timestamp, the maximum error is < 1 jiffy. But timestamps are
500 * still perfectly ordered.
501 * Note that the TSC counter will be reset if APM suspends
502 * to disk; this won't break the kernel, though, 'cuz we're
503 * smart. See arch/i386/kernel/apm.c.
504 */
505 /*
506 * Firstly we have to do a CPU check for chips with
507 * a potentially buggy TSC. At this point we haven't run
508 * the ident/bugs checks so we must run this hook as it
509 * may turn off the TSC flag.
510 *
511 * NOTE: this doesn't yet handle SMP 486 machines where only
512	 * some CPUs have a TSC. That's never worked and nobody has
513 * moaned if you have the only one in the world - you fix it!
514 */
515
516 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
517
518 if (cpu_has_tsc) {
519 unsigned long tsc_quotient;
520#ifdef CONFIG_HPET_TIMER
521 if (is_hpet_enabled() && hpet_use_timer) {
522 unsigned long result, remain;
523 printk("Using TSC for gettimeofday\n");
524 tsc_quotient = calibrate_tsc_hpet(NULL);
525 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
526 /*
527 * Math to calculate hpet to usec multiplier
528 * Look for the comments at get_offset_tsc_hpet()
529 */
530 ASM_DIV64_REG(result, remain, hpet_tick,
531 0, KERNEL_TICK_USEC);
532 if (remain > (hpet_tick >> 1))
533 result++; /* rounding the result */
534
535 hpet_usec_quotient = result;
536 } else
537#endif
538 {
539 tsc_quotient = calibrate_tsc();
540 }
541
542 if (tsc_quotient) {
543 fast_gettimeoffset_quotient = tsc_quotient;
544 use_tsc = 1;
545 /*
546 * We could be more selective here I suspect
547 * and just enable this for the next intel chips ?
548 */
549 /* report CPU clock rate in Hz.
550 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
551 * clock/second. Our precision is about 100 ppm.
552 */
553 { unsigned long eax=0, edx=1000;
554 __asm__("divl %2"
555 :"=a" (cpu_khz), "=d" (edx)
556 :"r" (tsc_quotient),
557 "0" (eax), "1" (edx));
558 printk("Detected %u.%03u MHz processor.\n",
559 cpu_khz / 1000, cpu_khz % 1000);
560 }
561 set_cyc2ns_scale(cpu_khz);
562 return 0;
563 }
564 }
565 return -ENODEV;
566}
567
568static int tsc_resume(void)
569{
570 write_seqlock(&monotonic_lock);
571 /* Assume this is the last mark offset time */
572 rdtsc(last_tsc_low, last_tsc_high);
573#ifdef CONFIG_HPET_TIMER
574 if (is_hpet_enabled() && hpet_use_timer)
575 hpet_last = hpet_readl(HPET_COUNTER);
576#endif
577 write_sequnlock(&monotonic_lock);
578 return 0;
579}
580
581#ifndef CONFIG_X86_TSC
582/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
583 * in cpu/common.c */
584static int __init tsc_setup(char *str)
585{
586 tsc_disable = 1;
587 return 1;
588}
589#else
590static int __init tsc_setup(char *str)
591{
592 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
593 "cannot disable TSC.\n");
594 return 1;
595}
596#endif
597__setup("notsc", tsc_setup);
598
599
600
601/************************************************************/
602
603/* tsc timer_opts struct */
604static struct timer_opts timer_tsc = {
605 .name = "tsc",
606 .mark_offset = mark_offset_tsc,
607 .get_offset = get_offset_tsc,
608 .monotonic_clock = monotonic_clock_tsc,
609 .delay = delay_tsc,
610 .read_timer = read_timer_tsc,
611 .resume = tsc_resume,
612};
613
614struct init_timer_opts __initdata timer_tsc_init = {
615 .init = init_tsc,
616 .opts = &timer_tsc,
617};
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index dcc14477af1f..78464097470a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -28,6 +28,7 @@
28#include <linux/utsname.h> 28#include <linux/utsname.h>
29#include <linux/kprobes.h> 29#include <linux/kprobes.h>
30#include <linux/kexec.h> 30#include <linux/kexec.h>
31#include <linux/unwind.h>
31 32
32#ifdef CONFIG_EISA 33#ifdef CONFIG_EISA
33#include <linux/ioport.h> 34#include <linux/ioport.h>
@@ -47,7 +48,7 @@
47#include <asm/desc.h> 48#include <asm/desc.h>
48#include <asm/i387.h> 49#include <asm/i387.h>
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50 51#include <asm/unwind.h>
51#include <asm/smp.h> 52#include <asm/smp.h>
52#include <asm/arch_hooks.h> 53#include <asm/arch_hooks.h>
53#include <asm/kdebug.h> 54#include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
92asmlinkage void machine_check(void); 93asmlinkage void machine_check(void);
93 94
94static int kstack_depth_to_print = 24; 95static int kstack_depth_to_print = 24;
96static int call_trace = 1;
95ATOMIC_NOTIFIER_HEAD(i386die_chain); 97ATOMIC_NOTIFIER_HEAD(i386die_chain);
96 98
97int register_die_notifier(struct notifier_block *nb) 99int register_die_notifier(struct notifier_block *nb)
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
170 return ebp; 172 return ebp;
171} 173}
172 174
173static void show_trace_log_lvl(struct task_struct *task, 175static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
176{
177 int n = 0;
178 int printed = 0; /* nr of entries already printed on current line */
179
180 while (unwind(info) == 0 && UNW_PC(info)) {
181 ++n;
182 printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
183 if (arch_unw_user_mode(info))
184 break;
185 }
186 if (printed)
187 printk("\n");
188 return n;
189}
190
191static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *stack, char *log_lvl) 192 unsigned long *stack, char *log_lvl)
175{ 193{
176 unsigned long ebp; 194 unsigned long ebp;
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
178 if (!task) 196 if (!task)
179 task = current; 197 task = current;
180 198
199 if (call_trace >= 0) {
200 int unw_ret = 0;
201 struct unwind_frame_info info;
202
203 if (regs) {
204 if (unwind_init_frame_info(&info, task, regs) == 0)
205 unw_ret = show_trace_unwind(&info, log_lvl);
206 } else if (task == current)
207 unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
208 else {
209 if (unwind_init_blocked(&info, task) == 0)
210 unw_ret = show_trace_unwind(&info, log_lvl);
211 }
212 if (unw_ret > 0) {
213 if (call_trace > 0)
214 return;
215 printk("%sLegacy call trace:\n", log_lvl);
216 }
217 }
218
181 if (task == current) { 219 if (task == current) {
182 /* Grab ebp right from our regs */ 220 /* Grab ebp right from our regs */
183 asm ("movl %%ebp, %0" : "=r" (ebp) : ); 221 asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
198 } 236 }
199} 237}
200 238
201void show_trace(struct task_struct *task, unsigned long * stack) 239void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
202{ 240{
203 show_trace_log_lvl(task, stack, ""); 241 show_trace_log_lvl(task, regs, stack, "");
204} 242}
205 243
206static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, 244static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
207 char *log_lvl) 245 unsigned long *esp, char *log_lvl)
208{ 246{
209 unsigned long *stack; 247 unsigned long *stack;
210 int i; 248 int i;
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
225 printk("%08lx ", *stack++); 263 printk("%08lx ", *stack++);
226 } 264 }
227 printk("\n%sCall Trace:\n", log_lvl); 265 printk("\n%sCall Trace:\n", log_lvl);
228 show_trace_log_lvl(task, esp, log_lvl); 266 show_trace_log_lvl(task, regs, esp, log_lvl);
229} 267}
230 268
231void show_stack(struct task_struct *task, unsigned long *esp) 269void show_stack(struct task_struct *task, unsigned long *esp)
232{ 270{
233 printk(" "); 271 printk(" ");
234 show_stack_log_lvl(task, esp, ""); 272 show_stack_log_lvl(task, NULL, esp, "");
235} 273}
236 274
237/* 275/*
@@ -241,7 +279,7 @@ void dump_stack(void)
241{ 279{
242 unsigned long stack; 280 unsigned long stack;
243 281
244 show_trace(current, &stack); 282 show_trace(current, NULL, &stack);
245} 283}
246 284
247EXPORT_SYMBOL(dump_stack); 285EXPORT_SYMBOL(dump_stack);
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs)
285 u8 __user *eip; 323 u8 __user *eip;
286 324
287 printk("\n" KERN_EMERG "Stack: "); 325 printk("\n" KERN_EMERG "Stack: ");
288 show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); 326 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
289 327
290 printk(KERN_EMERG "Code: "); 328 printk(KERN_EMERG "Code: ");
291 329
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s)
1215 return 1; 1253 return 1;
1216} 1254}
1217__setup("kstack=", kstack_setup); 1255__setup("kstack=", kstack_setup);
1256
1257static int __init call_trace_setup(char *s)
1258{
1259 if (strcmp(s, "old") == 0)
1260 call_trace = -1;
1261 else if (strcmp(s, "both") == 0)
1262 call_trace = 0;
1263 else if (strcmp(s, "new") == 0)
1264 call_trace = 1;
1265 return 1;
1266}
1267__setup("call_trace=", call_trace_setup);
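The call_trace= boot parameter added above selects which backtrace style show_trace_log_lvl() prints: call_trace=new (the default; unwinder output only), call_trace=both (unwinder output followed by the legacy EBP-chain trace), or call_trace=old (legacy trace only). For example, appending

    call_trace=both

to the kernel command line makes every stack dump show the new unwinder's entries first and then a "Legacy call trace:" section, which is useful while validating the new unwinder against the old code path.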
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
new file mode 100644
index 000000000000..7e0d8dab2075
--- /dev/null
+++ b/arch/i386/kernel/tsc.c
@@ -0,0 +1,478 @@
1/*
2 * This code largely moved from arch/i386/kernel/timer/timer_tsc.c
3 * which was originally moved from arch/i386/kernel/time.c.
4 * See comments there for proper credits.
5 */
6
7#include <linux/clocksource.h>
8#include <linux/workqueue.h>
9#include <linux/cpufreq.h>
10#include <linux/jiffies.h>
11#include <linux/init.h>
12#include <linux/dmi.h>
13
14#include <asm/delay.h>
15#include <asm/tsc.h>
16#include <asm/delay.h>
17#include <asm/io.h>
18
19#include "mach_timer.h"
20
21/*
22 * On some systems the TSC frequency does not
23 * change with the cpu frequency. So we need
24 * an extra value to store the TSC freq
25 */
26unsigned int tsc_khz;
27
28int tsc_disable __cpuinitdata = 0;
29
30#ifdef CONFIG_X86_TSC
31static int __init tsc_setup(char *str)
32{
33 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
34 "cannot disable TSC.\n");
35 return 1;
36}
37#else
38/*
39 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
40 * in cpu/common.c
41 */
42static int __init tsc_setup(char *str)
43{
44 tsc_disable = 1;
45
46 return 1;
47}
48#endif
49
50__setup("notsc", tsc_setup);
51
52/*
53 * code to mark and check if the TSC is unstable
54 * due to cpufreq or due to unsynced TSCs
55 */
56static int tsc_unstable;
57
58static inline int check_tsc_unstable(void)
59{
60 return tsc_unstable;
61}
62
63void mark_tsc_unstable(void)
64{
65 tsc_unstable = 1;
66}
67EXPORT_SYMBOL_GPL(mark_tsc_unstable);
68
 69/* Accelerators for sched_clock()
70 * convert from cycles(64bits) => nanoseconds (64bits)
71 * basic equation:
72 * ns = cycles / (freq / ns_per_sec)
73 * ns = cycles * (ns_per_sec / freq)
74 * ns = cycles * (10^9 / (cpu_khz * 10^3))
75 * ns = cycles * (10^6 / cpu_khz)
76 *
77 * Then we use scaling math (suggested by george@mvista.com) to get:
78 * ns = cycles * (10^6 * SC / cpu_khz) / SC
79 * ns = cycles * cyc2ns_scale / SC
80 *
81 * And since SC is a constant power of two, we can convert the div
82 * into a shift.
83 *
 85 * We can use a khz divisor instead of mhz to keep better precision, since
85 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
86 * (mathieu.desnoyers@polymtl.ca)
87 *
88 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
89 */
90static unsigned long cyc2ns_scale __read_mostly;
91
92#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
93
94static inline void set_cyc2ns_scale(unsigned long cpu_khz)
95{
96 cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
97}
98
99static inline unsigned long long cycles_2_ns(unsigned long long cyc)
100{
101 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
102}
103
104/*
105 * Scheduler clock - returns current time in nanosec units.
106 */
107unsigned long long sched_clock(void)
108{
109 unsigned long long this_offset;
110
111 /*
112	 * in the NUMA case we don't use the TSC as they are not
113 * synchronized across all CPUs.
114 */
115#ifndef CONFIG_NUMA
116 if (!cpu_khz || check_tsc_unstable())
117#endif
118 /* no locking but a rare wrong value is not a big deal */
119 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
120
121 /* read the Time Stamp Counter: */
122 rdtscll(this_offset);
123
124 /* return the value in ns */
125 return cycles_2_ns(this_offset);
126}
127
128static unsigned long calculate_cpu_khz(void)
129{
130 unsigned long long start, end;
131 unsigned long count;
132 u64 delta64;
133 int i;
134 unsigned long flags;
135
136 local_irq_save(flags);
137
138 /* run 3 times to ensure the cache is warm */
139 for (i = 0; i < 3; i++) {
140 mach_prepare_counter();
141 rdtscll(start);
142 mach_countup(&count);
143 rdtscll(end);
144 }
145 /*
146 * Error: ECTCNEVERSET
147 * The CTC wasn't reliable: we got a hit on the very first read,
148 * or the CPU was so fast/slow that the quotient wouldn't fit in
149 * 32 bits..
150 */
151 if (count <= 1)
152 goto err;
153
154 delta64 = end - start;
155
156 /* cpu freq too fast: */
157 if (delta64 > (1ULL<<32))
158 goto err;
159
160 /* cpu freq too slow: */
161 if (delta64 <= CALIBRATE_TIME_MSEC)
162 goto err;
163
164 delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
165 do_div(delta64,CALIBRATE_TIME_MSEC);
166
167 local_irq_restore(flags);
168 return (unsigned long)delta64;
169err:
170 local_irq_restore(flags);
171 return 0;
172}
173
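A worked example of the final division in calculate_cpu_khz(), with assumed numbers: a 50 ms calibration window (the actual CALIBRATE_TIME_MSEC value comes from mach_timer.h) and a CPU that executed 120,000,000 TSC cycles during it:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long delta64 = 120000000ULL;   /* rdtscll(end) - rdtscll(start) */
            unsigned long calibrate_ms = 50;             /* assumed calibration window */

            delta64 += calibrate_ms / 2;                 /* round, since the division truncates */
            delta64 /= calibrate_ms;                     /* cycles per millisecond == kHz */

            printf("cpu_khz = %llu\n", delta64);         /* 2400000, i.e. a 2.4 GHz CPU */
            return 0;
    }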
174int recalibrate_cpu_khz(void)
175{
176#ifndef CONFIG_SMP
177 unsigned long cpu_khz_old = cpu_khz;
178
179 if (cpu_has_tsc) {
180 cpu_khz = calculate_cpu_khz();
181 tsc_khz = cpu_khz;
182 cpu_data[0].loops_per_jiffy =
183 cpufreq_scale(cpu_data[0].loops_per_jiffy,
184 cpu_khz_old, cpu_khz);
185 return 0;
186 } else
187 return -ENODEV;
188#else
189 return -ENODEV;
190#endif
191}
192
193EXPORT_SYMBOL(recalibrate_cpu_khz);
194
195void tsc_init(void)
196{
197 if (!cpu_has_tsc || tsc_disable)
198 return;
199
200 cpu_khz = calculate_cpu_khz();
201 tsc_khz = cpu_khz;
202
203 if (!cpu_khz)
204 return;
205
206 printk("Detected %lu.%03lu MHz processor.\n",
207 (unsigned long)cpu_khz / 1000,
208 (unsigned long)cpu_khz % 1000);
209
210 set_cyc2ns_scale(cpu_khz);
211 use_tsc_delay();
212}
213
214#ifdef CONFIG_CPU_FREQ
215
216static unsigned int cpufreq_delayed_issched = 0;
217static unsigned int cpufreq_init = 0;
218static struct work_struct cpufreq_delayed_get_work;
219
220static void handle_cpufreq_delayed_get(void *v)
221{
222 unsigned int cpu;
223
224 for_each_online_cpu(cpu)
225 cpufreq_get(cpu);
226
227 cpufreq_delayed_issched = 0;
228}
229
230/*
231 * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
232 * to verify the CPU frequency the timing core thinks the CPU is running
233 * at is still correct.
234 */
235static inline void cpufreq_delayed_get(void)
236{
237 if (cpufreq_init && !cpufreq_delayed_issched) {
238 cpufreq_delayed_issched = 1;
239 printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
240 schedule_work(&cpufreq_delayed_get_work);
241 }
242}
243
244/*
245 * if the CPU frequency is scaled, TSC-based delays will need a different
246 * loops_per_jiffy value to function properly.
247 */
248static unsigned int ref_freq = 0;
249static unsigned long loops_per_jiffy_ref = 0;
250static unsigned long cpu_khz_ref = 0;
251
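cpufreq_scale() (from linux/cpufreq.h) rescales a reference value proportionally to the frequency change, which is how the notifier below keeps loops_per_jiffy and cpu_khz in step with the new clock. A hedged stand-in showing the effect for a CPU dropping from 2.4 GHz to 1.6 GHz (scale() and the numbers are illustrative only):

    #include <stdio.h>

    /* stand-in for cpufreq_scale(old, ref_freq, new_freq) ~= old * new / ref */
    static unsigned long scale(unsigned long old, unsigned int ref_khz, unsigned int new_khz)
    {
            return (unsigned long)(((unsigned long long)old * new_khz) / ref_khz);
    }

    int main(void)
    {
            unsigned long lpj_ref = 4800000;             /* loops_per_jiffy measured at ref_freq */

            /* 2.4 GHz -> 1.6 GHz: delays calibrated in loops must shrink by 1/3 */
            printf("%lu\n", scale(lpj_ref, 2400000, 1600000));   /* 3200000 */
            return 0;
    }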
252static int
253time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
254{
255 struct cpufreq_freqs *freq = data;
256
257 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
258 write_seqlock_irq(&xtime_lock);
259
260 if (!ref_freq) {
261 if (!freq->old){
262 ref_freq = freq->new;
263 goto end;
264 }
265 ref_freq = freq->old;
266 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
267 cpu_khz_ref = cpu_khz;
268 }
269
270 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
271 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
272 (val == CPUFREQ_RESUMECHANGE)) {
273 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
274 cpu_data[freq->cpu].loops_per_jiffy =
275 cpufreq_scale(loops_per_jiffy_ref,
276 ref_freq, freq->new);
277
278 if (cpu_khz) {
279
280 if (num_online_cpus() == 1)
281 cpu_khz = cpufreq_scale(cpu_khz_ref,
282 ref_freq, freq->new);
283 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
284 tsc_khz = cpu_khz;
285 set_cyc2ns_scale(cpu_khz);
286 /*
287 * TSC based sched_clock turns
288 * to junk w/ cpufreq
289 */
290 mark_tsc_unstable();
291 }
292 }
293 }
294end:
295 if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
296 write_sequnlock_irq(&xtime_lock);
297
298 return 0;
299}
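The rescaling done through cpufreq_scale() above is plain proportionality: loops_per_jiffy and cpu_khz were captured at ref_freq and are multiplied by new_freq/ref_freq. A simplified stand-in, not the kernel's exact rounding; the helper name and example numbers are assumptions.

	/* sketch: rescale a reference value when the CPU frequency changes */
	static unsigned long scale_by_freq(unsigned long ref_value,
					   unsigned int ref_freq_khz,
					   unsigned int new_freq_khz)
	{
		unsigned long long v = (unsigned long long)ref_value * new_freq_khz;
		return (unsigned long)(v / ref_freq_khz);
	}
	/* e.g. loops_per_jiffy_ref = 4,000,000 captured at 2,000,000 kHz
	 * -> 2,000,000 after the governor drops the core to 1,000,000 kHz */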
300
301static struct notifier_block time_cpufreq_notifier_block = {
302 .notifier_call = time_cpufreq_notifier
303};
304
305static int __init cpufreq_tsc(void)
306{
307 int ret;
308
309 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
310 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
311 CPUFREQ_TRANSITION_NOTIFIER);
312 if (!ret)
313 cpufreq_init = 1;
314
315 return ret;
316}
317
318core_initcall(cpufreq_tsc);
319
320#endif
321
322/* clock source code */
323
324static unsigned long current_tsc_khz = 0;
325static int tsc_update_callback(void);
326
327static cycle_t read_tsc(void)
328{
329 cycle_t ret;
330
331 rdtscll(ret);
332
333 return ret;
334}
335
336static struct clocksource clocksource_tsc = {
337 .name = "tsc",
338 .rating = 300,
339 .read = read_tsc,
340 .mask = CLOCKSOURCE_MASK(64),
341 .mult = 0, /* to be set */
342 .shift = 22,
343 .update_callback = tsc_update_callback,
344 .is_continuous = 1,
345};
346
347static int tsc_update_callback(void)
348{
349 int change = 0;
350
351 /* check to see if we should switch to the safe clocksource: */
352 if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
353 clocksource_tsc.rating = 50;
354 clocksource_reselect();
355 change = 1;
356 }
357
358 /* only update if tsc_khz has changed: */
359 if (current_tsc_khz != tsc_khz) {
360 current_tsc_khz = tsc_khz;
361 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
362 clocksource_tsc.shift);
363 change = 1;
364 }
365
366 return change;
367}
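The mult/shift pair updated here is the usual clocksource fixed-point conversion: ns = (cycles * mult) >> shift, with mult chosen so a kHz-rated cycle count comes out in nanoseconds. A worked sketch under assumed example numbers (a 2 GHz TSC); the helper name is illustrative and only mirrors the arithmetic clocksource_khz2mult() performs.

	/* sketch: mult = (10^6 << shift) / khz, so (cycles * mult) >> shift == ns */
	static unsigned int khz2mult_sketch(unsigned long khz, unsigned int shift)
	{
		unsigned long long tmp = 1000000ULL << shift;
		tmp += khz / 2;				/* round to nearest */
		return (unsigned int)(tmp / khz);
	}
	/* khz = 2,000,000, shift = 22 -> mult = 2,097,152, and
	 * (cycles * 2,097,152) >> 22 == cycles / 2, i.e. 0.5 ns per TSC cycle */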
368
369static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
370{
371 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
372 d->ident);
373 mark_tsc_unstable();
374 return 0;
375}
376
377/* List of systems that have known TSC problems */
378static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
379 {
380 .callback = dmi_mark_tsc_unstable,
381 .ident = "IBM Thinkpad 380XD",
382 .matches = {
383 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
384 DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
385 },
386 },
387 {}
388};
389
390#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
391static struct timer_list verify_tsc_freq_timer;
392
393/* XXX - Probably should add locking */
394static void verify_tsc_freq(unsigned long unused)
395{
396 static u64 last_tsc;
397 static unsigned long last_jiffies;
398
399 u64 now_tsc, interval_tsc;
400 unsigned long now_jiffies, interval_jiffies;
401
402
403 if (check_tsc_unstable())
404 return;
405
406 rdtscll(now_tsc);
407 now_jiffies = jiffies;
408
409 if (!last_jiffies) {
410 goto out;
411 }
412
413 interval_jiffies = now_jiffies - last_jiffies;
414 interval_tsc = now_tsc - last_tsc;
415 interval_tsc *= HZ;
416 do_div(interval_tsc, cpu_khz*1000);
417
418 if (interval_tsc < (interval_jiffies * 3 / 4)) {
419 printk("TSC appears to be running slowly. "
420 "Marking it as unstable\n");
421 mark_tsc_unstable();
422 return;
423 }
424
425out:
426 last_tsc = now_tsc;
427 last_jiffies = now_jiffies;
428 /* set us up to go off on the next interval: */
429 mod_timer(&verify_tsc_freq_timer,
430 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
431}
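The check above converts the elapsed TSC interval into jiffies (cycles * HZ divided by cycles-per-second) and compares it with the jiffies that actually passed; if the TSC accounts for less than three quarters of them it is declared slow. Worked with assumed example numbers (2 GHz TSC, HZ = 250, 10 s interval); the helper name is illustrative.

	/* sketch: did the TSC cover at least 3/4 of the elapsed jiffies? */
	static int tsc_running_slow(unsigned long long interval_cycles,
				    unsigned long interval_jiffies,
				    unsigned long khz, unsigned long hz)
	{
		unsigned long long tsc_jiffies = interval_cycles * hz / (khz * 1000ULL);
		return tsc_jiffies < (interval_jiffies * 3 / 4);
	}
	/* 2e10 cycles over 2,500 jiffies at khz = 2,000,000, hz = 250 -> 2,500: fine;
	 * if only 1e10 cycles accumulated -> 1,250 < 1,875, so mark the TSC unstable */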
432
433/*
434 * Make an educated guess whether the TSC is trustworthy and synchronized
435 * over all CPUs.
436 */
437static __init int unsynchronized_tsc(void)
438{
439 /*
440 * Intel systems are normally all synchronized.
441 * Exceptions must mark TSC as unstable:
442 */
443 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
444 return 0;
445
446 /* assume multi socket systems are not synchronized: */
447 return num_possible_cpus() > 1;
448}
449
450static int __init init_tsc_clocksource(void)
451{
452
453 if (cpu_has_tsc && tsc_khz && !tsc_disable) {
454 /* check blacklist */
455 dmi_check_system(bad_tsc_dmi_table);
456
457 if (unsynchronized_tsc()) /* mark unstable if unsynced */
458 mark_tsc_unstable();
459 current_tsc_khz = tsc_khz;
460 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
461 clocksource_tsc.shift);
462 /* lower the rating if we already know it's unstable: */
463 if (check_tsc_unstable())
464 clocksource_tsc.rating = 50;
465
466 init_timer(&verify_tsc_freq_timer);
467 verify_tsc_freq_timer.function = verify_tsc_freq;
468 verify_tsc_freq_timer.expires =
469 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
470 add_timer(&verify_tsc_freq_timer);
471
472 return clocksource_register(&clocksource_tsc);
473 }
474
475 return 0;
476}
477
478module_init(init_tsc_clocksource);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 7512f39c9f25..2d4f1386e2b1 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -71,6 +71,15 @@ SECTIONS
71 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } 71 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
72 _edata = .; /* End of data section */ 72 _edata = .; /* End of data section */
73 73
74#ifdef CONFIG_STACK_UNWIND
75 . = ALIGN(4);
76 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
77 __start_unwind = .;
78 *(.eh_frame)
79 __end_unwind = .;
80 }
81#endif
82
74 . = ALIGN(THREAD_SIZE); /* init_task */ 83 . = ALIGN(THREAD_SIZE); /* init_task */
75 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 84 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
76 *(.data.init_task) 85 *(.data.init_task)
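The new linker-script section simply brackets all .eh_frame data between the __start_unwind and __end_unwind symbols so the in-kernel unwinder can locate it. On the C side such linker-defined bounds are conventionally picked up as extern byte arrays; the sketch below is a generic illustration of that pattern, not code from this patch, and the helper name is assumed.

	extern const char __start_unwind[], __end_unwind[];

	static unsigned long unwind_table_size(void)
	{
		return (unsigned long)(__end_unwind - __start_unwind);
	}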
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index c49a6acbee56..3c0714c4b669 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -10,43 +10,92 @@
10 * we have to worry about. 10 * we have to worry about.
11 */ 11 */
12 12
13#include <linux/module.h>
13#include <linux/config.h> 14#include <linux/config.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
15#include <linux/delay.h> 16#include <linux/delay.h>
16#include <linux/module.h> 17
17#include <asm/processor.h> 18#include <asm/processor.h>
18#include <asm/delay.h> 19#include <asm/delay.h>
19#include <asm/timer.h> 20#include <asm/timer.h>
20 21
21#ifdef CONFIG_SMP 22#ifdef CONFIG_SMP
22#include <asm/smp.h> 23# include <asm/smp.h>
23#endif 24#endif
24 25
25extern struct timer_opts* timer; 26/* simple loop based delay: */
27static void delay_loop(unsigned long loops)
28{
29 int d0;
30
31 __asm__ __volatile__(
32 "\tjmp 1f\n"
33 ".align 16\n"
34 "1:\tjmp 2f\n"
35 ".align 16\n"
36 "2:\tdecl %0\n\tjns 2b"
37 :"=&a" (d0)
38 :"0" (loops));
39}
40
41/* TSC based delay: */
42static void delay_tsc(unsigned long loops)
43{
44 unsigned long bclock, now;
45
46 rdtscl(bclock);
47 do {
48 rep_nop();
49 rdtscl(now);
50 } while ((now-bclock) < loops);
51}
52
53/*
54 * Since we calibrate only once at boot, this
55 * function pointer is set once at boot and not changed
56 */
57static void (*delay_fn)(unsigned long) = delay_loop;
58
59void use_tsc_delay(void)
60{
61 delay_fn = delay_tsc;
62}
63
64int read_current_timer(unsigned long *timer_val)
65{
66 if (delay_fn == delay_tsc) {
67 rdtscl(*timer_val);
68 return 0;
69 }
70 return -1;
71}
26 72
27void __delay(unsigned long loops) 73void __delay(unsigned long loops)
28{ 74{
29 cur_timer->delay(loops); 75 delay_fn(loops);
30} 76}
31 77
32inline void __const_udelay(unsigned long xloops) 78inline void __const_udelay(unsigned long xloops)
33{ 79{
34 int d0; 80 int d0;
81
35 xloops *= 4; 82 xloops *= 4;
36 __asm__("mull %0" 83 __asm__("mull %0"
37 :"=d" (xloops), "=&a" (d0) 84 :"=d" (xloops), "=&a" (d0)
38 :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); 85 :"1" (xloops), "0"
39 __delay(++xloops); 86 (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
87
88 __delay(++xloops);
40} 89}
41 90
42void __udelay(unsigned long usecs) 91void __udelay(unsigned long usecs)
43{ 92{
44 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ 93 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
45} 94}
46 95
47void __ndelay(unsigned long nsecs) 96void __ndelay(unsigned long nsecs)
48{ 97{
49 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 98 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
50} 99}
51 100
52EXPORT_SYMBOL(__delay); 101EXPORT_SYMBOL(__delay);
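__const_udelay() above is a 32-bit fixed-point trick: 0x000010c7 is 2^32/10^6 rounded up, and mull keeps only the high 32 bits of the product, so the result is roughly usecs * loops_per_jiffy * HZ / 10^6 without a division. An equivalent 64-bit sketch; the helper name and the lpj = 2,000,000, HZ = 250 figures are assumed examples, and the sketch ignores the overflow limits the asm version respects by staying in 32x32->64 multiplies.

	/* sketch: loops to spin for a given number of microseconds */
	static unsigned long udelay_loops(unsigned long usecs,
					  unsigned long loops_per_jiffy,
					  unsigned long hz)
	{
		unsigned long long xloops = (unsigned long long)usecs * 0x000010c7;  /* 2^32/10^6 */
		return (unsigned long)((xloops * loops_per_jiffy * hz) >> 32);
	}
	/* usecs = 10, lpj = 2,000,000, hz = 250 -> loops_per_second = 5e8,
	 * so about 5,000 loops for a 10 us delay */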
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index bd6fe96cc16d..6ee7faaf2c1b 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -30,6 +30,40 @@
30 30
31extern void die(const char *,struct pt_regs *,long); 31extern void die(const char *,struct pt_regs *,long);
32 32
33#ifdef CONFIG_KPROBES
34ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
35int register_page_fault_notifier(struct notifier_block *nb)
36{
37 vmalloc_sync_all();
38 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
39}
40
41int unregister_page_fault_notifier(struct notifier_block *nb)
42{
43 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
44}
45
46static inline int notify_page_fault(enum die_val val, const char *str,
47 struct pt_regs *regs, long err, int trap, int sig)
48{
49 struct die_args args = {
50 .regs = regs,
51 .str = str,
52 .err = err,
53 .trapnr = trap,
54 .signr = sig
55 };
56 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
57}
58#else
59static inline int notify_page_fault(enum die_val val, const char *str,
60 struct pt_regs *regs, long err, int trap, int sig)
61{
62 return NOTIFY_DONE;
63}
64#endif
65
66
33/* 67/*
34 * Unlock any spinlocks which will prevent us from getting the 68 * Unlock any spinlocks which will prevent us from getting the
35 * message out 69 * message out
@@ -324,7 +358,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
324 if (unlikely(address >= TASK_SIZE)) { 358 if (unlikely(address >= TASK_SIZE)) {
325 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) 359 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
326 return; 360 return;
327 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 361 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
328 SIGSEGV) == NOTIFY_STOP) 362 SIGSEGV) == NOTIFY_STOP)
329 return; 363 return;
330 /* 364 /*
@@ -334,7 +368,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
334 goto bad_area_nosemaphore; 368 goto bad_area_nosemaphore;
335 } 369 }
336 370
337 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 371 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
338 SIGSEGV) == NOTIFY_STOP) 372 SIGSEGV) == NOTIFY_STOP)
339 return; 373 return;
340 374
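The chain added above lets kprobes, its real in-tree user, hook page faults without paying notify_die() costs on every fault. A hypothetical consumer would look roughly like the sketch below; the my_pf_* names are illustrative only and not part of the patch.

	#include <linux/notifier.h>
	#include <asm/kdebug.h>

	/* sketch: hypothetical consumer of the page-fault notifier chain */
	static int my_pf_callback(struct notifier_block *self,
				  unsigned long val, void *data)
	{
		struct die_args *args = data;

		if (val != DIE_PAGE_FAULT)
			return NOTIFY_DONE;

		/* args->regs / args->err describe the faulting context */
		return NOTIFY_DONE;	/* NOTIFY_STOP would suppress normal handling */
	}

	static struct notifier_block my_pf_nb = {
		.notifier_call	= my_pf_callback,
	};

	/* at init time: register_page_fault_notifier(&my_pf_nb); */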
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index ec0fd3cfa774..fa8a37bcb391 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -281,9 +281,9 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
281 281
282 for (i = 0; i < model->num_counters; ++i) { 282 for (i = 0; i < model->num_counters; ++i) {
283 struct dentry * dir; 283 struct dentry * dir;
284 char buf[2]; 284 char buf[4];
285 285
286 snprintf(buf, 2, "%d", i); 286 snprintf(buf, sizeof(buf), "%d", i);
287 dir = oprofilefs_mkdir(sb, root, buf); 287 dir = oprofilefs_mkdir(sb, root, buf);
288 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 288 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
289 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 289 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
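The buffer bump matters once a CPU model exposes ten or more counters: formatting i = 10 with "%d" needs three bytes including the terminating NUL, so snprintf() into a two-byte buffer silently truncates the directory name. A tiny userspace-style illustration (the function name is assumed):

	#include <stdio.h>

	static void show_truncation(void)
	{
		char small[2], big[4];

		snprintf(small, sizeof(small), "%d", 10);	/* small == "1", truncated        */
		snprintf(big,   sizeof(big),   "%d", 10);	/* big   == "10", fits up to "999" */
	}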
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 3ad9a72a5036..693bdea4a52b 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -13,6 +13,7 @@
13#include <linux/oprofile.h> 13#include <linux/oprofile.h>
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/nmi.h>
16 17
17#include "op_x86_model.h" 18#include "op_x86_model.h"
18#include "op_counter.h" 19#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index ac8a066035c2..7c61d357b82b 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/fixmap.h> 15#include <asm/fixmap.h>
16#include <asm/apic.h> 16#include <asm/apic.h>
17#include <asm/nmi.h>
17 18
18#include "op_x86_model.h" 19#include "op_x86_model.h"
19#include "op_counter.h" 20#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index d719015fc044..5c3ab4b027ad 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/apic.h> 16#include <asm/apic.h>
17#include <asm/nmi.h>
17 18
18#include "op_x86_model.h" 19#include "op_x86_model.h"
19#include "op_counter.h" 20#include "op_counter.h"
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index 1eec0868f4b3..ed1512a175ab 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -371,8 +371,7 @@ void __devinit pcibios_sort(void)
371 list_for_each(ln, &pci_devices) { 371 list_for_each(ln, &pci_devices) {
372 d = pci_dev_g(ln); 372 d = pci_dev_g(ln);
373 if (d->bus->number == bus && d->devfn == devfn) { 373 if (d->bus->number == bus && d->devfn == devfn) {
374 list_del(&d->global_list); 374 list_move_tail(&d->global_list, &sorted_devices);
375 list_add_tail(&d->global_list, &sorted_devices);
376 if (d == dev) 375 if (d == dev)
377 found = 1; 376 found = 1;
378 break; 377 break;
@@ -390,8 +389,7 @@ void __devinit pcibios_sort(void)
390 if (!found) { 389 if (!found) {
391 printk(KERN_WARNING "PCI: Device %s not found by BIOS\n", 390 printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
392 pci_name(dev)); 391 pci_name(dev));
393 list_del(&dev->global_list); 392 list_move_tail(&dev->global_list, &sorted_devices);
394 list_add_tail(&dev->global_list, &sorted_devices);
395 } 393 }
396 } 394 }
397 list_splice(&sorted_devices, &pci_devices); 395 list_splice(&sorted_devices, &pci_devices);
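The pcbios.c hunks are a pure simplification: list_move_tail() is the single-call form of the delete-then-append pair it replaces, so the sorted list is built exactly as before with one fewer line per site. A minimal sketch of the equivalence; the wrapper name is illustrative only.

	#include <linux/list.h>

	static inline void move_to_tail(struct list_head *entry, struct list_head *head)
	{
		/* what list_move_tail(entry, head) does in one call: */
		list_del(entry);
		list_add_tail(entry, head);
	}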