diff options
Diffstat (limited to 'arch/i386')
51 files changed, 1616 insertions, 2272 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1596101cfaf8..47c08bcd9b24 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -14,6 +14,10 @@ config X86_32 | |||
14 | 486, 586, Pentiums, and various instruction-set-compatible chips by | 14 | 486, 586, Pentiums, and various instruction-set-compatible chips by |
15 | AMD, Cyrix, and others. | 15 | AMD, Cyrix, and others. |
16 | 16 | ||
17 | config GENERIC_TIME | ||
18 | bool | ||
19 | default y | ||
20 | |||
17 | config SEMAPHORE_SLEEPERS | 21 | config SEMAPHORE_SLEEPERS |
18 | bool | 22 | bool |
19 | default y | 23 | default y |
@@ -324,6 +328,15 @@ config X86_MCE_P4THERMAL | |||
324 | Enabling this feature will cause a message to be printed when the P4 | 328 | Enabling this feature will cause a message to be printed when the P4 |
325 | enters thermal throttling. | 329 | enters thermal throttling. |
326 | 330 | ||
331 | config VM86 | ||
332 | default y | ||
333 | bool "Enable VM86 support" if EMBEDDED | ||
334 | help | ||
335 | This option is required by programs like DOSEMU to run 16-bit legacy | ||
336 | code on X86 processors. It also may be needed by software like | ||
337 | XFree86 to initialize some video cards via BIOS. Disabling this | ||
338 | option saves about 6k. | ||
339 | |||
327 | config TOSHIBA | 340 | config TOSHIBA |
328 | tristate "Toshiba Laptop support" | 341 | tristate "Toshiba Laptop support" |
329 | ---help--- | 342 | ---help--- |
@@ -721,7 +734,7 @@ config KEXEC | |||
721 | help | 734 | help |
722 | kexec is a system call that implements the ability to shutdown your | 735 | kexec is a system call that implements the ability to shutdown your |
723 | current kernel, and to start another kernel. It is like a reboot | 736 | current kernel, and to start another kernel. It is like a reboot |
724 | but it is indepedent of the system firmware. And like a reboot | 737 | but it is independent of the system firmware. And like a reboot |
725 | you can start any kernel with it, not just Linux. | 738 | you can start any kernel with it, not just Linux. |
726 | 739 | ||
727 | The name comes from the similarity to the exec system call. | 740 | The name comes from the similarity to the exec system call. |
@@ -1046,13 +1059,27 @@ config SCx200 | |||
1046 | tristate "NatSemi SCx200 support" | 1059 | tristate "NatSemi SCx200 support" |
1047 | depends on !X86_VOYAGER | 1060 | depends on !X86_VOYAGER |
1048 | help | 1061 | help |
1049 | This provides basic support for the National Semiconductor SCx200 | 1062 | This provides basic support for National Semiconductor's |
1050 | processor. Right now this is just a driver for the GPIO pins. | 1063 | (now AMD's) Geode processors. The driver probes for the |
1064 | PCI-IDs of several on-chip devices, so it's a good dependency | ||
1065 | for other scx200_* drivers. | ||
1051 | 1066 | ||
1052 | If you don't know what to do here, say N. | 1067 | If compiled as a module, the driver is named scx200. |
1053 | 1068 | ||
1054 | This support is also available as a module. If compiled as a | 1069 | config SCx200HR_TIMER |
1055 | module, it will be called scx200. | 1070 | tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" |
1071 | depends on SCx200 && GENERIC_TIME | ||
1072 | default y | ||
1073 | help | ||
1074 | This driver provides a clocksource built upon the on-chip | ||
1075 | 27MHz high-resolution timer. It's also a workaround for | ||
1076 | NSC Geode SC-1100's buggy TSC, which loses time when the | ||
1077 | processor goes idle (as is done by the scheduler). The | ||
1078 | other workaround is the idle=poll boot option. | ||
1079 | |||
1080 | config K8_NB | ||
1081 | def_bool y | ||
1082 | depends on AGP_AMD64 | ||
1056 | 1083 | ||
1057 | source "drivers/pcmcia/Kconfig" | 1084 | source "drivers/pcmcia/Kconfig" |
1058 | 1085 | ||
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index eb130482ba18..21c9a4e71104 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu | |||
@@ -41,7 +41,7 @@ config M386 | |||
41 | - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). | 41 | - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). |
42 | - "Geode GX/LX" For AMD Geode GX and LX processors. | 42 | - "Geode GX/LX" For AMD Geode GX and LX processors. |
43 | - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. | 43 | - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. |
44 | - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). | 44 | - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). |
45 | 45 | ||
46 | If you don't know what to do, choose "386". | 46 | If you don't know what to do, choose "386". |
47 | 47 | ||
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile index 33e55476381b..e97946626064 100644 --- a/arch/i386/boot/Makefile +++ b/arch/i386/boot/Makefile | |||
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf | |||
109 | isoimage: $(BOOTIMAGE) | 109 | isoimage: $(BOOTIMAGE) |
110 | -rm -rf $(obj)/isoimage | 110 | -rm -rf $(obj)/isoimage |
111 | mkdir $(obj)/isoimage | 111 | mkdir $(obj)/isoimage |
112 | cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ | 112 | for i in lib lib64 share end ; do \ |
113 | $(obj)/isoimage | 113 | if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ |
114 | cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ | ||
115 | break ; \ | ||
116 | fi ; \ | ||
117 | if [ $$i = end ] ; then exit 1 ; fi ; \ | ||
118 | done | ||
114 | cp $(BOOTIMAGE) $(obj)/isoimage/linux | 119 | cp $(BOOTIMAGE) $(obj)/isoimage/linux |
115 | echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg | 120 | echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg |
116 | if [ -f '$(FDINITRD)' ] ; then \ | 121 | if [ -f '$(FDINITRD)' ] ; then \ |
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index f19f3a7492a5..b2ccd543410d 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c | |||
@@ -24,14 +24,6 @@ | |||
24 | 24 | ||
25 | #undef memset | 25 | #undef memset |
26 | #undef memcpy | 26 | #undef memcpy |
27 | |||
28 | /* | ||
29 | * Why do we do this? Don't ask me.. | ||
30 | * | ||
31 | * Incomprehensible are the ways of bootloaders. | ||
32 | */ | ||
33 | static void* memset(void *, int, size_t); | ||
34 | static void* memcpy(void *, __const void *, size_t); | ||
35 | #define memzero(s, n) memset ((s), 0, (n)) | 27 | #define memzero(s, n) memset ((s), 0, (n)) |
36 | 28 | ||
37 | typedef unsigned char uch; | 29 | typedef unsigned char uch; |
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */ | |||
93 | #endif | 85 | #endif |
94 | #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) | 86 | #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) |
95 | 87 | ||
96 | extern char input_data[]; | 88 | extern unsigned char input_data[]; |
97 | extern int input_len; | 89 | extern int input_len; |
98 | 90 | ||
99 | static long bytes_out = 0; | 91 | static long bytes_out = 0; |
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0; | |||
103 | static void *malloc(int size); | 95 | static void *malloc(int size); |
104 | static void free(void *where); | 96 | static void free(void *where); |
105 | 97 | ||
98 | static void *memset(void *s, int c, unsigned n); | ||
99 | static void *memcpy(void *dest, const void *src, unsigned n); | ||
100 | |||
106 | static void putstr(const char *); | 101 | static void putstr(const char *); |
107 | 102 | ||
108 | extern int end; | 103 | extern int end; |
@@ -205,7 +200,7 @@ static void putstr(const char *s) | |||
205 | outb_p(0xff & (pos >> 1), vidport+1); | 200 | outb_p(0xff & (pos >> 1), vidport+1); |
206 | } | 201 | } |
207 | 202 | ||
208 | static void* memset(void* s, int c, size_t n) | 203 | static void* memset(void* s, int c, unsigned n) |
209 | { | 204 | { |
210 | int i; | 205 | int i; |
211 | char *ss = (char*)s; | 206 | char *ss = (char*)s; |
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n) | |||
214 | return s; | 209 | return s; |
215 | } | 210 | } |
216 | 211 | ||
217 | static void* memcpy(void* __dest, __const void* __src, | 212 | static void* memcpy(void* dest, const void* src, unsigned n) |
218 | size_t __n) | ||
219 | { | 213 | { |
220 | int i; | 214 | int i; |
221 | char *d = (char *)__dest, *s = (char *)__src; | 215 | char *d = (char *)dest, *s = (char *)src; |
222 | 216 | ||
223 | for (i=0;i<__n;i++) d[i] = s[i]; | 217 | for (i=0;i<n;i++) d[i] = s[i]; |
224 | return __dest; | 218 | return dest; |
225 | } | 219 | } |
226 | 220 | ||
227 | /* =========================================================================== | 221 | /* =========================================================================== |
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void) | |||
309 | #else | 303 | #else |
310 | if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); | 304 | if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); |
311 | #endif | 305 | #endif |
312 | output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ | 306 | output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ |
313 | free_mem_end_ptr = (long)real_mode; | 307 | free_mem_end_ptr = (long)real_mode; |
314 | } | 308 | } |
315 | 309 | ||
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv) | |||
324 | #ifdef STANDARD_MEMORY_BIOS_CALL | 318 | #ifdef STANDARD_MEMORY_BIOS_CALL |
325 | if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); | 319 | if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); |
326 | #else | 320 | #else |
327 | if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < | 321 | if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); |
328 | (3*1024)) | ||
329 | error("Less than 4MB of memory"); | ||
330 | #endif | 322 | #endif |
331 | mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START; | 323 | mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; |
332 | low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX | 324 | low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX |
333 | ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; | 325 | ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; |
334 | low_buffer_size = low_buffer_end - LOW_BUFFER_START; | 326 | low_buffer_size = low_buffer_end - LOW_BUFFER_START; |
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index c9343c3a8082..8c2a6faeeae5 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S | |||
@@ -1929,7 +1929,7 @@ skip10: movb %ah, %al | |||
1929 | ret | 1929 | ret |
1930 | 1930 | ||
1931 | store_edid: | 1931 | store_edid: |
1932 | #ifdef CONFIG_FB_FIRMWARE_EDID | 1932 | #ifdef CONFIG_FIRMWARE_EDID |
1933 | pushw %es # just save all registers | 1933 | pushw %es # just save all registers |
1934 | pushw %ax | 1934 | pushw %ax |
1935 | pushw %bx | 1935 | pushw %bx |
@@ -1947,6 +1947,22 @@ store_edid: | |||
1947 | rep | 1947 | rep |
1948 | stosl | 1948 | stosl |
1949 | 1949 | ||
1950 | pushw %es # save ES | ||
1951 | xorw %di, %di # Report Capability | ||
1952 | pushw %di | ||
1953 | popw %es # ES:DI must be 0:0 | ||
1954 | movw $0x4f15, %ax | ||
1955 | xorw %bx, %bx | ||
1956 | xorw %cx, %cx | ||
1957 | int $0x10 | ||
1958 | popw %es # restore ES | ||
1959 | |||
1960 | cmpb $0x00, %ah # call successful | ||
1961 | jne no_edid | ||
1962 | |||
1963 | cmpb $0x4f, %al # function supported | ||
1964 | jne no_edid | ||
1965 | |||
1950 | movw $0x4f15, %ax # do VBE/DDC | 1966 | movw $0x4f15, %ax # do VBE/DDC |
1951 | movw $0x01, %bx | 1967 | movw $0x01, %bx |
1952 | movw $0x00, %cx | 1968 | movw $0x00, %cx |
@@ -1954,6 +1970,7 @@ store_edid: | |||
1954 | movw $0x140, %di | 1970 | movw $0x140, %di |
1955 | int $0x10 | 1971 | int $0x10 |
1956 | 1972 | ||
1973 | no_edid: | ||
1957 | popw %di # restore all registers | 1974 | popw %di # restore all registers |
1958 | popw %dx | 1975 | popw %dx |
1959 | popw %cx | 1976 | popw %cx |
diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S index 911b15377f2e..f942f0c8f630 100644 --- a/arch/i386/crypto/aes-i586-asm.S +++ b/arch/i386/crypto/aes-i586-asm.S | |||
@@ -36,22 +36,19 @@ | |||
36 | .file "aes-i586-asm.S" | 36 | .file "aes-i586-asm.S" |
37 | .text | 37 | .text |
38 | 38 | ||
39 | // aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])// | 39 | #include <asm/asm-offsets.h> |
40 | // aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])// | ||
41 | |||
42 | #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) | ||
43 | 40 | ||
44 | // offsets to parameters with one register pushed onto stack | 41 | #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) |
45 | |||
46 | #define in_blk 8 // input byte array address parameter | ||
47 | #define out_blk 12 // output byte array address parameter | ||
48 | #define ctx 16 // AES context structure | ||
49 | 42 | ||
50 | // offsets in context structure | 43 | /* offsets to parameters with one register pushed onto stack */ |
44 | #define tfm 8 | ||
45 | #define out_blk 12 | ||
46 | #define in_blk 16 | ||
51 | 47 | ||
52 | #define ekey 0 // encryption key schedule base address | 48 | /* offsets in crypto_tfm structure */ |
53 | #define nrnd 256 // number of rounds | 49 | #define ekey (crypto_tfm_ctx_offset + 0) |
54 | #define dkey 260 // decryption key schedule base address | 50 | #define nrnd (crypto_tfm_ctx_offset + 256) |
51 | #define dkey (crypto_tfm_ctx_offset + 260) | ||
55 | 52 | ||
56 | // register mapping for encrypt and decrypt subroutines | 53 | // register mapping for encrypt and decrypt subroutines |
57 | 54 | ||
@@ -220,6 +217,7 @@ | |||
220 | do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ | 217 | do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ |
221 | 218 | ||
222 | // AES (Rijndael) Encryption Subroutine | 219 | // AES (Rijndael) Encryption Subroutine |
220 | /* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ | ||
223 | 221 | ||
224 | .global aes_enc_blk | 222 | .global aes_enc_blk |
225 | 223 | ||
@@ -230,7 +228,7 @@ | |||
230 | 228 | ||
231 | aes_enc_blk: | 229 | aes_enc_blk: |
232 | push %ebp | 230 | push %ebp |
233 | mov ctx(%esp),%ebp // pointer to context | 231 | mov tfm(%esp),%ebp |
234 | 232 | ||
235 | // CAUTION: the order and the values used in these assigns | 233 | // CAUTION: the order and the values used in these assigns |
236 | // rely on the register mappings | 234 | // rely on the register mappings |
@@ -295,6 +293,7 @@ aes_enc_blk: | |||
295 | ret | 293 | ret |
296 | 294 | ||
297 | // AES (Rijndael) Decryption Subroutine | 295 | // AES (Rijndael) Decryption Subroutine |
296 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ | ||
298 | 297 | ||
299 | .global aes_dec_blk | 298 | .global aes_dec_blk |
300 | 299 | ||
@@ -305,7 +304,7 @@ aes_enc_blk: | |||
305 | 304 | ||
306 | aes_dec_blk: | 305 | aes_dec_blk: |
307 | push %ebp | 306 | push %ebp |
308 | mov ctx(%esp),%ebp // pointer to context | 307 | mov tfm(%esp),%ebp |
309 | 308 | ||
310 | // CAUTION: the order and the values used in these assigns | 309 | // CAUTION: the order and the values used in these assigns |
311 | // rely on the register mappings | 310 | // rely on the register mappings |
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c index a50397b1d5c7..d3806daa3de3 100644 --- a/arch/i386/crypto/aes.c +++ b/arch/i386/crypto/aes.c | |||
@@ -45,8 +45,8 @@ | |||
45 | #include <linux/crypto.h> | 45 | #include <linux/crypto.h> |
46 | #include <linux/linkage.h> | 46 | #include <linux/linkage.h> |
47 | 47 | ||
48 | asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx); | 48 | asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); |
49 | asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx); | 49 | asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); |
50 | 50 | ||
51 | #define AES_MIN_KEY_SIZE 16 | 51 | #define AES_MIN_KEY_SIZE 16 |
52 | #define AES_MAX_KEY_SIZE 32 | 52 | #define AES_MAX_KEY_SIZE 32 |
@@ -378,12 +378,12 @@ static void gen_tabs(void) | |||
378 | k[8*(i)+11] = ss[3]; \ | 378 | k[8*(i)+11] = ss[3]; \ |
379 | } | 379 | } |
380 | 380 | ||
381 | static int | 381 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, |
382 | aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) | 382 | unsigned int key_len, u32 *flags) |
383 | { | 383 | { |
384 | int i; | 384 | int i; |
385 | u32 ss[8]; | 385 | u32 ss[8]; |
386 | struct aes_ctx *ctx = ctx_arg; | 386 | struct aes_ctx *ctx = crypto_tfm_ctx(tfm); |
387 | const __le32 *key = (const __le32 *)in_key; | 387 | const __le32 *key = (const __le32 *)in_key; |
388 | 388 | ||
389 | /* encryption schedule */ | 389 | /* encryption schedule */ |
@@ -464,16 +464,16 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags) | |||
464 | return 0; | 464 | return 0; |
465 | } | 465 | } |
466 | 466 | ||
467 | static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src) | 467 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
468 | { | 468 | { |
469 | aes_enc_blk(src, dst, ctx); | 469 | aes_enc_blk(tfm, dst, src); |
470 | } | 470 | } |
471 | static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src) | 471 | |
472 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
472 | { | 473 | { |
473 | aes_dec_blk(src, dst, ctx); | 474 | aes_dec_blk(tfm, dst, src); |
474 | } | 475 | } |
475 | 476 | ||
476 | |||
477 | static struct crypto_alg aes_alg = { | 477 | static struct crypto_alg aes_alg = { |
478 | .cra_name = "aes", | 478 | .cra_name = "aes", |
479 | .cra_driver_name = "aes-i586", | 479 | .cra_driver_name = "aes-i586", |
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 96fb8a020af2..5e70c2fb273a 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds | |||
7 | obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ | 7 | obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ |
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ |
9 | pci-dma.o i386_ksyms.o i387.o bootflag.o \ | 9 | pci-dma.o i386_ksyms.o i387.o bootflag.o \ |
10 | quirks.o i8237.o topology.o alternative.o | 10 | quirks.o i8237.o topology.o alternative.o i8253.o tsc.o |
11 | 11 | ||
12 | obj-y += cpu/ | 12 | obj-y += cpu/ |
13 | obj-y += timers/ | ||
14 | obj-y += acpi/ | 13 | obj-y += acpi/ |
15 | obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o | 14 | obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o |
16 | obj-$(CONFIG_MCA) += mca.o | 15 | obj-$(CONFIG_MCA) += mca.o |
@@ -37,6 +36,8 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o | |||
37 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o | 36 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o |
38 | obj-$(CONFIG_VM86) += vm86.o | 37 | obj-$(CONFIG_VM86) += vm86.o |
39 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 38 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
39 | obj-$(CONFIG_HPET_TIMER) += hpet.o | ||
40 | obj-$(CONFIG_K8_NB) += k8.o | ||
40 | 41 | ||
41 | EXTRA_AFLAGS := -traditional | 42 | EXTRA_AFLAGS := -traditional |
42 | 43 | ||
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r | |||
76 | $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ | 77 | $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ |
77 | $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE | 78 | $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE |
78 | $(call if_changed,syscall) | 79 | $(call if_changed,syscall) |
80 | |||
81 | k8-y += ../../x86_64/kernel/k8.o | ||
82 | |||
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 5cbd6f99fb2a..50eb0e03777e 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c | |||
@@ -4,27 +4,41 @@ | |||
4 | #include <asm/alternative.h> | 4 | #include <asm/alternative.h> |
5 | #include <asm/sections.h> | 5 | #include <asm/sections.h> |
6 | 6 | ||
7 | #define DEBUG 0 | 7 | static int no_replacement = 0; |
8 | #if DEBUG | 8 | static int smp_alt_once = 0; |
9 | # define DPRINTK(fmt, args...) printk(fmt, args) | 9 | static int debug_alternative = 0; |
10 | #else | 10 | |
11 | # define DPRINTK(fmt, args...) | 11 | static int __init noreplacement_setup(char *s) |
12 | #endif | 12 | { |
13 | no_replacement = 1; | ||
14 | return 1; | ||
15 | } | ||
16 | static int __init bootonly(char *str) | ||
17 | { | ||
18 | smp_alt_once = 1; | ||
19 | return 1; | ||
20 | } | ||
21 | static int __init debug_alt(char *str) | ||
22 | { | ||
23 | debug_alternative = 1; | ||
24 | return 1; | ||
25 | } | ||
13 | 26 | ||
27 | __setup("noreplacement", noreplacement_setup); | ||
28 | __setup("smp-alt-boot", bootonly); | ||
29 | __setup("debug-alternative", debug_alt); | ||
30 | |||
31 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | ||
32 | printk(KERN_DEBUG fmt, args) | ||
33 | |||
34 | #ifdef GENERIC_NOP1 | ||
14 | /* Use inline assembly to define this because the nops are defined | 35 | /* Use inline assembly to define this because the nops are defined |
15 | as inline assembly strings in the include files and we cannot | 36 | as inline assembly strings in the include files and we cannot |
16 | get them easily into strings. */ | 37 | get them easily into strings. */ |
17 | asm("\t.data\nintelnops: " | 38 | asm("\t.data\nintelnops: " |
18 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | 39 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 |
19 | GENERIC_NOP7 GENERIC_NOP8); | 40 | GENERIC_NOP7 GENERIC_NOP8); |
20 | asm("\t.data\nk8nops: " | 41 | extern unsigned char intelnops[]; |
21 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | ||
22 | K8_NOP7 K8_NOP8); | ||
23 | asm("\t.data\nk7nops: " | ||
24 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | ||
25 | K7_NOP7 K7_NOP8); | ||
26 | |||
27 | extern unsigned char intelnops[], k8nops[], k7nops[]; | ||
28 | static unsigned char *intel_nops[ASM_NOP_MAX+1] = { | 42 | static unsigned char *intel_nops[ASM_NOP_MAX+1] = { |
29 | NULL, | 43 | NULL, |
30 | intelnops, | 44 | intelnops, |
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = { | |||
36 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, | 50 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, |
37 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 51 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
38 | }; | 52 | }; |
53 | #endif | ||
54 | |||
55 | #ifdef K8_NOP1 | ||
56 | asm("\t.data\nk8nops: " | ||
57 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | ||
58 | K8_NOP7 K8_NOP8); | ||
59 | extern unsigned char k8nops[]; | ||
39 | static unsigned char *k8_nops[ASM_NOP_MAX+1] = { | 60 | static unsigned char *k8_nops[ASM_NOP_MAX+1] = { |
40 | NULL, | 61 | NULL, |
41 | k8nops, | 62 | k8nops, |
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { | |||
47 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, | 68 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, |
48 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 69 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
49 | }; | 70 | }; |
71 | #endif | ||
72 | |||
73 | #ifdef K7_NOP1 | ||
74 | asm("\t.data\nk7nops: " | ||
75 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | ||
76 | K7_NOP7 K7_NOP8); | ||
77 | extern unsigned char k7nops[]; | ||
50 | static unsigned char *k7_nops[ASM_NOP_MAX+1] = { | 78 | static unsigned char *k7_nops[ASM_NOP_MAX+1] = { |
51 | NULL, | 79 | NULL, |
52 | k7nops, | 80 | k7nops, |
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = { | |||
58 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, | 86 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, |
59 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 87 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
60 | }; | 88 | }; |
89 | #endif | ||
90 | |||
91 | #ifdef CONFIG_X86_64 | ||
92 | |||
93 | extern char __vsyscall_0; | ||
94 | static inline unsigned char** find_nop_table(void) | ||
95 | { | ||
96 | return k8_nops; | ||
97 | } | ||
98 | |||
99 | #else /* CONFIG_X86_64 */ | ||
100 | |||
61 | static struct nop { | 101 | static struct nop { |
62 | int cpuid; | 102 | int cpuid; |
63 | unsigned char **noptable; | 103 | unsigned char **noptable; |
@@ -67,14 +107,6 @@ static struct nop { | |||
67 | { -1, NULL } | 107 | { -1, NULL } |
68 | }; | 108 | }; |
69 | 109 | ||
70 | |||
71 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | ||
72 | extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; | ||
73 | extern u8 *__smp_locks[], *__smp_locks_end[]; | ||
74 | |||
75 | extern u8 __smp_alt_begin[], __smp_alt_end[]; | ||
76 | |||
77 | |||
78 | static unsigned char** find_nop_table(void) | 110 | static unsigned char** find_nop_table(void) |
79 | { | 111 | { |
80 | unsigned char **noptable = intel_nops; | 112 | unsigned char **noptable = intel_nops; |
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void) | |||
89 | return noptable; | 121 | return noptable; |
90 | } | 122 | } |
91 | 123 | ||
124 | #endif /* CONFIG_X86_64 */ | ||
125 | |||
126 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | ||
127 | extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; | ||
128 | extern u8 *__smp_locks[], *__smp_locks_end[]; | ||
129 | |||
130 | extern u8 __smp_alt_begin[], __smp_alt_end[]; | ||
131 | |||
92 | /* Replace instructions with better alternatives for this CPU type. | 132 | /* Replace instructions with better alternatives for this CPU type. |
93 | This runs before SMP is initialized to avoid SMP problems with | 133 | This runs before SMP is initialized to avoid SMP problems with |
94 | self modifying code. This implies that asymmetric systems where | 134 | self modifying code. This implies that asymmetric systems where |
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | |||
99 | { | 139 | { |
100 | unsigned char **noptable = find_nop_table(); | 140 | unsigned char **noptable = find_nop_table(); |
101 | struct alt_instr *a; | 141 | struct alt_instr *a; |
142 | u8 *instr; | ||
102 | int diff, i, k; | 143 | int diff, i, k; |
103 | 144 | ||
104 | DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); | 145 | DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); |
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | |||
106 | BUG_ON(a->replacementlen > a->instrlen); | 147 | BUG_ON(a->replacementlen > a->instrlen); |
107 | if (!boot_cpu_has(a->cpuid)) | 148 | if (!boot_cpu_has(a->cpuid)) |
108 | continue; | 149 | continue; |
109 | memcpy(a->instr, a->replacement, a->replacementlen); | 150 | instr = a->instr; |
151 | #ifdef CONFIG_X86_64 | ||
152 | /* vsyscall code is not mapped yet. resolve it manually. */ | ||
153 | if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { | ||
154 | instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); | ||
155 | DPRINTK("%s: vsyscall fixup: %p => %p\n", | ||
156 | __FUNCTION__, a->instr, instr); | ||
157 | } | ||
158 | #endif | ||
159 | memcpy(instr, a->replacement, a->replacementlen); | ||
110 | diff = a->instrlen - a->replacementlen; | 160 | diff = a->instrlen - a->replacementlen; |
111 | /* Pad the rest with nops */ | 161 | /* Pad the rest with nops */ |
112 | for (i = a->replacementlen; diff > 0; diff -= k, i += k) { | 162 | for (i = a->replacementlen; diff > 0; diff -= k, i += k) { |
@@ -186,14 +236,6 @@ struct smp_alt_module { | |||
186 | static LIST_HEAD(smp_alt_modules); | 236 | static LIST_HEAD(smp_alt_modules); |
187 | static DEFINE_SPINLOCK(smp_alt); | 237 | static DEFINE_SPINLOCK(smp_alt); |
188 | 238 | ||
189 | static int smp_alt_once = 0; | ||
190 | static int __init bootonly(char *str) | ||
191 | { | ||
192 | smp_alt_once = 1; | ||
193 | return 1; | ||
194 | } | ||
195 | __setup("smp-alt-boot", bootonly); | ||
196 | |||
197 | void alternatives_smp_module_add(struct module *mod, char *name, | 239 | void alternatives_smp_module_add(struct module *mod, char *name, |
198 | void *locks, void *locks_end, | 240 | void *locks, void *locks_end, |
199 | void *text, void *text_end) | 241 | void *text, void *text_end) |
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name, | |||
201 | struct smp_alt_module *smp; | 243 | struct smp_alt_module *smp; |
202 | unsigned long flags; | 244 | unsigned long flags; |
203 | 245 | ||
246 | if (no_replacement) | ||
247 | return; | ||
248 | |||
204 | if (smp_alt_once) { | 249 | if (smp_alt_once) { |
205 | if (boot_cpu_has(X86_FEATURE_UP)) | 250 | if (boot_cpu_has(X86_FEATURE_UP)) |
206 | alternatives_smp_unlock(locks, locks_end, | 251 | alternatives_smp_unlock(locks, locks_end, |
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod) | |||
235 | struct smp_alt_module *item; | 280 | struct smp_alt_module *item; |
236 | unsigned long flags; | 281 | unsigned long flags; |
237 | 282 | ||
238 | if (smp_alt_once) | 283 | if (no_replacement || smp_alt_once) |
239 | return; | 284 | return; |
240 | 285 | ||
241 | spin_lock_irqsave(&smp_alt, flags); | 286 | spin_lock_irqsave(&smp_alt, flags); |
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp) | |||
256 | struct smp_alt_module *mod; | 301 | struct smp_alt_module *mod; |
257 | unsigned long flags; | 302 | unsigned long flags; |
258 | 303 | ||
259 | if (smp_alt_once) | 304 | if (no_replacement || smp_alt_once) |
260 | return; | 305 | return; |
261 | BUG_ON(!smp && (num_online_cpus() > 1)); | 306 | BUG_ON(!smp && (num_online_cpus() > 1)); |
262 | 307 | ||
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp) | |||
285 | 330 | ||
286 | void __init alternative_instructions(void) | 331 | void __init alternative_instructions(void) |
287 | { | 332 | { |
333 | if (no_replacement) { | ||
334 | printk(KERN_INFO "(SMP-)alternatives turned off\n"); | ||
335 | free_init_pages("SMP alternatives", | ||
336 | (unsigned long)__smp_alt_begin, | ||
337 | (unsigned long)__smp_alt_end); | ||
338 | return; | ||
339 | } | ||
288 | apply_alternatives(__alt_instructions, __alt_instructions_end); | 340 | apply_alternatives(__alt_instructions, __alt_instructions_end); |
289 | 341 | ||
290 | /* switch to patch-once-at-boottime-only mode and free the | 342 | /* switch to patch-once-at-boottime-only mode and free the |
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 5ab59c12335b..7ce09492fc0c 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/arch_hooks.h> | 36 | #include <asm/arch_hooks.h> |
37 | #include <asm/hpet.h> | 37 | #include <asm/hpet.h> |
38 | #include <asm/i8253.h> | 38 | #include <asm/i8253.h> |
39 | #include <asm/nmi.h> | ||
39 | 40 | ||
40 | #include <mach_apic.h> | 41 | #include <mach_apic.h> |
41 | #include <mach_apicdef.h> | 42 | #include <mach_apicdef.h> |
@@ -156,7 +157,7 @@ void clear_local_APIC(void) | |||
156 | maxlvt = get_maxlvt(); | 157 | maxlvt = get_maxlvt(); |
157 | 158 | ||
158 | /* | 159 | /* |
159 | * Masking an LVT entry on a P6 can trigger a local APIC error | 160 | * Masking an LVT entry can trigger a local APIC error |
160 | * if the vector is zero. Mask LVTERR first to prevent this. | 161 | * if the vector is zero. Mask LVTERR first to prevent this. |
161 | */ | 162 | */ |
162 | if (maxlvt >= 3) { | 163 | if (maxlvt >= 3) { |
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void) | |||
1117 | unsigned long v; | 1118 | unsigned long v; |
1118 | 1119 | ||
1119 | v = apic_read(APIC_LVTT); | 1120 | v = apic_read(APIC_LVTT); |
1120 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | 1121 | /* |
1122 | * When an illegal vector value (0-15) is written to an LVT | ||
1123 | * entry and delivery mode is Fixed, the APIC may signal an | ||
1124 | * illegal vector error, without regard to whether the mask | ||
1125 | * bit is set or whether an interrupt is actually seen on input. | ||
1126 | * | ||
1127 | * Boot sequence might call this function when the LVTT has | ||
1128 | * '0' vector value. So make sure vector field is set to | ||
1129 | * valid value. | ||
1130 | */ | ||
1131 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
1132 | apic_write_around(APIC_LVTT, v); | ||
1121 | } | 1133 | } |
1122 | } | 1134 | } |
1123 | 1135 | ||
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 9e819eb68229..7c5729d1fd06 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -764,9 +764,9 @@ static int apm_do_idle(void) | |||
764 | int idled = 0; | 764 | int idled = 0; |
765 | int polling; | 765 | int polling; |
766 | 766 | ||
767 | polling = test_thread_flag(TIF_POLLING_NRFLAG); | 767 | polling = !!(current_thread_info()->status & TS_POLLING); |
768 | if (polling) { | 768 | if (polling) { |
769 | clear_thread_flag(TIF_POLLING_NRFLAG); | 769 | current_thread_info()->status &= ~TS_POLLING; |
770 | smp_mb__after_clear_bit(); | 770 | smp_mb__after_clear_bit(); |
771 | } | 771 | } |
772 | if (!need_resched()) { | 772 | if (!need_resched()) { |
@@ -774,7 +774,7 @@ static int apm_do_idle(void) | |||
774 | ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); | 774 | ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); |
775 | } | 775 | } |
776 | if (polling) | 776 | if (polling) |
777 | set_thread_flag(TIF_POLLING_NRFLAG); | 777 | current_thread_info()->status |= TS_POLLING; |
778 | 778 | ||
779 | if (!idled) | 779 | if (!idled) |
780 | return 0; | 780 | return 0; |
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 36d66e2077d0..1c3a809e6421 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * to extract and format the required data. | 4 | * to extract and format the required data. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/crypto.h> | ||
7 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
8 | #include <linux/signal.h> | 9 | #include <linux/signal.h> |
9 | #include <linux/personality.h> | 10 | #include <linux/personality.h> |
@@ -69,4 +70,6 @@ void foo(void) | |||
69 | 70 | ||
70 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 71 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
71 | DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); | 72 | DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); |
73 | |||
74 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | ||
72 | } | 75 | } |
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 786d1a57048b..fd0457c9c827 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
224 | 224 | ||
225 | #ifdef CONFIG_X86_HT | 225 | #ifdef CONFIG_X86_HT |
226 | /* | 226 | /* |
227 | * On an AMD dual core setup the lower bits of the APIC id | 227 | * On an AMD multi core setup the lower bits of the APIC id |
228 | * distinguish the cores. Assumes number of cores is a power | 228 | * distinguish the cores. |
229 | * of two. | ||
230 | */ | 229 | */ |
231 | if (c->x86_max_cores > 1) { | 230 | if (c->x86_max_cores > 1) { |
232 | int cpu = smp_processor_id(); | 231 | int cpu = smp_processor_id(); |
233 | unsigned bits = 0; | 232 | unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; |
234 | while ((1 << bits) < c->x86_max_cores) | 233 | |
235 | bits++; | 234 | if (bits == 0) { |
235 | while ((1 << bits) < c->x86_max_cores) | ||
236 | bits++; | ||
237 | } | ||
236 | cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); | 238 | cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); |
237 | phys_proc_id[cpu] >>= bits; | 239 | phys_proc_id[cpu] >>= bits; |
238 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", | 240 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", |
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
240 | } | 242 | } |
241 | #endif | 243 | #endif |
242 | 244 | ||
245 | if (cpuid_eax(0x80000000) >= 0x80000006) | ||
246 | num_cache_leaves = 3; | ||
243 | } | 247 | } |
244 | 248 | ||
245 | static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 249 | static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index fc32c8028e24..f03b7f94c304 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -354,7 +354,7 @@ static void __init init_nsc(struct cpuinfo_x86 *c) | |||
354 | * This function only handles the GX processor, and kicks every | 354 | * This function only handles the GX processor, and kicks every |
355 | * thing else to the Cyrix init function above - that should | 355 | * thing else to the Cyrix init function above - that should |
356 | * cover any processors that might have been branded differently | 356 | * cover any processors that might have been branded differently |
357 | * after NSC aquired Cyrix. | 357 | * after NSC acquired Cyrix. |
358 | * | 358 | * |
359 | * If this breaks your GX1 horribly, please e-mail | 359 | * If this breaks your GX1 horribly, please e-mail |
360 | * info-linux@ldcmail.amd.com to tell us. | 360 | * info-linux@ldcmail.amd.com to tell us. |
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5386b29bb5a5..10afc645c540 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c | |||
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
122 | 122 | ||
123 | select_idle_routine(c); | 123 | select_idle_routine(c); |
124 | l2 = init_intel_cacheinfo(c); | 124 | l2 = init_intel_cacheinfo(c); |
125 | if (c->cpuid_level > 9 ) { | ||
126 | unsigned eax = cpuid_eax(10); | ||
127 | /* Check for version and the number of counters */ | ||
128 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | ||
129 | set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); | ||
130 | } | ||
125 | 131 | ||
126 | /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ | 132 | /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ |
127 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) | 133 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) |
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index c8547a6fa7e6..6c37b4fd8ce2 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Changes: | 4 | * Changes: |
5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) | 5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) |
6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. | 6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. |
7 | * Andi Kleen : CPUID4 emulation on AMD. | ||
7 | */ | 8 | */ |
8 | 9 | ||
9 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -130,25 +131,111 @@ struct _cpuid4_info { | |||
130 | cpumask_t shared_cpu_map; | 131 | cpumask_t shared_cpu_map; |
131 | }; | 132 | }; |
132 | 133 | ||
133 | static unsigned short num_cache_leaves; | 134 | unsigned short num_cache_leaves; |
135 | |||
136 | /* AMD doesn't have CPUID4. Emulate it here to report the same | ||
137 | information to the user. This makes some assumptions about the machine: | ||
138 | No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs. | ||
139 | |||
140 | In theory the TLBs could be reported as fake type (they are in "dummy"). | ||
141 | Maybe later */ | ||
142 | union l1_cache { | ||
143 | struct { | ||
144 | unsigned line_size : 8; | ||
145 | unsigned lines_per_tag : 8; | ||
146 | unsigned assoc : 8; | ||
147 | unsigned size_in_kb : 8; | ||
148 | }; | ||
149 | unsigned val; | ||
150 | }; | ||
151 | |||
152 | union l2_cache { | ||
153 | struct { | ||
154 | unsigned line_size : 8; | ||
155 | unsigned lines_per_tag : 4; | ||
156 | unsigned assoc : 4; | ||
157 | unsigned size_in_kb : 16; | ||
158 | }; | ||
159 | unsigned val; | ||
160 | }; | ||
161 | |||
162 | static unsigned short assocs[] = { | ||
163 | [1] = 1, [2] = 2, [4] = 4, [6] = 8, | ||
164 | [8] = 16, | ||
165 | [0xf] = 0xffff // ?? | ||
166 | }; | ||
167 | static unsigned char levels[] = { 1, 1, 2 }; | ||
168 | static unsigned char types[] = { 1, 2, 3 }; | ||
169 | |||
170 | static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | ||
171 | union _cpuid4_leaf_ebx *ebx, | ||
172 | union _cpuid4_leaf_ecx *ecx) | ||
173 | { | ||
174 | unsigned dummy; | ||
175 | unsigned line_size, lines_per_tag, assoc, size_in_kb; | ||
176 | union l1_cache l1i, l1d; | ||
177 | union l2_cache l2; | ||
178 | |||
179 | eax->full = 0; | ||
180 | ebx->full = 0; | ||
181 | ecx->full = 0; | ||
182 | |||
183 | cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); | ||
184 | cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy); | ||
185 | |||
186 | if (leaf > 2 || !l1d.val || !l1i.val || !l2.val) | ||
187 | return; | ||
188 | |||
189 | eax->split.is_self_initializing = 1; | ||
190 | eax->split.type = types[leaf]; | ||
191 | eax->split.level = levels[leaf]; | ||
192 | eax->split.num_threads_sharing = 0; | ||
193 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; | ||
194 | |||
195 | if (leaf <= 1) { | ||
196 | union l1_cache *l1 = leaf == 0 ? &l1d : &l1i; | ||
197 | assoc = l1->assoc; | ||
198 | line_size = l1->line_size; | ||
199 | lines_per_tag = l1->lines_per_tag; | ||
200 | size_in_kb = l1->size_in_kb; | ||
201 | } else { | ||
202 | assoc = l2.assoc; | ||
203 | line_size = l2.line_size; | ||
204 | lines_per_tag = l2.lines_per_tag; | ||
205 | /* cpu_data has errata corrections for K7 applied */ | ||
206 | size_in_kb = current_cpu_data.x86_cache_size; | ||
207 | } | ||
208 | |||
209 | if (assoc == 0xf) | ||
210 | eax->split.is_fully_associative = 1; | ||
211 | ebx->split.coherency_line_size = line_size - 1; | ||
212 | ebx->split.ways_of_associativity = assocs[assoc] - 1; | ||
213 | ebx->split.physical_line_partition = lines_per_tag - 1; | ||
214 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / | ||
215 | (ebx->split.ways_of_associativity + 1) - 1; | ||
216 | } | ||
134 | 217 | ||
135 | static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | 218 | static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) |
136 | { | 219 | { |
137 | unsigned int eax, ebx, ecx, edx; | 220 | union _cpuid4_leaf_eax eax; |
138 | union _cpuid4_leaf_eax cache_eax; | 221 | union _cpuid4_leaf_ebx ebx; |
222 | union _cpuid4_leaf_ecx ecx; | ||
223 | unsigned edx; | ||
139 | 224 | ||
140 | cpuid_count(4, index, &eax, &ebx, &ecx, &edx); | 225 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) |
141 | cache_eax.full = eax; | 226 | amd_cpuid4(index, &eax, &ebx, &ecx); |
142 | if (cache_eax.split.type == CACHE_TYPE_NULL) | 227 | else |
228 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | ||
229 | if (eax.split.type == CACHE_TYPE_NULL) | ||
143 | return -EIO; /* better error ? */ | 230 | return -EIO; /* better error ? */ |
144 | 231 | ||
145 | this_leaf->eax.full = eax; | 232 | this_leaf->eax = eax; |
146 | this_leaf->ebx.full = ebx; | 233 | this_leaf->ebx = ebx; |
147 | this_leaf->ecx.full = ecx; | 234 | this_leaf->ecx = ecx; |
148 | this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * | 235 | this_leaf->size = (ecx.split.number_of_sets + 1) * |
149 | (this_leaf->ebx.split.coherency_line_size + 1) * | 236 | (ebx.split.coherency_line_size + 1) * |
150 | (this_leaf->ebx.split.physical_line_partition + 1) * | 237 | (ebx.split.physical_line_partition + 1) * |
151 | (this_leaf->ebx.split.ways_of_associativity + 1); | 238 | (ebx.split.ways_of_associativity + 1); |
152 | return 0; | 239 | return 0; |
153 | } | 240 | } |
154 | 241 | ||
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 21dc1bbb8067..48f0f62f781c 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c | |||
@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) | |||
120 | return 1; | 120 | return 1; |
121 | } | 121 | } |
122 | 122 | ||
123 | /* | ||
124 | * By using the NMI code instead of a vector we just sneak thru the | ||
125 | * word generator coming out with just what we want. AND it does | ||
126 | * not matter if clustered_apic_mode is set or not. | ||
127 | */ | ||
128 | static void smp_send_nmi_allbutself(void) | 123 | static void smp_send_nmi_allbutself(void) |
129 | { | 124 | { |
130 | send_IPI_allbutself(APIC_DM_NMI); | 125 | send_IPI_allbutself(NMI_VECTOR); |
131 | } | 126 | } |
132 | 127 | ||
133 | static void nmi_shootdown_cpus(void) | 128 | static void nmi_shootdown_cpus(void) |
@@ -163,7 +158,7 @@ static void nmi_shootdown_cpus(void) | |||
163 | void machine_crash_shutdown(struct pt_regs *regs) | 158 | void machine_crash_shutdown(struct pt_regs *regs) |
164 | { | 159 | { |
165 | /* This function is only called after the system | 160 | /* This function is only called after the system |
166 | * has paniced or is otherwise in a critical state. | 161 | * has panicked or is otherwise in a critical state. |
167 | * The minimum amount of code to allow a kexec'd kernel | 162 | * The minimum amount of code to allow a kexec'd kernel |
168 | * to run successfully needs to happen here. | 163 | * to run successfully needs to happen here. |
169 | * | 164 | * |
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index cfc683f153b9..e6e4506e749a 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/smp.h> | 48 | #include <asm/smp.h> |
49 | #include <asm/page.h> | 49 | #include <asm/page.h> |
50 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
51 | #include <asm/dwarf2.h> | ||
51 | #include "irq_vectors.h" | 52 | #include "irq_vectors.h" |
52 | 53 | ||
53 | #define nr_syscalls ((syscall_table_size)/4) | 54 | #define nr_syscalls ((syscall_table_size)/4) |
@@ -85,31 +86,67 @@ VM_MASK = 0x00020000 | |||
85 | #define SAVE_ALL \ | 86 | #define SAVE_ALL \ |
86 | cld; \ | 87 | cld; \ |
87 | pushl %es; \ | 88 | pushl %es; \ |
89 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
90 | /*CFI_REL_OFFSET es, 0;*/\ | ||
88 | pushl %ds; \ | 91 | pushl %ds; \ |
92 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
93 | /*CFI_REL_OFFSET ds, 0;*/\ | ||
89 | pushl %eax; \ | 94 | pushl %eax; \ |
95 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
96 | CFI_REL_OFFSET eax, 0;\ | ||
90 | pushl %ebp; \ | 97 | pushl %ebp; \ |
98 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
99 | CFI_REL_OFFSET ebp, 0;\ | ||
91 | pushl %edi; \ | 100 | pushl %edi; \ |
101 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
102 | CFI_REL_OFFSET edi, 0;\ | ||
92 | pushl %esi; \ | 103 | pushl %esi; \ |
104 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
105 | CFI_REL_OFFSET esi, 0;\ | ||
93 | pushl %edx; \ | 106 | pushl %edx; \ |
107 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
108 | CFI_REL_OFFSET edx, 0;\ | ||
94 | pushl %ecx; \ | 109 | pushl %ecx; \ |
110 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
111 | CFI_REL_OFFSET ecx, 0;\ | ||
95 | pushl %ebx; \ | 112 | pushl %ebx; \ |
113 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
114 | CFI_REL_OFFSET ebx, 0;\ | ||
96 | movl $(__USER_DS), %edx; \ | 115 | movl $(__USER_DS), %edx; \ |
97 | movl %edx, %ds; \ | 116 | movl %edx, %ds; \ |
98 | movl %edx, %es; | 117 | movl %edx, %es; |
99 | 118 | ||
100 | #define RESTORE_INT_REGS \ | 119 | #define RESTORE_INT_REGS \ |
101 | popl %ebx; \ | 120 | popl %ebx; \ |
121 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
122 | CFI_RESTORE ebx;\ | ||
102 | popl %ecx; \ | 123 | popl %ecx; \ |
124 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
125 | CFI_RESTORE ecx;\ | ||
103 | popl %edx; \ | 126 | popl %edx; \ |
127 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
128 | CFI_RESTORE edx;\ | ||
104 | popl %esi; \ | 129 | popl %esi; \ |
130 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
131 | CFI_RESTORE esi;\ | ||
105 | popl %edi; \ | 132 | popl %edi; \ |
133 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
134 | CFI_RESTORE edi;\ | ||
106 | popl %ebp; \ | 135 | popl %ebp; \ |
107 | popl %eax | 136 | CFI_ADJUST_CFA_OFFSET -4;\ |
137 | CFI_RESTORE ebp;\ | ||
138 | popl %eax; \ | ||
139 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
140 | CFI_RESTORE eax | ||
108 | 141 | ||
109 | #define RESTORE_REGS \ | 142 | #define RESTORE_REGS \ |
110 | RESTORE_INT_REGS; \ | 143 | RESTORE_INT_REGS; \ |
111 | 1: popl %ds; \ | 144 | 1: popl %ds; \ |
145 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
146 | /*CFI_RESTORE ds;*/\ | ||
112 | 2: popl %es; \ | 147 | 2: popl %es; \ |
148 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
149 | /*CFI_RESTORE es;*/\ | ||
113 | .section .fixup,"ax"; \ | 150 | .section .fixup,"ax"; \ |
114 | 3: movl $0,(%esp); \ | 151 | 3: movl $0,(%esp); \ |
115 | jmp 1b; \ | 152 | jmp 1b; \ |
@@ -122,13 +159,43 @@ VM_MASK = 0x00020000 | |||
122 | .long 2b,4b; \ | 159 | .long 2b,4b; \ |
123 | .previous | 160 | .previous |
124 | 161 | ||
162 | #define RING0_INT_FRAME \ | ||
163 | CFI_STARTPROC simple;\ | ||
164 | CFI_DEF_CFA esp, 3*4;\ | ||
165 | /*CFI_OFFSET cs, -2*4;*/\ | ||
166 | CFI_OFFSET eip, -3*4 | ||
167 | |||
168 | #define RING0_EC_FRAME \ | ||
169 | CFI_STARTPROC simple;\ | ||
170 | CFI_DEF_CFA esp, 4*4;\ | ||
171 | /*CFI_OFFSET cs, -2*4;*/\ | ||
172 | CFI_OFFSET eip, -3*4 | ||
173 | |||
174 | #define RING0_PTREGS_FRAME \ | ||
175 | CFI_STARTPROC simple;\ | ||
176 | CFI_DEF_CFA esp, OLDESP-EBX;\ | ||
177 | /*CFI_OFFSET cs, CS-OLDESP;*/\ | ||
178 | CFI_OFFSET eip, EIP-OLDESP;\ | ||
179 | /*CFI_OFFSET es, ES-OLDESP;*/\ | ||
180 | /*CFI_OFFSET ds, DS-OLDESP;*/\ | ||
181 | CFI_OFFSET eax, EAX-OLDESP;\ | ||
182 | CFI_OFFSET ebp, EBP-OLDESP;\ | ||
183 | CFI_OFFSET edi, EDI-OLDESP;\ | ||
184 | CFI_OFFSET esi, ESI-OLDESP;\ | ||
185 | CFI_OFFSET edx, EDX-OLDESP;\ | ||
186 | CFI_OFFSET ecx, ECX-OLDESP;\ | ||
187 | CFI_OFFSET ebx, EBX-OLDESP | ||
125 | 188 | ||
126 | ENTRY(ret_from_fork) | 189 | ENTRY(ret_from_fork) |
190 | CFI_STARTPROC | ||
127 | pushl %eax | 191 | pushl %eax |
192 | CFI_ADJUST_CFA_OFFSET -4 | ||
128 | call schedule_tail | 193 | call schedule_tail |
129 | GET_THREAD_INFO(%ebp) | 194 | GET_THREAD_INFO(%ebp) |
130 | popl %eax | 195 | popl %eax |
196 | CFI_ADJUST_CFA_OFFSET -4 | ||
131 | jmp syscall_exit | 197 | jmp syscall_exit |
198 | CFI_ENDPROC | ||
132 | 199 | ||
133 | /* | 200 | /* |
134 | * Return to user mode is not as complex as all this looks, | 201 | * Return to user mode is not as complex as all this looks, |
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork) | |||
139 | 206 | ||
140 | # userspace resumption stub bypassing syscall exit tracing | 207 | # userspace resumption stub bypassing syscall exit tracing |
141 | ALIGN | 208 | ALIGN |
209 | RING0_PTREGS_FRAME | ||
142 | ret_from_exception: | 210 | ret_from_exception: |
143 | preempt_stop | 211 | preempt_stop |
144 | ret_from_intr: | 212 | ret_from_intr: |
@@ -171,20 +239,33 @@ need_resched: | |||
171 | call preempt_schedule_irq | 239 | call preempt_schedule_irq |
172 | jmp need_resched | 240 | jmp need_resched |
173 | #endif | 241 | #endif |
242 | CFI_ENDPROC | ||
174 | 243 | ||
175 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 244 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
176 | the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */ | 245 | the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */ |
177 | 246 | ||
178 | # sysenter call handler stub | 247 | # sysenter call handler stub |
179 | ENTRY(sysenter_entry) | 248 | ENTRY(sysenter_entry) |
249 | CFI_STARTPROC simple | ||
250 | CFI_DEF_CFA esp, 0 | ||
251 | CFI_REGISTER esp, ebp | ||
180 | movl TSS_sysenter_esp0(%esp),%esp | 252 | movl TSS_sysenter_esp0(%esp),%esp |
181 | sysenter_past_esp: | 253 | sysenter_past_esp: |
182 | sti | 254 | sti |
183 | pushl $(__USER_DS) | 255 | pushl $(__USER_DS) |
256 | CFI_ADJUST_CFA_OFFSET 4 | ||
257 | /*CFI_REL_OFFSET ss, 0*/ | ||
184 | pushl %ebp | 258 | pushl %ebp |
259 | CFI_ADJUST_CFA_OFFSET 4 | ||
260 | CFI_REL_OFFSET esp, 0 | ||
185 | pushfl | 261 | pushfl |
262 | CFI_ADJUST_CFA_OFFSET 4 | ||
186 | pushl $(__USER_CS) | 263 | pushl $(__USER_CS) |
264 | CFI_ADJUST_CFA_OFFSET 4 | ||
265 | /*CFI_REL_OFFSET cs, 0*/ | ||
187 | pushl $SYSENTER_RETURN | 266 | pushl $SYSENTER_RETURN |
267 | CFI_ADJUST_CFA_OFFSET 4 | ||
268 | CFI_REL_OFFSET eip, 0 | ||
188 | 269 | ||
189 | /* | 270 | /* |
190 | * Load the potential sixth argument from user stack. | 271 | * Load the potential sixth argument from user stack. |
@@ -199,6 +280,7 @@ sysenter_past_esp: | |||
199 | .previous | 280 | .previous |
200 | 281 | ||
201 | pushl %eax | 282 | pushl %eax |
283 | CFI_ADJUST_CFA_OFFSET 4 | ||
202 | SAVE_ALL | 284 | SAVE_ALL |
203 | GET_THREAD_INFO(%ebp) | 285 | GET_THREAD_INFO(%ebp) |
204 | 286 | ||
@@ -219,11 +301,14 @@ sysenter_past_esp: | |||
219 | xorl %ebp,%ebp | 301 | xorl %ebp,%ebp |
220 | sti | 302 | sti |
221 | sysexit | 303 | sysexit |
304 | CFI_ENDPROC | ||
222 | 305 | ||
223 | 306 | ||
224 | # system call handler stub | 307 | # system call handler stub |
225 | ENTRY(system_call) | 308 | ENTRY(system_call) |
309 | RING0_INT_FRAME # can't unwind into user space anyway | ||
226 | pushl %eax # save orig_eax | 310 | pushl %eax # save orig_eax |
311 | CFI_ADJUST_CFA_OFFSET 4 | ||
227 | SAVE_ALL | 312 | SAVE_ALL |
228 | GET_THREAD_INFO(%ebp) | 313 | GET_THREAD_INFO(%ebp) |
229 | testl $TF_MASK,EFLAGS(%esp) | 314 | testl $TF_MASK,EFLAGS(%esp) |
@@ -256,10 +341,12 @@ restore_all: | |||
256 | movb CS(%esp), %al | 341 | movb CS(%esp), %al |
257 | andl $(VM_MASK | (4 << 8) | 3), %eax | 342 | andl $(VM_MASK | (4 << 8) | 3), %eax |
258 | cmpl $((4 << 8) | 3), %eax | 343 | cmpl $((4 << 8) | 3), %eax |
344 | CFI_REMEMBER_STATE | ||
259 | je ldt_ss # returning to user-space with LDT SS | 345 | je ldt_ss # returning to user-space with LDT SS |
260 | restore_nocheck: | 346 | restore_nocheck: |
261 | RESTORE_REGS | 347 | RESTORE_REGS |
262 | addl $4, %esp | 348 | addl $4, %esp |
349 | CFI_ADJUST_CFA_OFFSET -4 | ||
263 | 1: iret | 350 | 1: iret |
264 | .section .fixup,"ax" | 351 | .section .fixup,"ax" |
265 | iret_exc: | 352 | iret_exc: |
@@ -273,6 +360,7 @@ iret_exc: | |||
273 | .long 1b,iret_exc | 360 | .long 1b,iret_exc |
274 | .previous | 361 | .previous |
275 | 362 | ||
363 | CFI_RESTORE_STATE | ||
276 | ldt_ss: | 364 | ldt_ss: |
277 | larl OLDSS(%esp), %eax | 365 | larl OLDSS(%esp), %eax |
278 | jnz restore_nocheck | 366 | jnz restore_nocheck |
@@ -285,11 +373,13 @@ ldt_ss: | |||
285 | * CPUs, which we can try to work around to make | 373 | * CPUs, which we can try to work around to make |
286 | * dosemu and wine happy. */ | 374 | * dosemu and wine happy. */ |
287 | subl $8, %esp # reserve space for switch16 pointer | 375 | subl $8, %esp # reserve space for switch16 pointer |
376 | CFI_ADJUST_CFA_OFFSET 8 | ||
288 | cli | 377 | cli |
289 | movl %esp, %eax | 378 | movl %esp, %eax |
290 | /* Set up the 16bit stack frame with switch32 pointer on top, | 379 | /* Set up the 16bit stack frame with switch32 pointer on top, |
291 | * and a switch16 pointer on top of the current frame. */ | 380 | * and a switch16 pointer on top of the current frame. */ |
292 | call setup_x86_bogus_stack | 381 | call setup_x86_bogus_stack |
382 | CFI_ADJUST_CFA_OFFSET -8 # frame has moved | ||
293 | RESTORE_REGS | 383 | RESTORE_REGS |
294 | lss 20+4(%esp), %esp # switch to 16bit stack | 384 | lss 20+4(%esp), %esp # switch to 16bit stack |
295 | 1: iret | 385 | 1: iret |
@@ -297,9 +387,11 @@ ldt_ss: | |||
297 | .align 4 | 387 | .align 4 |
298 | .long 1b,iret_exc | 388 | .long 1b,iret_exc |
299 | .previous | 389 | .previous |
390 | CFI_ENDPROC | ||
300 | 391 | ||
301 | # perform work that needs to be done immediately before resumption | 392 | # perform work that needs to be done immediately before resumption |
302 | ALIGN | 393 | ALIGN |
394 | RING0_PTREGS_FRAME # can't unwind into user space anyway | ||
303 | work_pending: | 395 | work_pending: |
304 | testb $_TIF_NEED_RESCHED, %cl | 396 | testb $_TIF_NEED_RESCHED, %cl |
305 | jz work_notifysig | 397 | jz work_notifysig |
@@ -329,8 +421,10 @@ work_notifysig: # deal with pending signals and | |||
329 | work_notifysig_v86: | 421 | work_notifysig_v86: |
330 | #ifdef CONFIG_VM86 | 422 | #ifdef CONFIG_VM86 |
331 | pushl %ecx # save ti_flags for do_notify_resume | 423 | pushl %ecx # save ti_flags for do_notify_resume |
424 | CFI_ADJUST_CFA_OFFSET 4 | ||
332 | call save_v86_state # %eax contains pt_regs pointer | 425 | call save_v86_state # %eax contains pt_regs pointer |
333 | popl %ecx | 426 | popl %ecx |
427 | CFI_ADJUST_CFA_OFFSET -4 | ||
334 | movl %eax, %esp | 428 | movl %eax, %esp |
335 | xorl %edx, %edx | 429 | xorl %edx, %edx |
336 | call do_notify_resume | 430 | call do_notify_resume |
@@ -363,19 +457,21 @@ syscall_exit_work: | |||
363 | movl $1, %edx | 457 | movl $1, %edx |
364 | call do_syscall_trace | 458 | call do_syscall_trace |
365 | jmp resume_userspace | 459 | jmp resume_userspace |
460 | CFI_ENDPROC | ||
366 | 461 | ||
367 | ALIGN | 462 | RING0_INT_FRAME # can't unwind into user space anyway |
368 | syscall_fault: | 463 | syscall_fault: |
369 | pushl %eax # save orig_eax | 464 | pushl %eax # save orig_eax |
465 | CFI_ADJUST_CFA_OFFSET 4 | ||
370 | SAVE_ALL | 466 | SAVE_ALL |
371 | GET_THREAD_INFO(%ebp) | 467 | GET_THREAD_INFO(%ebp) |
372 | movl $-EFAULT,EAX(%esp) | 468 | movl $-EFAULT,EAX(%esp) |
373 | jmp resume_userspace | 469 | jmp resume_userspace |
374 | 470 | ||
375 | ALIGN | ||
376 | syscall_badsys: | 471 | syscall_badsys: |
377 | movl $-ENOSYS,EAX(%esp) | 472 | movl $-ENOSYS,EAX(%esp) |
378 | jmp resume_userspace | 473 | jmp resume_userspace |
474 | CFI_ENDPROC | ||
379 | 475 | ||
380 | #define FIXUP_ESPFIX_STACK \ | 476 | #define FIXUP_ESPFIX_STACK \ |
381 | movl %esp, %eax; \ | 477 | movl %esp, %eax; \ |
@@ -387,16 +483,21 @@ syscall_badsys: | |||
387 | movl %eax, %esp; | 483 | movl %eax, %esp; |
388 | #define UNWIND_ESPFIX_STACK \ | 484 | #define UNWIND_ESPFIX_STACK \ |
389 | pushl %eax; \ | 485 | pushl %eax; \ |
486 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
390 | movl %ss, %eax; \ | 487 | movl %ss, %eax; \ |
391 | /* see if on 16bit stack */ \ | 488 | /* see if on 16bit stack */ \ |
392 | cmpw $__ESPFIX_SS, %ax; \ | 489 | cmpw $__ESPFIX_SS, %ax; \ |
393 | jne 28f; \ | 490 | je 28f; \ |
394 | movl $__KERNEL_DS, %edx; \ | 491 | 27: popl %eax; \ |
395 | movl %edx, %ds; \ | 492 | CFI_ADJUST_CFA_OFFSET -4; \ |
396 | movl %edx, %es; \ | 493 | .section .fixup,"ax"; \ |
494 | 28: movl $__KERNEL_DS, %eax; \ | ||
495 | movl %eax, %ds; \ | ||
496 | movl %eax, %es; \ | ||
397 | /* switch to 32bit stack */ \ | 497 | /* switch to 32bit stack */ \ |
398 | FIXUP_ESPFIX_STACK \ | 498 | FIXUP_ESPFIX_STACK; \ |
399 | 28: popl %eax; | 499 | jmp 27b; \ |
500 | .previous | ||
400 | 501 | ||
401 | /* | 502 | /* |
402 | * Build the entry stubs and pointer table with | 503 | * Build the entry stubs and pointer table with |
@@ -408,9 +509,14 @@ ENTRY(interrupt) | |||
408 | 509 | ||
409 | vector=0 | 510 | vector=0 |
410 | ENTRY(irq_entries_start) | 511 | ENTRY(irq_entries_start) |
512 | RING0_INT_FRAME | ||
411 | .rept NR_IRQS | 513 | .rept NR_IRQS |
412 | ALIGN | 514 | ALIGN |
515 | .if vector | ||
516 | CFI_ADJUST_CFA_OFFSET -4 | ||
517 | .endif | ||
413 | 1: pushl $vector-256 | 518 | 1: pushl $vector-256 |
519 | CFI_ADJUST_CFA_OFFSET 4 | ||
414 | jmp common_interrupt | 520 | jmp common_interrupt |
415 | .data | 521 | .data |
416 | .long 1b | 522 | .long 1b |
@@ -424,60 +530,99 @@ common_interrupt: | |||
424 | movl %esp,%eax | 530 | movl %esp,%eax |
425 | call do_IRQ | 531 | call do_IRQ |
426 | jmp ret_from_intr | 532 | jmp ret_from_intr |
533 | CFI_ENDPROC | ||
427 | 534 | ||
428 | #define BUILD_INTERRUPT(name, nr) \ | 535 | #define BUILD_INTERRUPT(name, nr) \ |
429 | ENTRY(name) \ | 536 | ENTRY(name) \ |
537 | RING0_INT_FRAME; \ | ||
430 | pushl $nr-256; \ | 538 | pushl $nr-256; \ |
431 | SAVE_ALL \ | 539 | CFI_ADJUST_CFA_OFFSET 4; \ |
540 | SAVE_ALL; \ | ||
432 | movl %esp,%eax; \ | 541 | movl %esp,%eax; \ |
433 | call smp_/**/name; \ | 542 | call smp_/**/name; \ |
434 | jmp ret_from_intr; | 543 | jmp ret_from_intr; \ |
544 | CFI_ENDPROC | ||
435 | 545 | ||
436 | /* The include is where all of the SMP etc. interrupts come from */ | 546 | /* The include is where all of the SMP etc. interrupts come from */ |
437 | #include "entry_arch.h" | 547 | #include "entry_arch.h" |
438 | 548 | ||
439 | ENTRY(divide_error) | 549 | ENTRY(divide_error) |
550 | RING0_INT_FRAME | ||
440 | pushl $0 # no error code | 551 | pushl $0 # no error code |
552 | CFI_ADJUST_CFA_OFFSET 4 | ||
441 | pushl $do_divide_error | 553 | pushl $do_divide_error |
554 | CFI_ADJUST_CFA_OFFSET 4 | ||
442 | ALIGN | 555 | ALIGN |
443 | error_code: | 556 | error_code: |
444 | pushl %ds | 557 | pushl %ds |
558 | CFI_ADJUST_CFA_OFFSET 4 | ||
559 | /*CFI_REL_OFFSET ds, 0*/ | ||
445 | pushl %eax | 560 | pushl %eax |
561 | CFI_ADJUST_CFA_OFFSET 4 | ||
562 | CFI_REL_OFFSET eax, 0 | ||
446 | xorl %eax, %eax | 563 | xorl %eax, %eax |
447 | pushl %ebp | 564 | pushl %ebp |
565 | CFI_ADJUST_CFA_OFFSET 4 | ||
566 | CFI_REL_OFFSET ebp, 0 | ||
448 | pushl %edi | 567 | pushl %edi |
568 | CFI_ADJUST_CFA_OFFSET 4 | ||
569 | CFI_REL_OFFSET edi, 0 | ||
449 | pushl %esi | 570 | pushl %esi |
571 | CFI_ADJUST_CFA_OFFSET 4 | ||
572 | CFI_REL_OFFSET esi, 0 | ||
450 | pushl %edx | 573 | pushl %edx |
574 | CFI_ADJUST_CFA_OFFSET 4 | ||
575 | CFI_REL_OFFSET edx, 0 | ||
451 | decl %eax # eax = -1 | 576 | decl %eax # eax = -1 |
452 | pushl %ecx | 577 | pushl %ecx |
578 | CFI_ADJUST_CFA_OFFSET 4 | ||
579 | CFI_REL_OFFSET ecx, 0 | ||
453 | pushl %ebx | 580 | pushl %ebx |
581 | CFI_ADJUST_CFA_OFFSET 4 | ||
582 | CFI_REL_OFFSET ebx, 0 | ||
454 | cld | 583 | cld |
455 | pushl %es | 584 | pushl %es |
585 | CFI_ADJUST_CFA_OFFSET 4 | ||
586 | /*CFI_REL_OFFSET es, 0*/ | ||
456 | UNWIND_ESPFIX_STACK | 587 | UNWIND_ESPFIX_STACK |
457 | popl %ecx | 588 | popl %ecx |
589 | CFI_ADJUST_CFA_OFFSET -4 | ||
590 | /*CFI_REGISTER es, ecx*/ | ||
458 | movl ES(%esp), %edi # get the function address | 591 | movl ES(%esp), %edi # get the function address |
459 | movl ORIG_EAX(%esp), %edx # get the error code | 592 | movl ORIG_EAX(%esp), %edx # get the error code |
460 | movl %eax, ORIG_EAX(%esp) | 593 | movl %eax, ORIG_EAX(%esp) |
461 | movl %ecx, ES(%esp) | 594 | movl %ecx, ES(%esp) |
595 | /*CFI_REL_OFFSET es, ES*/ | ||
462 | movl $(__USER_DS), %ecx | 596 | movl $(__USER_DS), %ecx |
463 | movl %ecx, %ds | 597 | movl %ecx, %ds |
464 | movl %ecx, %es | 598 | movl %ecx, %es |
465 | movl %esp,%eax # pt_regs pointer | 599 | movl %esp,%eax # pt_regs pointer |
466 | call *%edi | 600 | call *%edi |
467 | jmp ret_from_exception | 601 | jmp ret_from_exception |
602 | CFI_ENDPROC | ||
468 | 603 | ||
469 | ENTRY(coprocessor_error) | 604 | ENTRY(coprocessor_error) |
605 | RING0_INT_FRAME | ||
470 | pushl $0 | 606 | pushl $0 |
607 | CFI_ADJUST_CFA_OFFSET 4 | ||
471 | pushl $do_coprocessor_error | 608 | pushl $do_coprocessor_error |
609 | CFI_ADJUST_CFA_OFFSET 4 | ||
472 | jmp error_code | 610 | jmp error_code |
611 | CFI_ENDPROC | ||
473 | 612 | ||
474 | ENTRY(simd_coprocessor_error) | 613 | ENTRY(simd_coprocessor_error) |
614 | RING0_INT_FRAME | ||
475 | pushl $0 | 615 | pushl $0 |
616 | CFI_ADJUST_CFA_OFFSET 4 | ||
476 | pushl $do_simd_coprocessor_error | 617 | pushl $do_simd_coprocessor_error |
618 | CFI_ADJUST_CFA_OFFSET 4 | ||
477 | jmp error_code | 619 | jmp error_code |
620 | CFI_ENDPROC | ||
478 | 621 | ||
479 | ENTRY(device_not_available) | 622 | ENTRY(device_not_available) |
623 | RING0_INT_FRAME | ||
480 | pushl $-1 # mark this as an int | 624 | pushl $-1 # mark this as an int |
625 | CFI_ADJUST_CFA_OFFSET 4 | ||
481 | SAVE_ALL | 626 | SAVE_ALL |
482 | movl %cr0, %eax | 627 | movl %cr0, %eax |
483 | testl $0x4, %eax # EM (math emulation bit) | 628 | testl $0x4, %eax # EM (math emulation bit) |
@@ -487,9 +632,12 @@ ENTRY(device_not_available) | |||
487 | jmp ret_from_exception | 632 | jmp ret_from_exception |
488 | device_not_available_emulate: | 633 | device_not_available_emulate: |
489 | pushl $0 # temporary storage for ORIG_EIP | 634 | pushl $0 # temporary storage for ORIG_EIP |
635 | CFI_ADJUST_CFA_OFFSET 4 | ||
490 | call math_emulate | 636 | call math_emulate |
491 | addl $4, %esp | 637 | addl $4, %esp |
638 | CFI_ADJUST_CFA_OFFSET -4 | ||
492 | jmp ret_from_exception | 639 | jmp ret_from_exception |
640 | CFI_ENDPROC | ||
493 | 641 | ||
494 | /* | 642 | /* |
495 | * Debug traps and NMI can happen at the one SYSENTER instruction | 643 | * Debug traps and NMI can happen at the one SYSENTER instruction |
@@ -514,16 +662,19 @@ label: \ | |||
514 | pushl $sysenter_past_esp | 662 | pushl $sysenter_past_esp |
515 | 663 | ||
516 | KPROBE_ENTRY(debug) | 664 | KPROBE_ENTRY(debug) |
665 | RING0_INT_FRAME | ||
517 | cmpl $sysenter_entry,(%esp) | 666 | cmpl $sysenter_entry,(%esp) |
518 | jne debug_stack_correct | 667 | jne debug_stack_correct |
519 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | 668 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) |
520 | debug_stack_correct: | 669 | debug_stack_correct: |
521 | pushl $-1 # mark this as an int | 670 | pushl $-1 # mark this as an int |
671 | CFI_ADJUST_CFA_OFFSET 4 | ||
522 | SAVE_ALL | 672 | SAVE_ALL |
523 | xorl %edx,%edx # error code 0 | 673 | xorl %edx,%edx # error code 0 |
524 | movl %esp,%eax # pt_regs pointer | 674 | movl %esp,%eax # pt_regs pointer |
525 | call do_debug | 675 | call do_debug |
526 | jmp ret_from_exception | 676 | jmp ret_from_exception |
677 | CFI_ENDPROC | ||
527 | .previous .text | 678 | .previous .text |
528 | /* | 679 | /* |
529 | * NMI is doubly nasty. It can happen _while_ we're handling | 680 | * NMI is doubly nasty. It can happen _while_ we're handling |
@@ -534,14 +685,18 @@ debug_stack_correct: | |||
534 | * fault happened on the sysenter path. | 685 | * fault happened on the sysenter path. |
535 | */ | 686 | */ |
536 | ENTRY(nmi) | 687 | ENTRY(nmi) |
688 | RING0_INT_FRAME | ||
537 | pushl %eax | 689 | pushl %eax |
690 | CFI_ADJUST_CFA_OFFSET 4 | ||
538 | movl %ss, %eax | 691 | movl %ss, %eax |
539 | cmpw $__ESPFIX_SS, %ax | 692 | cmpw $__ESPFIX_SS, %ax |
540 | popl %eax | 693 | popl %eax |
694 | CFI_ADJUST_CFA_OFFSET -4 | ||
541 | je nmi_16bit_stack | 695 | je nmi_16bit_stack |
542 | cmpl $sysenter_entry,(%esp) | 696 | cmpl $sysenter_entry,(%esp) |
543 | je nmi_stack_fixup | 697 | je nmi_stack_fixup |
544 | pushl %eax | 698 | pushl %eax |
699 | CFI_ADJUST_CFA_OFFSET 4 | ||
545 | movl %esp,%eax | 700 | movl %esp,%eax |
546 | /* Do not access memory above the end of our stack page, | 701 | /* Do not access memory above the end of our stack page, |
547 | * it might not exist. | 702 | * it might not exist. |
@@ -549,16 +704,19 @@ ENTRY(nmi) | |||
549 | andl $(THREAD_SIZE-1),%eax | 704 | andl $(THREAD_SIZE-1),%eax |
550 | cmpl $(THREAD_SIZE-20),%eax | 705 | cmpl $(THREAD_SIZE-20),%eax |
551 | popl %eax | 706 | popl %eax |
707 | CFI_ADJUST_CFA_OFFSET -4 | ||
552 | jae nmi_stack_correct | 708 | jae nmi_stack_correct |
553 | cmpl $sysenter_entry,12(%esp) | 709 | cmpl $sysenter_entry,12(%esp) |
554 | je nmi_debug_stack_check | 710 | je nmi_debug_stack_check |
555 | nmi_stack_correct: | 711 | nmi_stack_correct: |
556 | pushl %eax | 712 | pushl %eax |
713 | CFI_ADJUST_CFA_OFFSET 4 | ||
557 | SAVE_ALL | 714 | SAVE_ALL |
558 | xorl %edx,%edx # zero error code | 715 | xorl %edx,%edx # zero error code |
559 | movl %esp,%eax # pt_regs pointer | 716 | movl %esp,%eax # pt_regs pointer |
560 | call do_nmi | 717 | call do_nmi |
561 | jmp restore_all | 718 | jmp restore_all |
719 | CFI_ENDPROC | ||
562 | 720 | ||
563 | nmi_stack_fixup: | 721 | nmi_stack_fixup: |
564 | FIX_STACK(12,nmi_stack_correct, 1) | 722 | FIX_STACK(12,nmi_stack_correct, 1) |
@@ -574,94 +732,177 @@ nmi_debug_stack_check: | |||
574 | jmp nmi_stack_correct | 732 | jmp nmi_stack_correct |
575 | 733 | ||
576 | nmi_16bit_stack: | 734 | nmi_16bit_stack: |
735 | RING0_INT_FRAME | ||
577 | /* create the pointer to lss back */ | 736 | /* create the pointer to lss back */ |
578 | pushl %ss | 737 | pushl %ss |
738 | CFI_ADJUST_CFA_OFFSET 4 | ||
579 | pushl %esp | 739 | pushl %esp |
740 | CFI_ADJUST_CFA_OFFSET 4 | ||
580 | movzwl %sp, %esp | 741 | movzwl %sp, %esp |
581 | addw $4, (%esp) | 742 | addw $4, (%esp) |
582 | /* copy the iret frame of 12 bytes */ | 743 | /* copy the iret frame of 12 bytes */ |
583 | .rept 3 | 744 | .rept 3 |
584 | pushl 16(%esp) | 745 | pushl 16(%esp) |
746 | CFI_ADJUST_CFA_OFFSET 4 | ||
585 | .endr | 747 | .endr |
586 | pushl %eax | 748 | pushl %eax |
749 | CFI_ADJUST_CFA_OFFSET 4 | ||
587 | SAVE_ALL | 750 | SAVE_ALL |
588 | FIXUP_ESPFIX_STACK # %eax == %esp | 751 | FIXUP_ESPFIX_STACK # %eax == %esp |
752 | CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved | ||
589 | xorl %edx,%edx # zero error code | 753 | xorl %edx,%edx # zero error code |
590 | call do_nmi | 754 | call do_nmi |
591 | RESTORE_REGS | 755 | RESTORE_REGS |
592 | lss 12+4(%esp), %esp # back to 16bit stack | 756 | lss 12+4(%esp), %esp # back to 16bit stack |
593 | 1: iret | 757 | 1: iret |
758 | CFI_ENDPROC | ||
594 | .section __ex_table,"a" | 759 | .section __ex_table,"a" |
595 | .align 4 | 760 | .align 4 |
596 | .long 1b,iret_exc | 761 | .long 1b,iret_exc |
597 | .previous | 762 | .previous |
598 | 763 | ||
599 | KPROBE_ENTRY(int3) | 764 | KPROBE_ENTRY(int3) |
765 | RING0_INT_FRAME | ||
600 | pushl $-1 # mark this as an int | 766 | pushl $-1 # mark this as an int |
767 | CFI_ADJUST_CFA_OFFSET 4 | ||
601 | SAVE_ALL | 768 | SAVE_ALL |
602 | xorl %edx,%edx # zero error code | 769 | xorl %edx,%edx # zero error code |
603 | movl %esp,%eax # pt_regs pointer | 770 | movl %esp,%eax # pt_regs pointer |
604 | call do_int3 | 771 | call do_int3 |
605 | jmp ret_from_exception | 772 | jmp ret_from_exception |
773 | CFI_ENDPROC | ||
606 | .previous .text | 774 | .previous .text |
607 | 775 | ||
608 | ENTRY(overflow) | 776 | ENTRY(overflow) |
777 | RING0_INT_FRAME | ||
609 | pushl $0 | 778 | pushl $0 |
779 | CFI_ADJUST_CFA_OFFSET 4 | ||
610 | pushl $do_overflow | 780 | pushl $do_overflow |
781 | CFI_ADJUST_CFA_OFFSET 4 | ||
611 | jmp error_code | 782 | jmp error_code |
783 | CFI_ENDPROC | ||
612 | 784 | ||
613 | ENTRY(bounds) | 785 | ENTRY(bounds) |
786 | RING0_INT_FRAME | ||
614 | pushl $0 | 787 | pushl $0 |
788 | CFI_ADJUST_CFA_OFFSET 4 | ||
615 | pushl $do_bounds | 789 | pushl $do_bounds |
790 | CFI_ADJUST_CFA_OFFSET 4 | ||
616 | jmp error_code | 791 | jmp error_code |
792 | CFI_ENDPROC | ||
617 | 793 | ||
618 | ENTRY(invalid_op) | 794 | ENTRY(invalid_op) |
795 | RING0_INT_FRAME | ||
619 | pushl $0 | 796 | pushl $0 |
797 | CFI_ADJUST_CFA_OFFSET 4 | ||
620 | pushl $do_invalid_op | 798 | pushl $do_invalid_op |
799 | CFI_ADJUST_CFA_OFFSET 4 | ||
621 | jmp error_code | 800 | jmp error_code |
801 | CFI_ENDPROC | ||
622 | 802 | ||
623 | ENTRY(coprocessor_segment_overrun) | 803 | ENTRY(coprocessor_segment_overrun) |
804 | RING0_INT_FRAME | ||
624 | pushl $0 | 805 | pushl $0 |
806 | CFI_ADJUST_CFA_OFFSET 4 | ||
625 | pushl $do_coprocessor_segment_overrun | 807 | pushl $do_coprocessor_segment_overrun |
808 | CFI_ADJUST_CFA_OFFSET 4 | ||
626 | jmp error_code | 809 | jmp error_code |
810 | CFI_ENDPROC | ||
627 | 811 | ||
628 | ENTRY(invalid_TSS) | 812 | ENTRY(invalid_TSS) |
813 | RING0_EC_FRAME | ||
629 | pushl $do_invalid_TSS | 814 | pushl $do_invalid_TSS |
815 | CFI_ADJUST_CFA_OFFSET 4 | ||
630 | jmp error_code | 816 | jmp error_code |
817 | CFI_ENDPROC | ||
631 | 818 | ||
632 | ENTRY(segment_not_present) | 819 | ENTRY(segment_not_present) |
820 | RING0_EC_FRAME | ||
633 | pushl $do_segment_not_present | 821 | pushl $do_segment_not_present |
822 | CFI_ADJUST_CFA_OFFSET 4 | ||
634 | jmp error_code | 823 | jmp error_code |
824 | CFI_ENDPROC | ||
635 | 825 | ||
636 | ENTRY(stack_segment) | 826 | ENTRY(stack_segment) |
827 | RING0_EC_FRAME | ||
637 | pushl $do_stack_segment | 828 | pushl $do_stack_segment |
829 | CFI_ADJUST_CFA_OFFSET 4 | ||
638 | jmp error_code | 830 | jmp error_code |
831 | CFI_ENDPROC | ||
639 | 832 | ||
640 | KPROBE_ENTRY(general_protection) | 833 | KPROBE_ENTRY(general_protection) |
834 | RING0_EC_FRAME | ||
641 | pushl $do_general_protection | 835 | pushl $do_general_protection |
836 | CFI_ADJUST_CFA_OFFSET 4 | ||
642 | jmp error_code | 837 | jmp error_code |
838 | CFI_ENDPROC | ||
643 | .previous .text | 839 | .previous .text |
644 | 840 | ||
645 | ENTRY(alignment_check) | 841 | ENTRY(alignment_check) |
842 | RING0_EC_FRAME | ||
646 | pushl $do_alignment_check | 843 | pushl $do_alignment_check |
844 | CFI_ADJUST_CFA_OFFSET 4 | ||
647 | jmp error_code | 845 | jmp error_code |
846 | CFI_ENDPROC | ||
648 | 847 | ||
649 | KPROBE_ENTRY(page_fault) | 848 | KPROBE_ENTRY(page_fault) |
849 | RING0_EC_FRAME | ||
650 | pushl $do_page_fault | 850 | pushl $do_page_fault |
851 | CFI_ADJUST_CFA_OFFSET 4 | ||
651 | jmp error_code | 852 | jmp error_code |
853 | CFI_ENDPROC | ||
652 | .previous .text | 854 | .previous .text |
653 | 855 | ||
654 | #ifdef CONFIG_X86_MCE | 856 | #ifdef CONFIG_X86_MCE |
655 | ENTRY(machine_check) | 857 | ENTRY(machine_check) |
858 | RING0_INT_FRAME | ||
656 | pushl $0 | 859 | pushl $0 |
860 | CFI_ADJUST_CFA_OFFSET 4 | ||
657 | pushl machine_check_vector | 861 | pushl machine_check_vector |
862 | CFI_ADJUST_CFA_OFFSET 4 | ||
658 | jmp error_code | 863 | jmp error_code |
864 | CFI_ENDPROC | ||
659 | #endif | 865 | #endif |
660 | 866 | ||
661 | ENTRY(spurious_interrupt_bug) | 867 | ENTRY(spurious_interrupt_bug) |
868 | RING0_INT_FRAME | ||
662 | pushl $0 | 869 | pushl $0 |
870 | CFI_ADJUST_CFA_OFFSET 4 | ||
663 | pushl $do_spurious_interrupt_bug | 871 | pushl $do_spurious_interrupt_bug |
872 | CFI_ADJUST_CFA_OFFSET 4 | ||
664 | jmp error_code | 873 | jmp error_code |
874 | CFI_ENDPROC | ||
875 | |||
876 | #ifdef CONFIG_STACK_UNWIND | ||
877 | ENTRY(arch_unwind_init_running) | ||
878 | CFI_STARTPROC | ||
879 | movl 4(%esp), %edx | ||
880 | movl (%esp), %ecx | ||
881 | leal 4(%esp), %eax | ||
882 | movl %ebx, EBX(%edx) | ||
883 | xorl %ebx, %ebx | ||
884 | movl %ebx, ECX(%edx) | ||
885 | movl %ebx, EDX(%edx) | ||
886 | movl %esi, ESI(%edx) | ||
887 | movl %edi, EDI(%edx) | ||
888 | movl %ebp, EBP(%edx) | ||
889 | movl %ebx, EAX(%edx) | ||
890 | movl $__USER_DS, DS(%edx) | ||
891 | movl $__USER_DS, ES(%edx) | ||
892 | movl %ebx, ORIG_EAX(%edx) | ||
893 | movl %ecx, EIP(%edx) | ||
894 | movl 12(%esp), %ecx | ||
895 | movl $__KERNEL_CS, CS(%edx) | ||
896 | movl %ebx, EFLAGS(%edx) | ||
897 | movl %eax, OLDESP(%edx) | ||
898 | movl 8(%esp), %eax | ||
899 | movl %ecx, 8(%esp) | ||
900 | movl EBX(%edx), %ebx | ||
901 | movl $__KERNEL_DS, OLDSS(%edx) | ||
902 | jmpl *%eax | ||
903 | CFI_ENDPROC | ||
904 | ENDPROC(arch_unwind_init_running) | ||
905 | #endif | ||
665 | 906 | ||
666 | .section .rodata,"a" | 907 | .section .rodata,"a" |
667 | #include "syscall_table.S" | 908 | #include "syscall_table.S" |
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c new file mode 100644 index 000000000000..c6737c35815d --- /dev/null +++ b/arch/i386/kernel/hpet.c | |||
@@ -0,0 +1,67 @@ | |||
1 | #include <linux/clocksource.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/hpet.h> | ||
4 | #include <linux/init.h> | ||
5 | |||
6 | #include <asm/hpet.h> | ||
7 | #include <asm/io.h> | ||
8 | |||
9 | #define HPET_MASK CLOCKSOURCE_MASK(32) | ||
10 | #define HPET_SHIFT 22 | ||
11 | |||
12 | /* FSEC = 10^-15 NSEC = 10^-9 */ | ||
13 | #define FSEC_PER_NSEC 1000000 | ||
14 | |||
15 | static void *hpet_ptr; | ||
16 | |||
17 | static cycle_t read_hpet(void) | ||
18 | { | ||
19 | return (cycle_t)readl(hpet_ptr); | ||
20 | } | ||
21 | |||
22 | static struct clocksource clocksource_hpet = { | ||
23 | .name = "hpet", | ||
24 | .rating = 250, | ||
25 | .read = read_hpet, | ||
26 | .mask = HPET_MASK, | ||
27 | .mult = 0, /* set below */ | ||
28 | .shift = HPET_SHIFT, | ||
29 | .is_continuous = 1, | ||
30 | }; | ||
31 | |||
32 | static int __init init_hpet_clocksource(void) | ||
33 | { | ||
34 | unsigned long hpet_period; | ||
35 | void __iomem* hpet_base; | ||
36 | u64 tmp; | ||
37 | |||
38 | if (!hpet_address) | ||
39 | return -ENODEV; | ||
40 | |||
41 | /* calculate the hpet address: */ | ||
42 | hpet_base = | ||
43 | (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
44 | hpet_ptr = hpet_base + HPET_COUNTER; | ||
45 | |||
46 | /* calculate the frequency: */ | ||
47 | hpet_period = readl(hpet_base + HPET_PERIOD); | ||
48 | |||
49 | /* | ||
50 | * hpet period is in femtoseconds per cycle | ||
51 | * so we need to convert this to ns/cyc units | ||
52 | * approximated by mult/2^shift | ||
53 | * | ||
54 | * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift | ||
55 | * fsec/cyc * 1ns/1000000fsec * 2^shift = mult | ||
56 | * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult | ||
57 | * (fsec/cyc << shift)/1000000 = mult | ||
58 | * (hpet_period << shift)/FSEC_PER_NSEC = mult | ||
59 | */ | ||
60 | tmp = (u64)hpet_period << HPET_SHIFT; | ||
61 | do_div(tmp, FSEC_PER_NSEC); | ||
62 | clocksource_hpet.mult = (u32)tmp; | ||
63 | |||
64 | return clocksource_register(&clocksource_hpet); | ||
65 | } | ||
66 | |||
67 | module_init(init_hpet_clocksource); | ||
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c new file mode 100644 index 000000000000..477b24daff53 --- /dev/null +++ b/arch/i386/kernel/i8253.c | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * i8253.c 8253/PIT functions | ||
3 | * | ||
4 | */ | ||
5 | #include <linux/clocksource.h> | ||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/jiffies.h> | ||
8 | #include <linux/sysdev.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/init.h> | ||
11 | |||
12 | #include <asm/smp.h> | ||
13 | #include <asm/delay.h> | ||
14 | #include <asm/i8253.h> | ||
15 | #include <asm/io.h> | ||
16 | |||
17 | #include "io_ports.h" | ||
18 | |||
19 | DEFINE_SPINLOCK(i8253_lock); | ||
20 | EXPORT_SYMBOL(i8253_lock); | ||
21 | |||
22 | void setup_pit_timer(void) | ||
23 | { | ||
24 | unsigned long flags; | ||
25 | |||
26 | spin_lock_irqsave(&i8253_lock, flags); | ||
27 | outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | ||
28 | udelay(10); | ||
29 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
30 | udelay(10); | ||
31 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
32 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
33 | } | ||
34 | |||
35 | /* | ||
36 | * Since the PIT overflows every tick, it's not very useful | ||
37 | * to just read by itself. So use jiffies to emulate a free | ||
38 | * running counter: | ||
39 | */ | ||
40 | static cycle_t pit_read(void) | ||
41 | { | ||
42 | unsigned long flags; | ||
43 | int count; | ||
44 | u32 jifs; | ||
45 | static int old_count; | ||
46 | static u32 old_jifs; | ||
47 | |||
48 | spin_lock_irqsave(&i8253_lock, flags); | ||
49 | /* | ||
50 | * Although our caller may have the read side of xtime_lock, | ||
51 | * this is now a seqlock, and we are cheating in this routine | ||
52 | * by having side effects on state that we cannot undo if | ||
53 | * there is a collision on the seqlock and our caller has to | ||
54 | * retry. (Namely, old_jifs and old_count.) So we must treat | ||
55 | * jiffies as volatile despite the lock. We read jiffies | ||
56 | * before latching the timer count to guarantee that although | ||
57 | * the jiffies value might be older than the count (that is, | ||
58 | * the counter may underflow between the last point where | ||
59 | * jiffies was incremented and the point where we latch the | ||
60 | * count), it cannot be newer. | ||
61 | */ | ||
62 | jifs = jiffies; | ||
63 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
64 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
65 | count |= inb_p(PIT_CH0) << 8; | ||
66 | |||
67 | /* VIA686a test code... reset the latch if count > max + 1 */ | ||
68 | if (count > LATCH) { | ||
69 | outb_p(0x34, PIT_MODE); | ||
70 | outb_p(LATCH & 0xff, PIT_CH0); | ||
71 | outb(LATCH >> 8, PIT_CH0); | ||
72 | count = LATCH - 1; | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * It's possible for count to appear to go the wrong way for a | ||
77 | * couple of reasons: | ||
78 | * | ||
79 | * 1. The timer counter underflows, but we haven't handled the | ||
80 | * resulting interrupt and incremented jiffies yet. | ||
81 | * 2. Hardware problem with the timer, not giving us continuous time, | ||
82 | * the counter does small "jumps" upwards on some Pentium systems, | ||
83 | * (see c't 95/10 page 335 for Neptun bug.) | ||
84 | * | ||
85 | * Previous attempts to handle these cases intelligently were | ||
86 | * buggy, so we just do the simple thing now. | ||
87 | */ | ||
88 | if (count > old_count && jifs == old_jifs) { | ||
89 | count = old_count; | ||
90 | } | ||
91 | old_count = count; | ||
92 | old_jifs = jifs; | ||
93 | |||
94 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
95 | |||
96 | count = (LATCH - 1) - count; | ||
97 | |||
98 | return (cycle_t)(jifs * LATCH) + count; | ||
99 | } | ||
100 | |||
101 | static struct clocksource clocksource_pit = { | ||
102 | .name = "pit", | ||
103 | .rating = 110, | ||
104 | .read = pit_read, | ||
105 | .mask = CLOCKSOURCE_MASK(32), | ||
106 | .mult = 0, | ||
107 | .shift = 20, | ||
108 | }; | ||
109 | |||
110 | static int __init init_pit_clocksource(void) | ||
111 | { | ||
112 | if (num_possible_cpus() > 4) /* PIT does not scale! */ | ||
113 | return 0; | ||
114 | |||
115 | clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); | ||
116 | return clocksource_register(&clocksource_pit); | ||
117 | } | ||
118 | module_init(init_pit_clocksource); | ||
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index b7636b96e104..c1a42feba286 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c | |||
@@ -175,7 +175,7 @@ static void mask_and_ack_8259A(unsigned int irq) | |||
175 | * Lightweight spurious IRQ detection. We do not want | 175 | * Lightweight spurious IRQ detection. We do not want |
176 | * to overdo spurious IRQ handling - it's usually a sign | 176 | * to overdo spurious IRQ handling - it's usually a sign |
177 | * of hardware problems, so we only do the checks we can | 177 | * of hardware problems, so we only do the checks we can |
178 | * do without slowing down good hardware unnecesserily. | 178 | * do without slowing down good hardware unnecessarily. |
179 | * | 179 | * |
180 | * Note that IRQ7 and IRQ15 (the two spurious IRQs | 180 | * Note that IRQ7 and IRQ15 (the two spurious IRQs |
181 | * usually resulting from the 8259A-1|2 PICs) occur | 181 | * usually resulting from the 8259A-1|2 PICs) occur |
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index a62df3e764c5..72ae414e4d49 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <asm/desc.h> | 38 | #include <asm/desc.h> |
39 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
40 | #include <asm/i8259.h> | 40 | #include <asm/i8259.h> |
41 | #include <asm/nmi.h> | ||
41 | 42 | ||
42 | #include <mach_apic.h> | 43 | #include <mach_apic.h> |
43 | 44 | ||
@@ -50,6 +51,7 @@ atomic_t irq_mis_count; | |||
50 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 51 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
51 | 52 | ||
52 | static DEFINE_SPINLOCK(ioapic_lock); | 53 | static DEFINE_SPINLOCK(ioapic_lock); |
54 | static DEFINE_SPINLOCK(vector_lock); | ||
53 | 55 | ||
54 | int timer_over_8254 __initdata = 1; | 56 | int timer_over_8254 __initdata = 1; |
55 | 57 | ||
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; | |||
1161 | int assign_irq_vector(int irq) | 1163 | int assign_irq_vector(int irq) |
1162 | { | 1164 | { |
1163 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; | 1165 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; |
1166 | unsigned long flags; | ||
1167 | int vector; | ||
1168 | |||
1169 | BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); | ||
1164 | 1170 | ||
1165 | BUG_ON(irq >= NR_IRQ_VECTORS); | 1171 | spin_lock_irqsave(&vector_lock, flags); |
1166 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) | 1172 | |
1173 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { | ||
1174 | spin_unlock_irqrestore(&vector_lock, flags); | ||
1167 | return IO_APIC_VECTOR(irq); | 1175 | return IO_APIC_VECTOR(irq); |
1176 | } | ||
1168 | next: | 1177 | next: |
1169 | current_vector += 8; | 1178 | current_vector += 8; |
1170 | if (current_vector == SYSCALL_VECTOR) | 1179 | if (current_vector == SYSCALL_VECTOR) |
@@ -1172,16 +1181,21 @@ next: | |||
1172 | 1181 | ||
1173 | if (current_vector >= FIRST_SYSTEM_VECTOR) { | 1182 | if (current_vector >= FIRST_SYSTEM_VECTOR) { |
1174 | offset++; | 1183 | offset++; |
1175 | if (!(offset%8)) | 1184 | if (!(offset%8)) { |
1185 | spin_unlock_irqrestore(&vector_lock, flags); | ||
1176 | return -ENOSPC; | 1186 | return -ENOSPC; |
1187 | } | ||
1177 | current_vector = FIRST_DEVICE_VECTOR + offset; | 1188 | current_vector = FIRST_DEVICE_VECTOR + offset; |
1178 | } | 1189 | } |
1179 | 1190 | ||
1180 | vector_irq[current_vector] = irq; | 1191 | vector = current_vector; |
1192 | vector_irq[vector] = irq; | ||
1181 | if (irq != AUTO_ASSIGN) | 1193 | if (irq != AUTO_ASSIGN) |
1182 | IO_APIC_VECTOR(irq) = current_vector; | 1194 | IO_APIC_VECTOR(irq) = vector; |
1183 | 1195 | ||
1184 | return current_vector; | 1196 | spin_unlock_irqrestore(&vector_lock, flags); |
1197 | |||
1198 | return vector; | ||
1185 | } | 1199 | } |
1186 | 1200 | ||
1187 | static struct hw_interrupt_type ioapic_level_type; | 1201 | static struct hw_interrupt_type ioapic_level_type; |
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type; | |||
1193 | 1207 | ||
1194 | static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) | 1208 | static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) |
1195 | { | 1209 | { |
1196 | if (use_pci_vector() && !platform_legacy_irq(irq)) { | 1210 | unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; |
1197 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1211 | |
1198 | trigger == IOAPIC_LEVEL) | 1212 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1199 | irq_desc[vector].handler = &ioapic_level_type; | 1213 | trigger == IOAPIC_LEVEL) |
1200 | else | 1214 | irq_desc[idx].handler = &ioapic_level_type; |
1201 | irq_desc[vector].handler = &ioapic_edge_type; | 1215 | else |
1202 | set_intr_gate(vector, interrupt[vector]); | 1216 | irq_desc[idx].handler = &ioapic_edge_type; |
1203 | } else { | 1217 | set_intr_gate(vector, interrupt[idx]); |
1204 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
1205 | trigger == IOAPIC_LEVEL) | ||
1206 | irq_desc[irq].handler = &ioapic_level_type; | ||
1207 | else | ||
1208 | irq_desc[irq].handler = &ioapic_edge_type; | ||
1209 | set_intr_gate(vector, interrupt[irq]); | ||
1210 | } | ||
1211 | } | 1218 | } |
1212 | 1219 | ||
1213 | static void __init setup_IO_APIC_irqs(void) | 1220 | static void __init setup_IO_APIC_irqs(void) |
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 49ce4c31b713..061533e0cb5e 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
227 | if (i == 0) { | 227 | if (i == 0) { |
228 | seq_printf(p, " "); | 228 | seq_printf(p, " "); |
229 | for_each_online_cpu(j) | 229 | for_each_online_cpu(j) |
230 | seq_printf(p, "CPU%d ",j); | 230 | seq_printf(p, "CPU%-8d",j); |
231 | seq_putc(p, '\n'); | 231 | seq_putc(p, '\n'); |
232 | } | 232 | } |
233 | 233 | ||
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 395a9a6dff88..727e419ad78a 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c | |||
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to) | |||
57 | /* | 57 | /* |
58 | * returns non-zero if opcodes can be boosted. | 58 | * returns non-zero if opcodes can be boosted. |
59 | */ | 59 | */ |
60 | static __always_inline int can_boost(kprobe_opcode_t opcode) | 60 | static __always_inline int can_boost(kprobe_opcode_t *opcodes) |
61 | { | 61 | { |
62 | switch (opcode & 0xf0 ) { | 62 | #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ |
63 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
64 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
65 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
66 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
67 | << (row % 32)) | ||
68 | /* | ||
69 | * Undefined/reserved opcodes, conditional jumps, Opcode Extension | ||
70 | * Groups, and some special opcodes cannot be boosted. | ||
71 | */ | ||
72 | static const unsigned long twobyte_is_boostable[256 / 32] = { | ||
73 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
74 | /* ------------------------------- */ | ||
75 | W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */ | ||
76 | W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ | ||
77 | W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ | ||
78 | W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ | ||
79 | W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ | ||
80 | W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ | ||
81 | W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */ | ||
82 | W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ | ||
83 | W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */ | ||
84 | W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ | ||
85 | W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */ | ||
86 | W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */ | ||
87 | W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */ | ||
88 | W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */ | ||
89 | W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */ | ||
90 | W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */ | ||
91 | /* ------------------------------- */ | ||
92 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
93 | }; | ||
94 | #undef W | ||
95 | kprobe_opcode_t opcode; | ||
96 | kprobe_opcode_t *orig_opcodes = opcodes; | ||
97 | retry: | ||
98 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
99 | return 0; | ||
100 | opcode = *(opcodes++); | ||
101 | |||
102 | /* 2nd-byte opcode */ | ||
103 | if (opcode == 0x0f) { | ||
104 | if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) | ||
105 | return 0; | ||
106 | return test_bit(*opcodes, twobyte_is_boostable); | ||
107 | } | ||
108 | |||
109 | switch (opcode & 0xf0) { | ||
110 | case 0x60: | ||
111 | if (0x63 < opcode && opcode < 0x67) | ||
112 | goto retry; /* prefixes */ | ||
113 | /* can't boost Address-size override and bound */ | ||
114 | return (opcode != 0x62 && opcode != 0x67); | ||
63 | case 0x70: | 115 | case 0x70: |
64 | return 0; /* can't boost conditional jump */ | 116 | return 0; /* can't boost conditional jump */ |
65 | case 0x90: | ||
66 | /* can't boost call and pushf */ | ||
67 | return opcode != 0x9a && opcode != 0x9c; | ||
68 | case 0xc0: | 117 | case 0xc0: |
69 | /* can't boost undefined opcodes and soft-interruptions */ | 118 | /* can't boost software-interruptions */ |
70 | return (0xc1 < opcode && opcode < 0xc6) || | 119 | return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; |
71 | (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf; | ||
72 | case 0xd0: | 120 | case 0xd0: |
73 | /* can boost AA* and XLAT */ | 121 | /* can boost AA* and XLAT */ |
74 | return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); | 122 | return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); |
75 | case 0xe0: | 123 | case 0xe0: |
76 | /* can boost in/out and (may be) jmps */ | 124 | /* can boost in/out and absolute jmps */ |
77 | return (0xe3 < opcode && opcode != 0xe8); | 125 | return ((opcode & 0x04) || opcode == 0xea); |
78 | case 0xf0: | 126 | case 0xf0: |
127 | if ((opcode & 0x0c) == 0 && opcode != 0xf1) | ||
128 | goto retry; /* lock/rep(ne) prefix */ | ||
79 | /* clear and set flags can be boost */ | 129 | /* clear and set flags can be boost */ |
80 | return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); | 130 | return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); |
81 | default: | 131 | default: |
82 | /* currently, can't boost 2 bytes opcodes */ | 132 | if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) |
83 | return opcode != 0x0f; | 133 | goto retry; /* prefixes */ |
134 | /* can't boost CS override and call */ | ||
135 | return (opcode != 0x2e && opcode != 0x9a); | ||
84 | } | 136 | } |
85 | } | 137 | } |
86 | 138 | ||
87 | |||
88 | /* | 139 | /* |
89 | * returns non-zero if opcode modifies the interrupt flag. | 140 | * returns non-zero if opcode modifies the interrupt flag. |
90 | */ | 141 | */ |
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) | |||
109 | 160 | ||
110 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 161 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); |
111 | p->opcode = *p->addr; | 162 | p->opcode = *p->addr; |
112 | if (can_boost(p->opcode)) { | 163 | if (can_boost(p->addr)) { |
113 | p->ainsn.boostable = 0; | 164 | p->ainsn.boostable = 0; |
114 | } else { | 165 | } else { |
115 | p->ainsn.boostable = -1; | 166 | p->ainsn.boostable = -1; |
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
208 | struct kprobe_ctlblk *kcb; | 259 | struct kprobe_ctlblk *kcb; |
209 | #ifdef CONFIG_PREEMPT | 260 | #ifdef CONFIG_PREEMPT |
210 | unsigned pre_preempt_count = preempt_count(); | 261 | unsigned pre_preempt_count = preempt_count(); |
211 | #endif /* CONFIG_PREEMPT */ | 262 | #else |
263 | unsigned pre_preempt_count = 1; | ||
264 | #endif | ||
212 | 265 | ||
213 | addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); | 266 | addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); |
214 | 267 | ||
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
285 | /* handler has already set things up, so skip ss setup */ | 338 | /* handler has already set things up, so skip ss setup */ |
286 | return 1; | 339 | return 1; |
287 | 340 | ||
288 | if (p->ainsn.boostable == 1 && | 341 | ss_probe: |
289 | #ifdef CONFIG_PREEMPT | 342 | if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){ |
290 | !(pre_preempt_count) && /* | ||
291 | * This enables booster when the direct | ||
292 | * execution path aren't preempted. | ||
293 | */ | ||
294 | #endif /* CONFIG_PREEMPT */ | ||
295 | !p->post_handler && !p->break_handler ) { | ||
296 | /* Boost up -- we can execute copied instructions directly */ | 343 | /* Boost up -- we can execute copied instructions directly */ |
297 | reset_current_kprobe(); | 344 | reset_current_kprobe(); |
298 | regs->eip = (unsigned long)p->ainsn.insn; | 345 | regs->eip = (unsigned long)p->ainsn.insn; |
299 | preempt_enable_no_resched(); | 346 | preempt_enable_no_resched(); |
300 | return 1; | 347 | return 1; |
301 | } | 348 | } |
302 | |||
303 | ss_probe: | ||
304 | prepare_singlestep(p, regs); | 349 | prepare_singlestep(p, regs); |
305 | kcb->kprobe_status = KPROBE_HIT_SS; | 350 | kcb->kprobe_status = KPROBE_HIT_SS; |
306 | return 1; | 351 | return 1; |
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index f73d7374a2ba..511abe52a94e 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c | |||
@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( | |||
133 | unsigned long start_address, | 133 | unsigned long start_address, |
134 | unsigned int has_pae) ATTRIB_NORET; | 134 | unsigned int has_pae) ATTRIB_NORET; |
135 | 135 | ||
136 | const extern unsigned char relocate_new_kernel[]; | 136 | extern const unsigned char relocate_new_kernel[]; |
137 | extern void relocate_new_kernel_end(void); | 137 | extern void relocate_new_kernel_end(void); |
138 | const extern unsigned int relocate_new_kernel_size; | 138 | extern const unsigned int relocate_new_kernel_size; |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * A architecture hook called to validate the | 141 | * A architecture hook called to validate the |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index d43b498ec745..a76e93146585 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -14,21 +14,17 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/config.h> | 16 | #include <linux/config.h> |
17 | #include <linux/mm.h> | ||
18 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
19 | #include <linux/bootmem.h> | ||
20 | #include <linux/smp_lock.h> | ||
21 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
22 | #include <linux/mc146818rtc.h> | ||
23 | #include <linux/kernel_stat.h> | ||
24 | #include <linux/module.h> | 19 | #include <linux/module.h> |
25 | #include <linux/nmi.h> | 20 | #include <linux/nmi.h> |
26 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
27 | #include <linux/sysctl.h> | 22 | #include <linux/sysctl.h> |
23 | #include <linux/percpu.h> | ||
28 | 24 | ||
29 | #include <asm/smp.h> | 25 | #include <asm/smp.h> |
30 | #include <asm/div64.h> | ||
31 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
27 | #include <asm/intel_arch_perfmon.h> | ||
32 | 28 | ||
33 | #include "mach_traps.h" | 29 | #include "mach_traps.h" |
34 | 30 | ||
@@ -100,6 +96,9 @@ int nmi_active; | |||
100 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | 96 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ |
101 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | 97 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) |
102 | 98 | ||
99 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
100 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
101 | |||
103 | #ifdef CONFIG_SMP | 102 | #ifdef CONFIG_SMP |
104 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 103 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
105 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | 104 | * the CPU is idle. To make sure the NMI watchdog really ticks on all |
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str) | |||
212 | 211 | ||
213 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 212 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
214 | 213 | ||
214 | static void disable_intel_arch_watchdog(void); | ||
215 | |||
215 | static void disable_lapic_nmi_watchdog(void) | 216 | static void disable_lapic_nmi_watchdog(void) |
216 | { | 217 | { |
217 | if (nmi_active <= 0) | 218 | if (nmi_active <= 0) |
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void) | |||
221 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | 222 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); |
222 | break; | 223 | break; |
223 | case X86_VENDOR_INTEL: | 224 | case X86_VENDOR_INTEL: |
225 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
226 | disable_intel_arch_watchdog(); | ||
227 | break; | ||
228 | } | ||
224 | switch (boot_cpu_data.x86) { | 229 | switch (boot_cpu_data.x86) { |
225 | case 6: | 230 | case 6: |
226 | if (boot_cpu_data.x86_model > 0xd) | 231 | if (boot_cpu_data.x86_model > 0xd) |
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void) | |||
449 | return 1; | 454 | return 1; |
450 | } | 455 | } |
451 | 456 | ||
457 | static void disable_intel_arch_watchdog(void) | ||
458 | { | ||
459 | unsigned ebx; | ||
460 | |||
461 | /* | ||
462 | * Check whether the Architectural PerfMon supports | ||
463 | * Unhalted Core Cycles Event or not. | ||
464 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | ||
465 | */ | ||
466 | ebx = cpuid_ebx(10); | ||
467 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
468 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | ||
469 | } | ||
470 | |||
471 | static int setup_intel_arch_watchdog(void) | ||
472 | { | ||
473 | unsigned int evntsel; | ||
474 | unsigned ebx; | ||
475 | |||
476 | /* | ||
477 | * Check whether the Architectural PerfMon supports | ||
478 | * Unhalted Core Cycles Event or not. | ||
479 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | ||
480 | */ | ||
481 | ebx = cpuid_ebx(10); | ||
482 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
483 | return 0; | ||
484 | |||
485 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
486 | |||
487 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | ||
488 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | ||
489 | |||
490 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
491 | | ARCH_PERFMON_EVENTSEL_OS | ||
492 | | ARCH_PERFMON_EVENTSEL_USR | ||
493 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
494 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
495 | |||
496 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | ||
497 | write_watchdog_counter("INTEL_ARCH_PERFCTR0"); | ||
498 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
499 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
500 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | ||
501 | return 1; | ||
502 | } | ||
503 | |||
452 | void setup_apic_nmi_watchdog (void) | 504 | void setup_apic_nmi_watchdog (void) |
453 | { | 505 | { |
454 | switch (boot_cpu_data.x86_vendor) { | 506 | switch (boot_cpu_data.x86_vendor) { |
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void) | |||
458 | setup_k7_watchdog(); | 510 | setup_k7_watchdog(); |
459 | break; | 511 | break; |
460 | case X86_VENDOR_INTEL: | 512 | case X86_VENDOR_INTEL: |
513 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
514 | if (!setup_intel_arch_watchdog()) | ||
515 | return; | ||
516 | break; | ||
517 | } | ||
461 | switch (boot_cpu_data.x86) { | 518 | switch (boot_cpu_data.x86) { |
462 | case 6: | 519 | case 6: |
463 | if (boot_cpu_data.x86_model > 0xd) | 520 | if (boot_cpu_data.x86_model > 0xd) |
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
561 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 618 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); |
562 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 619 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
563 | } | 620 | } |
564 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { | 621 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || |
622 | nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
565 | /* Only P6 based Pentium M need to re-unmask | 623 | /* Only P6 based Pentium M need to re-unmask |
566 | * the apic vector but it doesn't hurt | 624 | * the apic vector but it doesn't hurt |
567 | * other P6 variant */ | 625 | * other P6 variant */ |
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c index 5f5b075f860a..0caf14652bad 100644 --- a/arch/i386/kernel/numaq.c +++ b/arch/i386/kernel/numaq.c | |||
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void) | |||
79 | return 1; | 79 | return 1; |
80 | } | 80 | } |
81 | 81 | ||
82 | static int __init numaq_dsc_disable(void) | 82 | static int __init numaq_tsc_disable(void) |
83 | { | 83 | { |
84 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | 84 | if (num_online_nodes() > 1) { |
85 | tsc_disable = 1; | 85 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); |
86 | tsc_disable = 1; | ||
87 | } | ||
86 | return 0; | 88 | return 0; |
87 | } | 89 | } |
88 | core_initcall(numaq_dsc_disable); | 90 | arch_initcall(numaq_tsc_disable); |
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 6259afea46d1..6946b06e2784 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -102,7 +102,7 @@ void default_idle(void) | |||
102 | local_irq_enable(); | 102 | local_irq_enable(); |
103 | 103 | ||
104 | if (!hlt_counter && boot_cpu_data.hlt_works_ok) { | 104 | if (!hlt_counter && boot_cpu_data.hlt_works_ok) { |
105 | clear_thread_flag(TIF_POLLING_NRFLAG); | 105 | current_thread_info()->status &= ~TS_POLLING; |
106 | smp_mb__after_clear_bit(); | 106 | smp_mb__after_clear_bit(); |
107 | while (!need_resched()) { | 107 | while (!need_resched()) { |
108 | local_irq_disable(); | 108 | local_irq_disable(); |
@@ -111,7 +111,7 @@ void default_idle(void) | |||
111 | else | 111 | else |
112 | local_irq_enable(); | 112 | local_irq_enable(); |
113 | } | 113 | } |
114 | set_thread_flag(TIF_POLLING_NRFLAG); | 114 | current_thread_info()->status |= TS_POLLING; |
115 | } else { | 115 | } else { |
116 | while (!need_resched()) | 116 | while (!need_resched()) |
117 | cpu_relax(); | 117 | cpu_relax(); |
@@ -174,7 +174,7 @@ void cpu_idle(void) | |||
174 | { | 174 | { |
175 | int cpu = smp_processor_id(); | 175 | int cpu = smp_processor_id(); |
176 | 176 | ||
177 | set_thread_flag(TIF_POLLING_NRFLAG); | 177 | current_thread_info()->status |= TS_POLLING; |
178 | 178 | ||
179 | /* endless idle loop with no priority at all */ | 179 | /* endless idle loop with no priority at all */ |
180 | while (1) { | 180 | while (1) { |
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs) | |||
312 | cr3 = read_cr3(); | 312 | cr3 = read_cr3(); |
313 | cr4 = read_cr4_safe(); | 313 | cr4 = read_cr4_safe(); |
314 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); | 314 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); |
315 | show_trace(NULL, &regs->esp); | 315 | show_trace(NULL, regs, &regs->esp); |
316 | } | 316 | } |
317 | 317 | ||
318 | /* | 318 | /* |
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 6bef9273733e..4a65040cc624 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -1575,6 +1575,7 @@ void __init setup_arch(char **cmdline_p) | |||
1575 | conswitchp = &dummy_con; | 1575 | conswitchp = &dummy_con; |
1576 | #endif | 1576 | #endif |
1577 | #endif | 1577 | #endif |
1578 | tsc_init(); | ||
1578 | } | 1579 | } |
1579 | 1580 | ||
1580 | static __init int add_pcspkr(void) | 1581 | static __init int add_pcspkr(void) |
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index d134e9643a58..c10789d7a9d3 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m | |||
114 | 114 | ||
115 | static inline int __prepare_ICR (unsigned int shortcut, int vector) | 115 | static inline int __prepare_ICR (unsigned int shortcut, int vector) |
116 | { | 116 | { |
117 | return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; | 117 | unsigned int icr = shortcut | APIC_DEST_LOGICAL; |
118 | |||
119 | switch (vector) { | ||
120 | default: | ||
121 | icr |= APIC_DM_FIXED | vector; | ||
122 | break; | ||
123 | case NMI_VECTOR: | ||
124 | icr |= APIC_DM_NMI; | ||
125 | break; | ||
126 | } | ||
127 | return icr; | ||
118 | } | 128 | } |
119 | 129 | ||
120 | static inline int __prepare_ICR2 (unsigned int mask) | 130 | static inline int __prepare_ICR2 (unsigned int mask) |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bd0ca5c9f053..bce5470ecb42 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/tlbflush.h> | 52 | #include <asm/tlbflush.h> |
53 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
54 | #include <asm/arch_hooks.h> | 54 | #include <asm/arch_hooks.h> |
55 | #include <asm/nmi.h> | ||
55 | 56 | ||
56 | #include <mach_apic.h> | 57 | #include <mach_apic.h> |
57 | #include <mach_wakecpu.h> | 58 | #include <mach_wakecpu.h> |
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 9d3074759856..5f43d0410122 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies; | |||
82 | DEFINE_SPINLOCK(rtc_lock); | 82 | DEFINE_SPINLOCK(rtc_lock); |
83 | EXPORT_SYMBOL(rtc_lock); | 83 | EXPORT_SYMBOL(rtc_lock); |
84 | 84 | ||
85 | #include <asm/i8253.h> | ||
86 | |||
87 | DEFINE_SPINLOCK(i8253_lock); | ||
88 | EXPORT_SYMBOL(i8253_lock); | ||
89 | |||
90 | struct timer_opts *cur_timer __read_mostly = &timer_none; | ||
91 | |||
92 | /* | 85 | /* |
93 | * This is a special lock that is owned by the CPU and holds the index | 86 | * This is a special lock that is owned by the CPU and holds the index |
94 | * register we are working with. It is required for NMI access to the | 87 | * register we are working with. It is required for NMI access to the |
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr) | |||
118 | } | 111 | } |
119 | EXPORT_SYMBOL(rtc_cmos_write); | 112 | EXPORT_SYMBOL(rtc_cmos_write); |
120 | 113 | ||
121 | /* | ||
122 | * This version of gettimeofday has microsecond resolution | ||
123 | * and better than microsecond precision on fast x86 machines with TSC. | ||
124 | */ | ||
125 | void do_gettimeofday(struct timeval *tv) | ||
126 | { | ||
127 | unsigned long seq; | ||
128 | unsigned long usec, sec; | ||
129 | unsigned long max_ntp_tick; | ||
130 | |||
131 | do { | ||
132 | unsigned long lost; | ||
133 | |||
134 | seq = read_seqbegin(&xtime_lock); | ||
135 | |||
136 | usec = cur_timer->get_offset(); | ||
137 | lost = jiffies - wall_jiffies; | ||
138 | |||
139 | /* | ||
140 | * If time_adjust is negative then NTP is slowing the clock | ||
141 | * so make sure not to go into next possible interval. | ||
142 | * Better to lose some accuracy than have time go backwards.. | ||
143 | */ | ||
144 | if (unlikely(time_adjust < 0)) { | ||
145 | max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; | ||
146 | usec = min(usec, max_ntp_tick); | ||
147 | |||
148 | if (lost) | ||
149 | usec += lost * max_ntp_tick; | ||
150 | } | ||
151 | else if (unlikely(lost)) | ||
152 | usec += lost * (USEC_PER_SEC / HZ); | ||
153 | |||
154 | sec = xtime.tv_sec; | ||
155 | usec += (xtime.tv_nsec / 1000); | ||
156 | } while (read_seqretry(&xtime_lock, seq)); | ||
157 | |||
158 | while (usec >= 1000000) { | ||
159 | usec -= 1000000; | ||
160 | sec++; | ||
161 | } | ||
162 | |||
163 | tv->tv_sec = sec; | ||
164 | tv->tv_usec = usec; | ||
165 | } | ||
166 | |||
167 | EXPORT_SYMBOL(do_gettimeofday); | ||
168 | |||
169 | int do_settimeofday(struct timespec *tv) | ||
170 | { | ||
171 | time_t wtm_sec, sec = tv->tv_sec; | ||
172 | long wtm_nsec, nsec = tv->tv_nsec; | ||
173 | |||
174 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
175 | return -EINVAL; | ||
176 | |||
177 | write_seqlock_irq(&xtime_lock); | ||
178 | /* | ||
179 | * This is revolting. We need to set "xtime" correctly. However, the | ||
180 | * value in this location is the value at the most recent update of | ||
181 | * wall time. Discover what correction gettimeofday() would have | ||
182 | * made, and then undo it! | ||
183 | */ | ||
184 | nsec -= cur_timer->get_offset() * NSEC_PER_USEC; | ||
185 | nsec -= (jiffies - wall_jiffies) * TICK_NSEC; | ||
186 | |||
187 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
188 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
189 | |||
190 | set_normalized_timespec(&xtime, sec, nsec); | ||
191 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
192 | |||
193 | ntp_clear(); | ||
194 | write_sequnlock_irq(&xtime_lock); | ||
195 | clock_was_set(); | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | EXPORT_SYMBOL(do_settimeofday); | ||
200 | |||
201 | static int set_rtc_mmss(unsigned long nowtime) | 114 | static int set_rtc_mmss(unsigned long nowtime) |
202 | { | 115 | { |
203 | int retval; | 116 | int retval; |
204 | 117 | unsigned long flags; | |
205 | WARN_ON(irqs_disabled()); | ||
206 | 118 | ||
207 | /* gets recalled with irq locally disabled */ | 119 | /* gets recalled with irq locally disabled */ |
208 | spin_lock_irq(&rtc_lock); | 120 | /* XXX - does irqsave resolve this? -johnstul */ |
121 | spin_lock_irqsave(&rtc_lock, flags); | ||
209 | if (efi_enabled) | 122 | if (efi_enabled) |
210 | retval = efi_set_rtc_mmss(nowtime); | 123 | retval = efi_set_rtc_mmss(nowtime); |
211 | else | 124 | else |
212 | retval = mach_set_rtc_mmss(nowtime); | 125 | retval = mach_set_rtc_mmss(nowtime); |
213 | spin_unlock_irq(&rtc_lock); | 126 | spin_unlock_irqrestore(&rtc_lock, flags); |
214 | 127 | ||
215 | return retval; | 128 | return retval; |
216 | } | 129 | } |
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime) | |||
218 | 131 | ||
219 | int timer_ack; | 132 | int timer_ack; |
220 | 133 | ||
221 | /* monotonic_clock(): returns # of nanoseconds passed since time_init() | ||
222 | * Note: This function is required to return accurate | ||
223 | * time even in the absence of multiple timer ticks. | ||
224 | */ | ||
225 | unsigned long long monotonic_clock(void) | ||
226 | { | ||
227 | return cur_timer->monotonic_clock(); | ||
228 | } | ||
229 | EXPORT_SYMBOL(monotonic_clock); | ||
230 | |||
231 | #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) | 134 | #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) |
232 | unsigned long profile_pc(struct pt_regs *regs) | 135 | unsigned long profile_pc(struct pt_regs *regs) |
233 | { | 136 | { |
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc); | |||
242 | #endif | 145 | #endif |
243 | 146 | ||
244 | /* | 147 | /* |
245 | * timer_interrupt() needs to keep up the real-time clock, | 148 | * This is the same as the above, except we _also_ save the current |
246 | * as well as call the "do_timer()" routine every clocktick | 149 | * Time Stamp Counter value at the time of the timer interrupt, so that |
150 | * we later on can estimate the time of day more exactly. | ||
247 | */ | 151 | */ |
248 | static inline void do_timer_interrupt(int irq, struct pt_regs *regs) | 152 | irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) |
249 | { | 153 | { |
154 | /* | ||
155 | * Here we are in the timer irq handler. We just have irqs locally | ||
156 | * disabled but we don't know if the timer_bh is running on the other | ||
157 | * CPU. We need to avoid to SMP race with it. NOTE: we don' t need | ||
158 | * the irq version of write_lock because as just said we have irq | ||
159 | * locally disabled. -arca | ||
160 | */ | ||
161 | write_seqlock(&xtime_lock); | ||
162 | |||
250 | #ifdef CONFIG_X86_IO_APIC | 163 | #ifdef CONFIG_X86_IO_APIC |
251 | if (timer_ack) { | 164 | if (timer_ack) { |
252 | /* | 165 | /* |
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs) | |||
279 | irq = inb_p( 0x61 ); /* read the current state */ | 192 | irq = inb_p( 0x61 ); /* read the current state */ |
280 | outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ | 193 | outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ |
281 | } | 194 | } |
282 | } | ||
283 | |||
284 | /* | ||
285 | * This is the same as the above, except we _also_ save the current | ||
286 | * Time Stamp Counter value at the time of the timer interrupt, so that | ||
287 | * we later on can estimate the time of day more exactly. | ||
288 | */ | ||
289 | irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | ||
290 | { | ||
291 | /* | ||
292 | * Here we are in the timer irq handler. We just have irqs locally | ||
293 | * disabled but we don't know if the timer_bh is running on the other | ||
294 | * CPU. We need to avoid to SMP race with it. NOTE: we don' t need | ||
295 | * the irq version of write_lock because as just said we have irq | ||
296 | * locally disabled. -arca | ||
297 | */ | ||
298 | write_seqlock(&xtime_lock); | ||
299 | |||
300 | cur_timer->mark_offset(); | ||
301 | |||
302 | do_timer_interrupt(irq, regs); | ||
303 | 195 | ||
304 | write_sequnlock(&xtime_lock); | 196 | write_sequnlock(&xtime_lock); |
305 | 197 | ||
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void) | |||
380 | 272 | ||
381 | static long clock_cmos_diff, sleep_start; | 273 | static long clock_cmos_diff, sleep_start; |
382 | 274 | ||
383 | static struct timer_opts *last_timer; | ||
384 | static int timer_suspend(struct sys_device *dev, pm_message_t state) | 275 | static int timer_suspend(struct sys_device *dev, pm_message_t state) |
385 | { | 276 | { |
386 | /* | 277 | /* |
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state) | |||
389 | clock_cmos_diff = -get_cmos_time(); | 280 | clock_cmos_diff = -get_cmos_time(); |
390 | clock_cmos_diff += get_seconds(); | 281 | clock_cmos_diff += get_seconds(); |
391 | sleep_start = get_cmos_time(); | 282 | sleep_start = get_cmos_time(); |
392 | last_timer = cur_timer; | ||
393 | cur_timer = &timer_none; | ||
394 | if (last_timer->suspend) | ||
395 | last_timer->suspend(state); | ||
396 | return 0; | 283 | return 0; |
397 | } | 284 | } |
398 | 285 | ||
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev) | |||
415 | jiffies_64 += sleep_length; | 302 | jiffies_64 += sleep_length; |
416 | wall_jiffies += sleep_length; | 303 | wall_jiffies += sleep_length; |
417 | write_sequnlock_irqrestore(&xtime_lock, flags); | 304 | write_sequnlock_irqrestore(&xtime_lock, flags); |
418 | if (last_timer->resume) | ||
419 | last_timer->resume(); | ||
420 | cur_timer = last_timer; | ||
421 | last_timer = NULL; | ||
422 | touch_softlockup_watchdog(); | 305 | touch_softlockup_watchdog(); |
423 | return 0; | 306 | return 0; |
424 | } | 307 | } |
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void) | |||
460 | printk("Using HPET for base-timer\n"); | 343 | printk("Using HPET for base-timer\n"); |
461 | } | 344 | } |
462 | 345 | ||
463 | cur_timer = select_timer(); | ||
464 | printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); | ||
465 | |||
466 | time_init_hook(); | 346 | time_init_hook(); |
467 | } | 347 | } |
468 | #endif | 348 | #endif |
@@ -484,8 +364,5 @@ void __init time_init(void) | |||
484 | set_normalized_timespec(&wall_to_monotonic, | 364 | set_normalized_timespec(&wall_to_monotonic, |
485 | -xtime.tv_sec, -xtime.tv_nsec); | 365 | -xtime.tv_sec, -xtime.tv_nsec); |
486 | 366 | ||
487 | cur_timer = select_timer(); | ||
488 | printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); | ||
489 | |||
490 | time_init_hook(); | 367 | time_init_hook(); |
491 | } | 368 | } |
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile deleted file mode 100644 index 8fa12be658dd..000000000000 --- a/arch/i386/kernel/timers/Makefile +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for x86 timers | ||
3 | # | ||
4 | |||
5 | obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o | ||
6 | |||
7 | obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o | ||
8 | obj-$(CONFIG_HPET_TIMER) += timer_hpet.o | ||
9 | obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o | ||
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c deleted file mode 100644 index 8163fe0cf1f0..000000000000 --- a/arch/i386/kernel/timers/common.c +++ /dev/null | |||
@@ -1,172 +0,0 @@ | |||
1 | /* | ||
2 | * Common functions used across the timers go here | ||
3 | */ | ||
4 | |||
5 | #include <linux/init.h> | ||
6 | #include <linux/timex.h> | ||
7 | #include <linux/errno.h> | ||
8 | #include <linux/jiffies.h> | ||
9 | #include <linux/module.h> | ||
10 | |||
11 | #include <asm/io.h> | ||
12 | #include <asm/timer.h> | ||
13 | #include <asm/hpet.h> | ||
14 | |||
15 | #include "mach_timer.h" | ||
16 | |||
17 | /* ------ Calibrate the TSC ------- | ||
18 | * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). | ||
19 | * Too much 64-bit arithmetic here to do this cleanly in C, and for | ||
20 | * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) | ||
21 | * output busy loop as low as possible. We avoid reading the CTC registers | ||
22 | * directly because of the awkward 8-bit access mechanism of the 82C54 | ||
23 | * device. | ||
24 | */ | ||
25 | |||
26 | #define CALIBRATE_TIME (5 * 1000020/HZ) | ||
27 | |||
28 | unsigned long calibrate_tsc(void) | ||
29 | { | ||
30 | mach_prepare_counter(); | ||
31 | |||
32 | { | ||
33 | unsigned long startlow, starthigh; | ||
34 | unsigned long endlow, endhigh; | ||
35 | unsigned long count; | ||
36 | |||
37 | rdtsc(startlow,starthigh); | ||
38 | mach_countup(&count); | ||
39 | rdtsc(endlow,endhigh); | ||
40 | |||
41 | |||
42 | /* Error: ECTCNEVERSET */ | ||
43 | if (count <= 1) | ||
44 | goto bad_ctc; | ||
45 | |||
46 | /* 64-bit subtract - gcc just messes up with long longs */ | ||
47 | __asm__("subl %2,%0\n\t" | ||
48 | "sbbl %3,%1" | ||
49 | :"=a" (endlow), "=d" (endhigh) | ||
50 | :"g" (startlow), "g" (starthigh), | ||
51 | "0" (endlow), "1" (endhigh)); | ||
52 | |||
53 | /* Error: ECPUTOOFAST */ | ||
54 | if (endhigh) | ||
55 | goto bad_ctc; | ||
56 | |||
57 | /* Error: ECPUTOOSLOW */ | ||
58 | if (endlow <= CALIBRATE_TIME) | ||
59 | goto bad_ctc; | ||
60 | |||
61 | __asm__("divl %2" | ||
62 | :"=a" (endlow), "=d" (endhigh) | ||
63 | :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); | ||
64 | |||
65 | return endlow; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * The CTC wasn't reliable: we got a hit on the very first read, | ||
70 | * or the CPU was so fast/slow that the quotient wouldn't fit in | ||
71 | * 32 bits.. | ||
72 | */ | ||
73 | bad_ctc: | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | #ifdef CONFIG_HPET_TIMER | ||
78 | /* ------ Calibrate the TSC using HPET ------- | ||
79 | * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq. | ||
80 | * Second output is parameter 1 (when non NULL) | ||
81 | * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet(). | ||
82 | * calibrate_tsc() calibrates the processor TSC by comparing | ||
83 | * it to the HPET timer of known frequency. | ||
84 | * Too much 64-bit arithmetic here to do this cleanly in C | ||
85 | */ | ||
86 | #define CALIBRATE_CNT_HPET (5 * hpet_tick) | ||
87 | #define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) | ||
88 | |||
89 | unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) | ||
90 | { | ||
91 | unsigned long tsc_startlow, tsc_starthigh; | ||
92 | unsigned long tsc_endlow, tsc_endhigh; | ||
93 | unsigned long hpet_start, hpet_end; | ||
94 | unsigned long result, remain; | ||
95 | |||
96 | hpet_start = hpet_readl(HPET_COUNTER); | ||
97 | rdtsc(tsc_startlow, tsc_starthigh); | ||
98 | do { | ||
99 | hpet_end = hpet_readl(HPET_COUNTER); | ||
100 | } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET); | ||
101 | rdtsc(tsc_endlow, tsc_endhigh); | ||
102 | |||
103 | /* 64-bit subtract - gcc just messes up with long longs */ | ||
104 | __asm__("subl %2,%0\n\t" | ||
105 | "sbbl %3,%1" | ||
106 | :"=a" (tsc_endlow), "=d" (tsc_endhigh) | ||
107 | :"g" (tsc_startlow), "g" (tsc_starthigh), | ||
108 | "0" (tsc_endlow), "1" (tsc_endhigh)); | ||
109 | |||
110 | /* Error: ECPUTOOFAST */ | ||
111 | if (tsc_endhigh) | ||
112 | goto bad_calibration; | ||
113 | |||
114 | /* Error: ECPUTOOSLOW */ | ||
115 | if (tsc_endlow <= CALIBRATE_TIME_HPET) | ||
116 | goto bad_calibration; | ||
117 | |||
118 | ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET); | ||
119 | if (remain > (tsc_endlow >> 1)) | ||
120 | result++; /* rounding the result */ | ||
121 | |||
122 | if (tsc_hpet_quotient_ptr) { | ||
123 | unsigned long tsc_hpet_quotient; | ||
124 | |||
125 | ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0, | ||
126 | CALIBRATE_CNT_HPET); | ||
127 | if (remain > (tsc_endlow >> 1)) | ||
128 | tsc_hpet_quotient++; /* rounding the result */ | ||
129 | *tsc_hpet_quotient_ptr = tsc_hpet_quotient; | ||
130 | } | ||
131 | |||
132 | return result; | ||
133 | bad_calibration: | ||
134 | /* | ||
135 | * the CPU was so fast/slow that the quotient wouldn't fit in | ||
136 | * 32 bits.. | ||
137 | */ | ||
138 | return 0; | ||
139 | } | ||
140 | #endif | ||
141 | |||
142 | |||
143 | unsigned long read_timer_tsc(void) | ||
144 | { | ||
145 | unsigned long retval; | ||
146 | rdtscl(retval); | ||
147 | return retval; | ||
148 | } | ||
149 | |||
150 | |||
151 | /* calculate cpu_khz */ | ||
152 | void init_cpu_khz(void) | ||
153 | { | ||
154 | if (cpu_has_tsc) { | ||
155 | unsigned long tsc_quotient = calibrate_tsc(); | ||
156 | if (tsc_quotient) { | ||
157 | /* report CPU clock rate in Hz. | ||
158 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
159 | * clock/second. Our precision is about 100 ppm. | ||
160 | */ | ||
161 | { unsigned long eax=0, edx=1000; | ||
162 | __asm__("divl %2" | ||
163 | :"=a" (cpu_khz), "=d" (edx) | ||
164 | :"r" (tsc_quotient), | ||
165 | "0" (eax), "1" (edx)); | ||
166 | printk("Detected %u.%03u MHz processor.\n", | ||
167 | cpu_khz / 1000, cpu_khz % 1000); | ||
168 | } | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | |||
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c deleted file mode 100644 index 7e39ed8e33f8..000000000000 --- a/arch/i386/kernel/timers/timer.c +++ /dev/null | |||
@@ -1,75 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <asm/timer.h> | ||
5 | |||
6 | #ifdef CONFIG_HPET_TIMER | ||
7 | /* | ||
8 | * HPET memory read is slower than tsc reads, but is more dependable as it | ||
9 | * always runs at constant frequency and reduces complexity due to | ||
10 | * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use | ||
11 | * timer_pit when HPET is active. So, we default to timer_tsc. | ||
12 | */ | ||
13 | #endif | ||
14 | /* list of timers, ordered by preference, NULL terminated */ | ||
15 | static struct init_timer_opts* __initdata timers[] = { | ||
16 | #ifdef CONFIG_X86_CYCLONE_TIMER | ||
17 | &timer_cyclone_init, | ||
18 | #endif | ||
19 | #ifdef CONFIG_HPET_TIMER | ||
20 | &timer_hpet_init, | ||
21 | #endif | ||
22 | #ifdef CONFIG_X86_PM_TIMER | ||
23 | &timer_pmtmr_init, | ||
24 | #endif | ||
25 | &timer_tsc_init, | ||
26 | &timer_pit_init, | ||
27 | NULL, | ||
28 | }; | ||
29 | |||
30 | static char clock_override[10] __initdata; | ||
31 | |||
32 | static int __init clock_setup(char* str) | ||
33 | { | ||
34 | if (str) | ||
35 | strlcpy(clock_override, str, sizeof(clock_override)); | ||
36 | return 1; | ||
37 | } | ||
38 | __setup("clock=", clock_setup); | ||
39 | |||
40 | |||
41 | /* The chosen timesource has been found to be bad. | ||
42 | * Fall back to a known good timesource (the PIT) | ||
43 | */ | ||
44 | void clock_fallback(void) | ||
45 | { | ||
46 | cur_timer = &timer_pit; | ||
47 | } | ||
48 | |||
49 | /* iterates through the list of timers, returning the first | ||
50 | * one that initializes successfully. | ||
51 | */ | ||
52 | struct timer_opts* __init select_timer(void) | ||
53 | { | ||
54 | int i = 0; | ||
55 | |||
56 | /* find most preferred working timer */ | ||
57 | while (timers[i]) { | ||
58 | if (timers[i]->init) | ||
59 | if (timers[i]->init(clock_override) == 0) | ||
60 | return timers[i]->opts; | ||
61 | ++i; | ||
62 | } | ||
63 | |||
64 | panic("select_timer: Cannot find a suitable timer\n"); | ||
65 | return NULL; | ||
66 | } | ||
67 | |||
68 | int read_current_timer(unsigned long *timer_val) | ||
69 | { | ||
70 | if (cur_timer->read_timer) { | ||
71 | *timer_val = cur_timer->read_timer(); | ||
72 | return 0; | ||
73 | } | ||
74 | return -1; | ||
75 | } | ||
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c deleted file mode 100644 index 13892a65c941..000000000000 --- a/arch/i386/kernel/timers/timer_cyclone.c +++ /dev/null | |||
@@ -1,259 +0,0 @@ | |||
1 | /* Cyclone-timer: | ||
2 | * This code implements timer_ops for the cyclone counter found | ||
3 | * on IBM x440, x360, and other Summit based systems. | ||
4 | * | ||
5 | * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com) | ||
6 | */ | ||
7 | |||
8 | |||
9 | #include <linux/spinlock.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/timex.h> | ||
12 | #include <linux/errno.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/jiffies.h> | ||
15 | |||
16 | #include <asm/timer.h> | ||
17 | #include <asm/io.h> | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/fixmap.h> | ||
20 | #include <asm/i8253.h> | ||
21 | |||
22 | #include "io_ports.h" | ||
23 | |||
24 | /* Number of usecs that the last interrupt was delayed */ | ||
25 | static int delay_at_last_interrupt; | ||
26 | |||
27 | #define CYCLONE_CBAR_ADDR 0xFEB00CD0 | ||
28 | #define CYCLONE_PMCC_OFFSET 0x51A0 | ||
29 | #define CYCLONE_MPMC_OFFSET 0x51D0 | ||
30 | #define CYCLONE_MPCS_OFFSET 0x51A8 | ||
31 | #define CYCLONE_TIMER_FREQ 100000000 | ||
32 | #define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */ | ||
33 | int use_cyclone = 0; | ||
34 | |||
35 | static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ | ||
36 | static u32 last_cyclone_low; | ||
37 | static u32 last_cyclone_high; | ||
38 | static unsigned long long monotonic_base; | ||
39 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
40 | |||
41 | /* helper macro to atomically read both cyclone counter registers */ | ||
42 | #define read_cyclone_counter(low,high) \ | ||
43 | do{ \ | ||
44 | high = cyclone_timer[1]; low = cyclone_timer[0]; \ | ||
45 | } while (high != cyclone_timer[1]); | ||
46 | |||
47 | |||
48 | static void mark_offset_cyclone(void) | ||
49 | { | ||
50 | unsigned long lost, delay; | ||
51 | unsigned long delta = last_cyclone_low; | ||
52 | int count; | ||
53 | unsigned long long this_offset, last_offset; | ||
54 | |||
55 | write_seqlock(&monotonic_lock); | ||
56 | last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
57 | |||
58 | spin_lock(&i8253_lock); | ||
59 | read_cyclone_counter(last_cyclone_low,last_cyclone_high); | ||
60 | |||
61 | /* read values for delay_at_last_interrupt */ | ||
62 | outb_p(0x00, 0x43); /* latch the count ASAP */ | ||
63 | |||
64 | count = inb_p(0x40); /* read the latched count */ | ||
65 | count |= inb(0x40) << 8; | ||
66 | |||
67 | /* | ||
68 | * VIA686a test code... reset the latch if count > max + 1 | ||
69 | * from timer_pit.c - cjb | ||
70 | */ | ||
71 | if (count > LATCH) { | ||
72 | outb_p(0x34, PIT_MODE); | ||
73 | outb_p(LATCH & 0xff, PIT_CH0); | ||
74 | outb(LATCH >> 8, PIT_CH0); | ||
75 | count = LATCH - 1; | ||
76 | } | ||
77 | spin_unlock(&i8253_lock); | ||
78 | |||
79 | /* lost tick compensation */ | ||
80 | delta = last_cyclone_low - delta; | ||
81 | delta /= (CYCLONE_TIMER_FREQ/1000000); | ||
82 | delta += delay_at_last_interrupt; | ||
83 | lost = delta/(1000000/HZ); | ||
84 | delay = delta%(1000000/HZ); | ||
85 | if (lost >= 2) | ||
86 | jiffies_64 += lost-1; | ||
87 | |||
88 | /* update the monotonic base value */ | ||
89 | this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
90 | monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; | ||
91 | write_sequnlock(&monotonic_lock); | ||
92 | |||
93 | /* calculate delay_at_last_interrupt */ | ||
94 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
95 | delay_at_last_interrupt = (count + LATCH/2) / LATCH; | ||
96 | |||
97 | |||
98 | /* catch corner case where tick rollover occured | ||
99 | * between cyclone and pit reads (as noted when | ||
100 | * usec delta is > 90% # of usecs/tick) | ||
101 | */ | ||
102 | if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) | ||
103 | jiffies_64++; | ||
104 | } | ||
105 | |||
106 | static unsigned long get_offset_cyclone(void) | ||
107 | { | ||
108 | u32 offset; | ||
109 | |||
110 | if(!cyclone_timer) | ||
111 | return delay_at_last_interrupt; | ||
112 | |||
113 | /* Read the cyclone timer */ | ||
114 | offset = cyclone_timer[0]; | ||
115 | |||
116 | /* .. relative to previous jiffy */ | ||
117 | offset = offset - last_cyclone_low; | ||
118 | |||
119 | /* convert cyclone ticks to microseconds */ | ||
120 | /* XXX slow, can we speed this up? */ | ||
121 | offset = offset/(CYCLONE_TIMER_FREQ/1000000); | ||
122 | |||
123 | /* our adjusted time offset in microseconds */ | ||
124 | return delay_at_last_interrupt + offset; | ||
125 | } | ||
126 | |||
127 | static unsigned long long monotonic_clock_cyclone(void) | ||
128 | { | ||
129 | u32 now_low, now_high; | ||
130 | unsigned long long last_offset, this_offset, base; | ||
131 | unsigned long long ret; | ||
132 | unsigned seq; | ||
133 | |||
134 | /* atomically read monotonic base & last_offset */ | ||
135 | do { | ||
136 | seq = read_seqbegin(&monotonic_lock); | ||
137 | last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
138 | base = monotonic_base; | ||
139 | } while (read_seqretry(&monotonic_lock, seq)); | ||
140 | |||
141 | |||
142 | /* Read the cyclone counter */ | ||
143 | read_cyclone_counter(now_low,now_high); | ||
144 | this_offset = ((unsigned long long)now_high<<32)|now_low; | ||
145 | |||
146 | /* convert to nanoseconds */ | ||
147 | ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK); | ||
148 | return ret * (1000000000 / CYCLONE_TIMER_FREQ); | ||
149 | } | ||
150 | |||
151 | static int __init init_cyclone(char* override) | ||
152 | { | ||
153 | u32* reg; | ||
154 | u32 base; /* saved cyclone base address */ | ||
155 | u32 pageaddr; /* page that contains cyclone_timer register */ | ||
156 | u32 offset; /* offset from pageaddr to cyclone_timer register */ | ||
157 | int i; | ||
158 | |||
159 | /* check clock override */ | ||
160 | if (override[0] && strncmp(override,"cyclone",7)) | ||
161 | return -ENODEV; | ||
162 | |||
163 | /*make sure we're on a summit box*/ | ||
164 | if(!use_cyclone) return -ENODEV; | ||
165 | |||
166 | printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); | ||
167 | |||
168 | /* find base address */ | ||
169 | pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK; | ||
170 | offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK); | ||
171 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
172 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
173 | if(!reg){ | ||
174 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); | ||
175 | return -ENODEV; | ||
176 | } | ||
177 | base = *reg; | ||
178 | if(!base){ | ||
179 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); | ||
180 | return -ENODEV; | ||
181 | } | ||
182 | |||
183 | /* setup PMCC */ | ||
184 | pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK; | ||
185 | offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK); | ||
186 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
187 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
188 | if(!reg){ | ||
189 | printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); | ||
190 | return -ENODEV; | ||
191 | } | ||
192 | reg[0] = 0x00000001; | ||
193 | |||
194 | /* setup MPCS */ | ||
195 | pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK; | ||
196 | offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK); | ||
197 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
198 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
199 | if(!reg){ | ||
200 | printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); | ||
201 | return -ENODEV; | ||
202 | } | ||
203 | reg[0] = 0x00000001; | ||
204 | |||
205 | /* map in cyclone_timer */ | ||
206 | pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK; | ||
207 | offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK); | ||
208 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
209 | cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
210 | if(!cyclone_timer){ | ||
211 | printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); | ||
212 | return -ENODEV; | ||
213 | } | ||
214 | |||
215 | /*quick test to make sure its ticking*/ | ||
216 | for(i=0; i<3; i++){ | ||
217 | u32 old = cyclone_timer[0]; | ||
218 | int stall = 100; | ||
219 | while(stall--) barrier(); | ||
220 | if(cyclone_timer[0] == old){ | ||
221 | printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); | ||
222 | cyclone_timer = 0; | ||
223 | return -ENODEV; | ||
224 | } | ||
225 | } | ||
226 | |||
227 | init_cpu_khz(); | ||
228 | |||
229 | /* Everything looks good! */ | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | |||
234 | static void delay_cyclone(unsigned long loops) | ||
235 | { | ||
236 | unsigned long bclock, now; | ||
237 | if(!cyclone_timer) | ||
238 | return; | ||
239 | bclock = cyclone_timer[0]; | ||
240 | do { | ||
241 | rep_nop(); | ||
242 | now = cyclone_timer[0]; | ||
243 | } while ((now-bclock) < loops); | ||
244 | } | ||
245 | /************************************************************/ | ||
246 | |||
247 | /* cyclone timer_opts struct */ | ||
248 | static struct timer_opts timer_cyclone = { | ||
249 | .name = "cyclone", | ||
250 | .mark_offset = mark_offset_cyclone, | ||
251 | .get_offset = get_offset_cyclone, | ||
252 | .monotonic_clock = monotonic_clock_cyclone, | ||
253 | .delay = delay_cyclone, | ||
254 | }; | ||
255 | |||
256 | struct init_timer_opts __initdata timer_cyclone_init = { | ||
257 | .init = init_cyclone, | ||
258 | .opts = &timer_cyclone, | ||
259 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c deleted file mode 100644 index 17a6fe7166e7..000000000000 --- a/arch/i386/kernel/timers/timer_hpet.c +++ /dev/null | |||
@@ -1,217 +0,0 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | */ | ||
5 | |||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/timex.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <linux/string.h> | ||
11 | #include <linux/jiffies.h> | ||
12 | |||
13 | #include <asm/timer.h> | ||
14 | #include <asm/io.h> | ||
15 | #include <asm/processor.h> | ||
16 | |||
17 | #include "io_ports.h" | ||
18 | #include "mach_timer.h" | ||
19 | #include <asm/hpet.h> | ||
20 | |||
21 | static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ | ||
22 | static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ | ||
23 | static unsigned long hpet_last; /* hpet counter value at last tick*/ | ||
24 | static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ | ||
25 | static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ | ||
26 | static unsigned long long monotonic_base; | ||
27 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
28 | |||
29 | /* convert from cycles(64bits) => nanoseconds (64bits) | ||
30 | * basic equation: | ||
31 | * ns = cycles / (freq / ns_per_sec) | ||
32 | * ns = cycles * (ns_per_sec / freq) | ||
33 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
34 | * ns = cycles * (10^6 / cpu_khz) | ||
35 | * | ||
36 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
37 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
38 | * ns = cycles * cyc2ns_scale / SC | ||
39 | * | ||
40 | * And since SC is a constant power of two, we can convert the div | ||
41 | * into a shift. | ||
42 | * | ||
43 | * We can use khz divisor instead of mhz to keep a better percision, since | ||
44 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
45 | * (mathieu.desnoyers@polymtl.ca) | ||
46 | * | ||
47 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
48 | */ | ||
49 | static unsigned long cyc2ns_scale __read_mostly; | ||
50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
51 | |||
52 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | ||
53 | { | ||
54 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
55 | } | ||
56 | |||
57 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
58 | { | ||
59 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
60 | } | ||
61 | |||
62 | static unsigned long long monotonic_clock_hpet(void) | ||
63 | { | ||
64 | unsigned long long last_offset, this_offset, base; | ||
65 | unsigned seq; | ||
66 | |||
67 | /* atomically read monotonic base & last_offset */ | ||
68 | do { | ||
69 | seq = read_seqbegin(&monotonic_lock); | ||
70 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
71 | base = monotonic_base; | ||
72 | } while (read_seqretry(&monotonic_lock, seq)); | ||
73 | |||
74 | /* Read the Time Stamp Counter */ | ||
75 | rdtscll(this_offset); | ||
76 | |||
77 | /* return the value in ns */ | ||
78 | return base + cycles_2_ns(this_offset - last_offset); | ||
79 | } | ||
80 | |||
81 | static unsigned long get_offset_hpet(void) | ||
82 | { | ||
83 | register unsigned long eax, edx; | ||
84 | |||
85 | eax = hpet_readl(HPET_COUNTER); | ||
86 | eax -= hpet_last; /* hpet delta */ | ||
87 | eax = min(hpet_tick, eax); | ||
88 | /* | ||
89 | * Time offset = (hpet delta) * ( usecs per HPET clock ) | ||
90 | * = (hpet delta) * ( usecs per tick / HPET clocks per tick) | ||
91 | * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) | ||
92 | * | ||
93 | * Where, | ||
94 | * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick | ||
95 | * | ||
96 | * Using a mull instead of a divl saves some cycles in critical path. | ||
97 | */ | ||
98 | ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax); | ||
99 | |||
100 | /* our adjusted time offset in microseconds */ | ||
101 | return edx; | ||
102 | } | ||
103 | |||
104 | static void mark_offset_hpet(void) | ||
105 | { | ||
106 | unsigned long long this_offset, last_offset; | ||
107 | unsigned long offset; | ||
108 | |||
109 | write_seqlock(&monotonic_lock); | ||
110 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
111 | rdtsc(last_tsc_low, last_tsc_high); | ||
112 | |||
113 | if (hpet_use_timer) | ||
114 | offset = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
115 | else | ||
116 | offset = hpet_readl(HPET_COUNTER); | ||
117 | if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { | ||
118 | int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; | ||
119 | jiffies_64 += lost_ticks; | ||
120 | } | ||
121 | hpet_last = offset; | ||
122 | |||
123 | /* update the monotonic base value */ | ||
124 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
125 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
126 | write_sequnlock(&monotonic_lock); | ||
127 | } | ||
128 | |||
129 | static void delay_hpet(unsigned long loops) | ||
130 | { | ||
131 | unsigned long hpet_start, hpet_end; | ||
132 | unsigned long eax; | ||
133 | |||
134 | /* loops is the number of cpu cycles. Convert it to hpet clocks */ | ||
135 | ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops); | ||
136 | |||
137 | hpet_start = hpet_readl(HPET_COUNTER); | ||
138 | do { | ||
139 | rep_nop(); | ||
140 | hpet_end = hpet_readl(HPET_COUNTER); | ||
141 | } while ((hpet_end - hpet_start) < (loops)); | ||
142 | } | ||
143 | |||
144 | static struct timer_opts timer_hpet; | ||
145 | |||
146 | static int __init init_hpet(char* override) | ||
147 | { | ||
148 | unsigned long result, remain; | ||
149 | |||
150 | /* check clock override */ | ||
151 | if (override[0] && strncmp(override,"hpet",4)) | ||
152 | return -ENODEV; | ||
153 | |||
154 | if (!is_hpet_enabled()) | ||
155 | return -ENODEV; | ||
156 | |||
157 | printk("Using HPET for gettimeofday\n"); | ||
158 | if (cpu_has_tsc) { | ||
159 | unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient); | ||
160 | if (tsc_quotient) { | ||
161 | /* report CPU clock rate in Hz. | ||
162 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
163 | * clock/second. Our precision is about 100 ppm. | ||
164 | */ | ||
165 | { unsigned long eax=0, edx=1000; | ||
166 | ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, | ||
167 | eax, edx); | ||
168 | printk("Detected %u.%03u MHz processor.\n", | ||
169 | cpu_khz / 1000, cpu_khz % 1000); | ||
170 | } | ||
171 | set_cyc2ns_scale(cpu_khz); | ||
172 | } | ||
173 | /* set this only when cpu_has_tsc */ | ||
174 | timer_hpet.read_timer = read_timer_tsc; | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Math to calculate hpet to usec multiplier | ||
179 | * Look for the comments at get_offset_hpet() | ||
180 | */ | ||
181 | ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC); | ||
182 | if (remain > (hpet_tick >> 1)) | ||
183 | result++; /* rounding the result */ | ||
184 | hpet_usec_quotient = result; | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | static int hpet_resume(void) | ||
190 | { | ||
191 | write_seqlock(&monotonic_lock); | ||
192 | /* Assume this is the last mark offset time */ | ||
193 | rdtsc(last_tsc_low, last_tsc_high); | ||
194 | |||
195 | if (hpet_use_timer) | ||
196 | hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
197 | else | ||
198 | hpet_last = hpet_readl(HPET_COUNTER); | ||
199 | write_sequnlock(&monotonic_lock); | ||
200 | return 0; | ||
201 | } | ||
202 | /************************************************************/ | ||
203 | |||
204 | /* tsc timer_opts struct */ | ||
205 | static struct timer_opts timer_hpet __read_mostly = { | ||
206 | .name = "hpet", | ||
207 | .mark_offset = mark_offset_hpet, | ||
208 | .get_offset = get_offset_hpet, | ||
209 | .monotonic_clock = monotonic_clock_hpet, | ||
210 | .delay = delay_hpet, | ||
211 | .resume = hpet_resume, | ||
212 | }; | ||
213 | |||
214 | struct init_timer_opts __initdata timer_hpet_init = { | ||
215 | .init = init_hpet, | ||
216 | .opts = &timer_hpet, | ||
217 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c deleted file mode 100644 index 4ea2f414dbbd..000000000000 --- a/arch/i386/kernel/timers/timer_none.c +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/timer.h> | ||
3 | |||
4 | static void mark_offset_none(void) | ||
5 | { | ||
6 | /* nothing needed */ | ||
7 | } | ||
8 | |||
9 | static unsigned long get_offset_none(void) | ||
10 | { | ||
11 | return 0; | ||
12 | } | ||
13 | |||
14 | static unsigned long long monotonic_clock_none(void) | ||
15 | { | ||
16 | return 0; | ||
17 | } | ||
18 | |||
19 | static void delay_none(unsigned long loops) | ||
20 | { | ||
21 | int d0; | ||
22 | __asm__ __volatile__( | ||
23 | "\tjmp 1f\n" | ||
24 | ".align 16\n" | ||
25 | "1:\tjmp 2f\n" | ||
26 | ".align 16\n" | ||
27 | "2:\tdecl %0\n\tjns 2b" | ||
28 | :"=&a" (d0) | ||
29 | :"0" (loops)); | ||
30 | } | ||
31 | |||
32 | /* none timer_opts struct */ | ||
33 | struct timer_opts timer_none = { | ||
34 | .name = "none", | ||
35 | .mark_offset = mark_offset_none, | ||
36 | .get_offset = get_offset_none, | ||
37 | .monotonic_clock = monotonic_clock_none, | ||
38 | .delay = delay_none, | ||
39 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c deleted file mode 100644 index b9b6bd56b9ba..000000000000 --- a/arch/i386/kernel/timers/timer_pit.c +++ /dev/null | |||
@@ -1,177 +0,0 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | */ | ||
5 | |||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/module.h> | ||
8 | #include <linux/device.h> | ||
9 | #include <linux/sysdev.h> | ||
10 | #include <linux/timex.h> | ||
11 | #include <asm/delay.h> | ||
12 | #include <asm/mpspec.h> | ||
13 | #include <asm/timer.h> | ||
14 | #include <asm/smp.h> | ||
15 | #include <asm/io.h> | ||
16 | #include <asm/arch_hooks.h> | ||
17 | #include <asm/i8253.h> | ||
18 | |||
19 | #include "do_timer.h" | ||
20 | #include "io_ports.h" | ||
21 | |||
22 | static int count_p; /* counter in get_offset_pit() */ | ||
23 | |||
24 | static int __init init_pit(char* override) | ||
25 | { | ||
26 | /* check clock override */ | ||
27 | if (override[0] && strncmp(override,"pit",3)) | ||
28 | printk(KERN_ERR "Warning: clock= override failed. Defaulting " | ||
29 | "to PIT\n"); | ||
30 | init_cpu_khz(); | ||
31 | count_p = LATCH; | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | static void mark_offset_pit(void) | ||
36 | { | ||
37 | /* nothing needed */ | ||
38 | } | ||
39 | |||
40 | static unsigned long long monotonic_clock_pit(void) | ||
41 | { | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static void delay_pit(unsigned long loops) | ||
46 | { | ||
47 | int d0; | ||
48 | __asm__ __volatile__( | ||
49 | "\tjmp 1f\n" | ||
50 | ".align 16\n" | ||
51 | "1:\tjmp 2f\n" | ||
52 | ".align 16\n" | ||
53 | "2:\tdecl %0\n\tjns 2b" | ||
54 | :"=&a" (d0) | ||
55 | :"0" (loops)); | ||
56 | } | ||
57 | |||
58 | |||
59 | /* This function must be called with xtime_lock held. | ||
60 | * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs | ||
61 | * | ||
62 | * However, the pc-audio speaker driver changes the divisor so that | ||
63 | * it gets interrupted rather more often - it loads 64 into the | ||
64 | * counter rather than 11932! This has an adverse impact on | ||
65 | * do_gettimeoffset() -- it stops working! What is also not | ||
66 | * good is that the interval that our timer function gets called | ||
67 | * is no longer 10.0002 ms, but 9.9767 ms. To get around this | ||
68 | * would require using a different timing source. Maybe someone | ||
69 | * could use the RTC - I know that this can interrupt at frequencies | ||
70 | * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix | ||
71 | * it so that at startup, the timer code in sched.c would select | ||
72 | * using either the RTC or the 8253 timer. The decision would be | ||
73 | * based on whether there was any other device around that needed | ||
74 | * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz, | ||
75 | * and then do some jiggery to have a version of do_timer that | ||
76 | * advanced the clock by 1/1024 s. Every time that reached over 1/100 | ||
77 | * of a second, then do all the old code. If the time was kept correct | ||
78 | * then do_gettimeoffset could just return 0 - there is no low order | ||
79 | * divider that can be accessed. | ||
80 | * | ||
81 | * Ideally, you would be able to use the RTC for the speaker driver, | ||
82 | * but it appears that the speaker driver really needs interrupt more | ||
83 | * often than every 120 us or so. | ||
84 | * | ||
85 | * Anyway, this needs more thought.... pjsg (1993-08-28) | ||
86 | * | ||
87 | * If you are really that interested, you should be reading | ||
88 | * comp.protocols.time.ntp! | ||
89 | */ | ||
90 | |||
91 | static unsigned long get_offset_pit(void) | ||
92 | { | ||
93 | int count; | ||
94 | unsigned long flags; | ||
95 | static unsigned long jiffies_p = 0; | ||
96 | |||
97 | /* | ||
98 | * cache volatile jiffies temporarily; we have xtime_lock. | ||
99 | */ | ||
100 | unsigned long jiffies_t; | ||
101 | |||
102 | spin_lock_irqsave(&i8253_lock, flags); | ||
103 | /* timer count may underflow right here */ | ||
104 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
105 | |||
106 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
107 | |||
108 | /* | ||
109 | * We do this guaranteed double memory access instead of a _p | ||
110 | * postfix in the previous port access. Wheee, hackady hack | ||
111 | */ | ||
112 | jiffies_t = jiffies; | ||
113 | |||
114 | count |= inb_p(PIT_CH0) << 8; | ||
115 | |||
116 | /* VIA686a test code... reset the latch if count > max + 1 */ | ||
117 | if (count > LATCH) { | ||
118 | outb_p(0x34, PIT_MODE); | ||
119 | outb_p(LATCH & 0xff, PIT_CH0); | ||
120 | outb(LATCH >> 8, PIT_CH0); | ||
121 | count = LATCH - 1; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * avoiding timer inconsistencies (they are rare, but they happen)... | ||
126 | * there are two kinds of problems that must be avoided here: | ||
127 | * 1. the timer counter underflows | ||
128 | * 2. hardware problem with the timer, not giving us continuous time, | ||
129 | * the counter does small "jumps" upwards on some Pentium systems, | ||
130 | * (see c't 95/10 page 335 for Neptun bug.) | ||
131 | */ | ||
132 | |||
133 | if( jiffies_t == jiffies_p ) { | ||
134 | if( count > count_p ) { | ||
135 | /* the nutcase */ | ||
136 | count = do_timer_overflow(count); | ||
137 | } | ||
138 | } else | ||
139 | jiffies_p = jiffies_t; | ||
140 | |||
141 | count_p = count; | ||
142 | |||
143 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
144 | |||
145 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
146 | count = (count + LATCH/2) / LATCH; | ||
147 | |||
148 | return count; | ||
149 | } | ||
150 | |||
151 | |||
152 | /* tsc timer_opts struct */ | ||
153 | struct timer_opts timer_pit = { | ||
154 | .name = "pit", | ||
155 | .mark_offset = mark_offset_pit, | ||
156 | .get_offset = get_offset_pit, | ||
157 | .monotonic_clock = monotonic_clock_pit, | ||
158 | .delay = delay_pit, | ||
159 | }; | ||
160 | |||
161 | struct init_timer_opts __initdata timer_pit_init = { | ||
162 | .init = init_pit, | ||
163 | .opts = &timer_pit, | ||
164 | }; | ||
165 | |||
166 | void setup_pit_timer(void) | ||
167 | { | ||
168 | unsigned long flags; | ||
169 | |||
170 | spin_lock_irqsave(&i8253_lock, flags); | ||
171 | outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | ||
172 | udelay(10); | ||
173 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
174 | udelay(10); | ||
175 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
176 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
177 | } | ||
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c deleted file mode 100644 index 144e94a04933..000000000000 --- a/arch/i386/kernel/timers/timer_pm.c +++ /dev/null | |||
@@ -1,342 +0,0 @@ | |||
1 | /* | ||
2 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
3 | * | ||
4 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
5 | * southbridges as primary timing source for the Linux kernel. | ||
6 | * | ||
7 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
8 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
9 | * | ||
10 | * This file is licensed under the GPL v2. | ||
11 | */ | ||
12 | |||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/device.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/pci.h> | ||
19 | #include <asm/types.h> | ||
20 | #include <asm/timer.h> | ||
21 | #include <asm/smp.h> | ||
22 | #include <asm/io.h> | ||
23 | #include <asm/arch_hooks.h> | ||
24 | |||
25 | #include <linux/timex.h> | ||
26 | #include "mach_timer.h" | ||
27 | |||
28 | /* Number of PMTMR ticks expected during calibration run */ | ||
29 | #define PMTMR_TICKS_PER_SEC 3579545 | ||
30 | #define PMTMR_EXPECTED_RATE \ | ||
31 | ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) | ||
32 | |||
33 | |||
34 | /* The I/O port the PMTMR resides at. | ||
35 | * The location is detected during setup_arch(), | ||
36 | * in arch/i386/acpi/boot.c */ | ||
37 | u32 pmtmr_ioport = 0; | ||
38 | |||
39 | |||
40 | /* value of the Power timer at last timer interrupt */ | ||
41 | static u32 offset_tick; | ||
42 | static u32 offset_delay; | ||
43 | |||
44 | static unsigned long long monotonic_base; | ||
45 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
46 | |||
47 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | ||
48 | |||
49 | static int pmtmr_need_workaround __read_mostly = 1; | ||
50 | |||
51 | /*helper function to safely read acpi pm timesource*/ | ||
52 | static inline u32 read_pmtmr(void) | ||
53 | { | ||
54 | if (pmtmr_need_workaround) { | ||
55 | u32 v1, v2, v3; | ||
56 | |||
57 | /* It has been reported that because of various broken | ||
58 | * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time | ||
59 | * source is not latched, so you must read it multiple | ||
60 | * times to insure a safe value is read. | ||
61 | */ | ||
62 | do { | ||
63 | v1 = inl(pmtmr_ioport); | ||
64 | v2 = inl(pmtmr_ioport); | ||
65 | v3 = inl(pmtmr_ioport); | ||
66 | } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) | ||
67 | || (v3 > v1 && v3 < v2)); | ||
68 | |||
69 | /* mask the output to 24 bits */ | ||
70 | return v2 & ACPI_PM_MASK; | ||
71 | } | ||
72 | |||
73 | return inl(pmtmr_ioport) & ACPI_PM_MASK; | ||
74 | } | ||
75 | |||
76 | |||
77 | /* | ||
78 | * Some boards have the PMTMR running way too fast. We check | ||
79 | * the PMTMR rate against PIT channel 2 to catch these cases. | ||
80 | */ | ||
81 | static int verify_pmtmr_rate(void) | ||
82 | { | ||
83 | u32 value1, value2; | ||
84 | unsigned long count, delta; | ||
85 | |||
86 | mach_prepare_counter(); | ||
87 | value1 = read_pmtmr(); | ||
88 | mach_countup(&count); | ||
89 | value2 = read_pmtmr(); | ||
90 | delta = (value2 - value1) & ACPI_PM_MASK; | ||
91 | |||
92 | /* Check that the PMTMR delta is within 5% of what we expect */ | ||
93 | if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || | ||
94 | delta > (PMTMR_EXPECTED_RATE * 21) / 20) { | ||
95 | printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); | ||
96 | return -1; | ||
97 | } | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | |||
103 | static int init_pmtmr(char* override) | ||
104 | { | ||
105 | u32 value1, value2; | ||
106 | unsigned int i; | ||
107 | |||
108 | if (override[0] && strncmp(override,"pmtmr",5)) | ||
109 | return -ENODEV; | ||
110 | |||
111 | if (!pmtmr_ioport) | ||
112 | return -ENODEV; | ||
113 | |||
114 | /* we use the TSC for delay_pmtmr, so make sure it exists */ | ||
115 | if (!cpu_has_tsc) | ||
116 | return -ENODEV; | ||
117 | |||
118 | /* "verify" this timing source */ | ||
119 | value1 = read_pmtmr(); | ||
120 | for (i = 0; i < 10000; i++) { | ||
121 | value2 = read_pmtmr(); | ||
122 | if (value2 == value1) | ||
123 | continue; | ||
124 | if (value2 > value1) | ||
125 | goto pm_good; | ||
126 | if ((value2 < value1) && ((value2) < 0xFFF)) | ||
127 | goto pm_good; | ||
128 | printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); | ||
129 | return -EINVAL; | ||
130 | } | ||
131 | printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); | ||
132 | return -ENODEV; | ||
133 | |||
134 | pm_good: | ||
135 | if (verify_pmtmr_rate() != 0) | ||
136 | return -ENODEV; | ||
137 | |||
138 | init_cpu_khz(); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static inline u32 cyc2us(u32 cycles) | ||
143 | { | ||
144 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
145 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
146 | * | ||
147 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
148 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
149 | * u32 overflows. | ||
150 | */ | ||
151 | cycles *= 286; | ||
152 | return (cycles >> 10); | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * this gets called during each timer interrupt | ||
157 | * - Called while holding the writer xtime_lock | ||
158 | */ | ||
159 | static void mark_offset_pmtmr(void) | ||
160 | { | ||
161 | u32 lost, delta, last_offset; | ||
162 | static int first_run = 1; | ||
163 | last_offset = offset_tick; | ||
164 | |||
165 | write_seqlock(&monotonic_lock); | ||
166 | |||
167 | offset_tick = read_pmtmr(); | ||
168 | |||
169 | /* calculate tick interval */ | ||
170 | delta = (offset_tick - last_offset) & ACPI_PM_MASK; | ||
171 | |||
172 | /* convert to usecs */ | ||
173 | delta = cyc2us(delta); | ||
174 | |||
175 | /* update the monotonic base value */ | ||
176 | monotonic_base += delta * NSEC_PER_USEC; | ||
177 | write_sequnlock(&monotonic_lock); | ||
178 | |||
179 | /* convert to ticks */ | ||
180 | delta += offset_delay; | ||
181 | lost = delta / (USEC_PER_SEC / HZ); | ||
182 | offset_delay = delta % (USEC_PER_SEC / HZ); | ||
183 | |||
184 | |||
185 | /* compensate for lost ticks */ | ||
186 | if (lost >= 2) | ||
187 | jiffies_64 += lost - 1; | ||
188 | |||
189 | /* don't calculate delay for first run, | ||
190 | or if we've got less then a tick */ | ||
191 | if (first_run || (lost < 1)) { | ||
192 | first_run = 0; | ||
193 | offset_delay = 0; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | static int pmtmr_resume(void) | ||
198 | { | ||
199 | write_seqlock(&monotonic_lock); | ||
200 | /* Assume this is the last mark offset time */ | ||
201 | offset_tick = read_pmtmr(); | ||
202 | write_sequnlock(&monotonic_lock); | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | static unsigned long long monotonic_clock_pmtmr(void) | ||
207 | { | ||
208 | u32 last_offset, this_offset; | ||
209 | unsigned long long base, ret; | ||
210 | unsigned seq; | ||
211 | |||
212 | |||
213 | /* atomically read monotonic base & last_offset */ | ||
214 | do { | ||
215 | seq = read_seqbegin(&monotonic_lock); | ||
216 | last_offset = offset_tick; | ||
217 | base = monotonic_base; | ||
218 | } while (read_seqretry(&monotonic_lock, seq)); | ||
219 | |||
220 | /* Read the pmtmr */ | ||
221 | this_offset = read_pmtmr(); | ||
222 | |||
223 | /* convert to nanoseconds */ | ||
224 | ret = (this_offset - last_offset) & ACPI_PM_MASK; | ||
225 | ret = base + (cyc2us(ret) * NSEC_PER_USEC); | ||
226 | return ret; | ||
227 | } | ||
228 | |||
/*
 * Busy-wait until the value obtained through rdtscl() has advanced by at
 * least @loops.  Note that this spins on rdtscl(), not on the PM timer,
 * even though it is the delay hook of the PM timer time source.
 */
static void delay_pmtmr(unsigned long loops)
{
	unsigned long start, cur;

	rdtscl(start);
	do {
		rep_nop();
		rdtscl(cur);
	} while ((cur - start) < loops);
}
240 | |||
241 | |||
242 | /* | ||
243 | * get the offset (in microseconds) from the last call to mark_offset() | ||
244 | * - Called holding a reader xtime_lock | ||
245 | */ | ||
246 | static unsigned long get_offset_pmtmr(void) | ||
247 | { | ||
248 | u32 now, offset, delta = 0; | ||
249 | |||
250 | offset = offset_tick; | ||
251 | now = read_pmtmr(); | ||
252 | delta = (now - offset)&ACPI_PM_MASK; | ||
253 | |||
254 | return (unsigned long) offset_delay + cyc2us(delta); | ||
255 | } | ||
256 | |||
257 | |||
258 | /* acpi timer_opts struct */ | ||
259 | static struct timer_opts timer_pmtmr = { | ||
260 | .name = "pmtmr", | ||
261 | .mark_offset = mark_offset_pmtmr, | ||
262 | .get_offset = get_offset_pmtmr, | ||
263 | .monotonic_clock = monotonic_clock_pmtmr, | ||
264 | .delay = delay_pmtmr, | ||
265 | .read_timer = read_timer_tsc, | ||
266 | .resume = pmtmr_resume, | ||
267 | }; | ||
268 | |||
269 | struct init_timer_opts __initdata timer_pmtmr_init = { | ||
270 | .init = init_pmtmr, | ||
271 | .opts = &timer_pmtmr, | ||
272 | }; | ||
273 | |||
#ifdef CONFIG_PCI
/*
 * PIIX4 Errata:
 *
 * The power management timer may return improper results when read.
 * Although the timer value settles properly after incrementing,
 * while incrementing there is a 3 ns window every 69.8 ns where the
 * timer value is indeterminate (a 4.2% chance that the data will be
 * incorrect when read). As a result, the ACPI free running count up
 * timer specification is violated due to erroneous reads.
 *
 * This initcall only does anything when the PM timer is the current time
 * source and the workaround is still enabled.  It warns if an affected
 * (or possibly affected) chipset is present, and switches the workaround
 * off when none is found.  Always returns 0.
 */
static int __init pmtmr_bug_check(void)
{
	static struct pci_device_id gray_list[] __initdata = {
		/* these chipsets may have bug. */
		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
				PCI_DEVICE_ID_INTEL_82801DB_0) },
		{ },
	};
	struct pci_dev *piix4;
	int bug_found = 0;
	u8 rev;

	if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
		return 0;

	piix4 = pci_get_device(PCI_VENDOR_ID_INTEL,
			       PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
	if (piix4) {
		pci_read_config_byte(piix4, PCI_REVISION_ID, &rev);
		/* the bug has been fixed in PIIX4M */
		if (rev < 3) {
			printk(KERN_WARNING "* Found PM-Timer Bug on this "
				"chipset. Due to workarounds for a bug,\n"
				"* this time source is slow. Consider trying "
				"other time sources (clock=)\n");
			bug_found = 1;
		}
		pci_dev_put(piix4);
	}

	if (pci_dev_present(gray_list)) {
		printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
			" to workarounds for a bug,\n"
			"* this time source is slow. If you are sure your timer"
			" does not have\n"
			"* this bug, please use \"pmtmr_good\" to disable the "
			"workaround\n");
		bug_found = 1;
	}

	/* nothing suspicious found: the slow workaround is not needed */
	if (!bug_found)
		pmtmr_need_workaround = 0;

	return 0;
}
device_initcall(pmtmr_bug_check);
#endif
332 | |||
333 | static int __init pmtr_good_setup(char *__str) | ||
334 | { | ||
335 | pmtmr_need_workaround = 0; | ||
336 | return 1; | ||
337 | } | ||
338 | __setup("pmtmr_good", pmtr_good_setup); | ||
339 | |||
340 | MODULE_LICENSE("GPL"); | ||
341 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | ||
342 | MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); | ||
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c deleted file mode 100644 index f1187ddb0d0f..000000000000 --- a/arch/i386/kernel/timers/timer_tsc.c +++ /dev/null | |||
@@ -1,617 +0,0 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | * | ||
5 | * 2004-06-25 Jesper Juhl | ||
6 | * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4 | ||
7 | * failing to inline. | ||
8 | */ | ||
9 | |||
10 | #include <linux/spinlock.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/timex.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/jiffies.h> | ||
17 | |||
18 | #include <asm/timer.h> | ||
19 | #include <asm/io.h> | ||
20 | /* processor.h for tsc_disable flag */ | ||
21 | #include <asm/processor.h> | ||
22 | |||
23 | #include "io_ports.h" | ||
24 | #include "mach_timer.h" | ||
25 | |||
26 | #include <asm/hpet.h> | ||
27 | #include <asm/i8253.h> | ||
28 | |||
29 | #ifdef CONFIG_HPET_TIMER | ||
30 | static unsigned long hpet_usec_quotient; | ||
31 | static unsigned long hpet_last; | ||
32 | static struct timer_opts timer_tsc; | ||
33 | #endif | ||
34 | |||
35 | static inline void cpufreq_delayed_get(void); | ||
36 | |||
37 | int tsc_disable __devinitdata = 0; | ||
38 | |||
39 | static int use_tsc; | ||
40 | /* Number of usecs that the last interrupt was delayed */ | ||
41 | static int delay_at_last_interrupt; | ||
42 | |||
43 | static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ | ||
44 | static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ | ||
45 | static unsigned long long monotonic_base; | ||
46 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
47 | |||
48 | /* Avoid compensating for lost ticks before TSCs are synched */ | ||
49 | static int detect_lost_ticks; | ||
50 | static int __init start_lost_tick_compensation(void) | ||
51 | { | ||
52 | detect_lost_ticks = 1; | ||
53 | return 0; | ||
54 | } | ||
55 | late_initcall(start_lost_tick_compensation); | ||
56 | |||
57 | /* convert from cycles(64bits) => nanoseconds (64bits) | ||
58 | * basic equation: | ||
59 | * ns = cycles / (freq / ns_per_sec) | ||
60 | * ns = cycles * (ns_per_sec / freq) | ||
61 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
62 | * ns = cycles * (10^6 / cpu_khz) | ||
63 | * | ||
64 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
65 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
66 | * ns = cycles * cyc2ns_scale / SC | ||
67 | * | ||
68 | * And since SC is a constant power of two, we can convert the div | ||
69 | * into a shift. | ||
70 | * | ||
71 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
72 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
73 | * (mathieu.desnoyers@polymtl.ca) | ||
74 | * | ||
75 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
76 | */ | ||
77 | static unsigned long cyc2ns_scale __read_mostly; | ||
78 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
79 | |||
80 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | ||
81 | { | ||
82 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
83 | } | ||
84 | |||
85 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
86 | { | ||
87 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
88 | } | ||
89 | |||
90 | static int count2; /* counter for mark_offset_tsc() */ | ||
91 | |||
92 | /* Cached *multiplier* to convert TSC counts to microseconds. | ||
93 | * (see the equation below). | ||
94 | * Equal to 2^32 * (1 / (clocks per usec) ). | ||
95 | * Initialized in time_init. | ||
96 | */ | ||
97 | static unsigned long fast_gettimeoffset_quotient; | ||
98 | |||
99 | static unsigned long get_offset_tsc(void) | ||
100 | { | ||
101 | register unsigned long eax, edx; | ||
102 | |||
103 | /* Read the Time Stamp Counter */ | ||
104 | |||
105 | rdtsc(eax,edx); | ||
106 | |||
107 | /* .. relative to previous jiffy (32 bits is enough) */ | ||
108 | eax -= last_tsc_low; /* tsc_low delta */ | ||
109 | |||
110 | /* | ||
111 | * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient | ||
112 | * = (tsc_low delta) * (usecs_per_clock) | ||
113 | * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) | ||
114 | * | ||
115 | * Using a mull instead of a divl saves up to 31 clock cycles | ||
116 | * in the critical path. | ||
117 | */ | ||
118 | |||
119 | __asm__("mull %2" | ||
120 | :"=a" (eax), "=d" (edx) | ||
121 | :"rm" (fast_gettimeoffset_quotient), | ||
122 | "0" (eax)); | ||
123 | |||
124 | /* our adjusted time offset in microseconds */ | ||
125 | return delay_at_last_interrupt + edx; | ||
126 | } | ||
127 | |||
128 | static unsigned long long monotonic_clock_tsc(void) | ||
129 | { | ||
130 | unsigned long long last_offset, this_offset, base; | ||
131 | unsigned seq; | ||
132 | |||
133 | /* atomically read monotonic base & last_offset */ | ||
134 | do { | ||
135 | seq = read_seqbegin(&monotonic_lock); | ||
136 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
137 | base = monotonic_base; | ||
138 | } while (read_seqretry(&monotonic_lock, seq)); | ||
139 | |||
140 | /* Read the Time Stamp Counter */ | ||
141 | rdtscll(this_offset); | ||
142 | |||
143 | /* return the value in ns */ | ||
144 | return base + cycles_2_ns(this_offset - last_offset); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Scheduler clock - returns current time in nanosec units. | ||
149 | */ | ||
150 | unsigned long long sched_clock(void) | ||
151 | { | ||
152 | unsigned long long this_offset; | ||
153 | |||
154 | /* | ||
155 | * In the NUMA case we dont use the TSC as they are not | ||
156 | * synchronized across all CPUs. | ||
157 | */ | ||
158 | #ifndef CONFIG_NUMA | ||
159 | if (!use_tsc) | ||
160 | #endif | ||
161 | /* no locking but a rare wrong value is not a big deal */ | ||
162 | return jiffies_64 * (1000000000 / HZ); | ||
163 | |||
164 | /* Read the Time Stamp Counter */ | ||
165 | rdtscll(this_offset); | ||
166 | |||
167 | /* return the value in ns */ | ||
168 | return cycles_2_ns(this_offset); | ||
169 | } | ||
170 | |||
/*
 * Busy-wait until the value obtained through rdtscl() has advanced by at
 * least @loops.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long start, cur;

	rdtscl(start);
	do {
		rep_nop();
		rdtscl(cur);
	} while ((cur - start) < loops);
}
182 | |||
#ifdef CONFIG_HPET_TIMER
/*
 * Per-tick bookkeeping when the HPET drives the tick and the TSC provides
 * the offset: saves a fresh TSC reading, compensates jiffies_64 for lost
 * ticks, advances monotonic_base and recomputes delay_at_last_interrupt.
 */
static void mark_offset_tsc_hpet(void)
{
	unsigned long long tsc_after, tsc_before;
	unsigned long offset, temp, hpet_now;

	write_seqlock(&monotonic_lock);
	tsc_before = ((unsigned long long)last_tsc_high << 32) | last_tsc_low;

	/*
	 * It is important that these two reads happen almost at the same
	 * time.  The original author's note says interrupts are disabled
	 * locally here because the timer irq has the SA_INTERRUPT flag
	 * set. -arca
	 */
	hpet_now = hpet_readl(HPET_COUNTER);
	rdtsc(last_tsc_low, last_tsc_high);

	/* lost tick compensation */
	offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
	if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
	    && detect_lost_ticks) {
		int lost_ticks = (offset - hpet_last) / hpet_tick;

		jiffies_64 += lost_ticks;
	}
	hpet_last = hpet_now;

	/* update the monotonic base value */
	tsc_after = ((unsigned long long)last_tsc_high << 32) | last_tsc_low;
	monotonic_base += cycles_2_ns(tsc_after - tsc_before);
	write_sequnlock(&monotonic_lock);

	/*
	 * calculate delay_at_last_interrupt
	 *
	 * Time offset = (hpet delta) * ( usecs per HPET clock )
	 *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
	 *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
	 * Where,
	 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
	 */
	delay_at_last_interrupt = hpet_now - offset;
	ASM_MUL64_REG(temp, delay_at_last_interrupt,
			hpet_usec_quotient, delay_at_last_interrupt);
}
#endif
233 | |||
234 | |||
235 | #ifdef CONFIG_CPU_FREQ | ||
236 | #include <linux/workqueue.h> | ||
237 | |||
238 | static unsigned int cpufreq_delayed_issched = 0; | ||
239 | static unsigned int cpufreq_init = 0; | ||
240 | static struct work_struct cpufreq_delayed_get_work; | ||
241 | |||
242 | static void handle_cpufreq_delayed_get(void *v) | ||
243 | { | ||
244 | unsigned int cpu; | ||
245 | for_each_online_cpu(cpu) { | ||
246 | cpufreq_get(cpu); | ||
247 | } | ||
248 | cpufreq_delayed_issched = 0; | ||
249 | } | ||
250 | |||
251 | /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries | ||
252 | * to verify the CPU frequency the timing core thinks the CPU is running | ||
253 | * at is still correct. | ||
254 | */ | ||
255 | static inline void cpufreq_delayed_get(void) | ||
256 | { | ||
257 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
258 | cpufreq_delayed_issched = 1; | ||
259 | printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); | ||
260 | schedule_work(&cpufreq_delayed_get_work); | ||
261 | } | ||
262 | } | ||
263 | |||
264 | /* If the CPU frequency is scaled, TSC-based delays will need a different | ||
265 | * loops_per_jiffy value to function properly. | ||
266 | */ | ||
267 | |||
268 | static unsigned int ref_freq = 0; | ||
269 | static unsigned long loops_per_jiffy_ref = 0; | ||
270 | |||
271 | #ifndef CONFIG_SMP | ||
272 | static unsigned long fast_gettimeoffset_ref = 0; | ||
273 | static unsigned int cpu_khz_ref = 0; | ||
274 | #endif | ||
275 | |||
276 | static int | ||
277 | time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
278 | void *data) | ||
279 | { | ||
280 | struct cpufreq_freqs *freq = data; | ||
281 | |||
282 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
283 | write_seqlock_irq(&xtime_lock); | ||
284 | if (!ref_freq) { | ||
285 | if (!freq->old){ | ||
286 | ref_freq = freq->new; | ||
287 | goto end; | ||
288 | } | ||
289 | ref_freq = freq->old; | ||
290 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; | ||
291 | #ifndef CONFIG_SMP | ||
292 | fast_gettimeoffset_ref = fast_gettimeoffset_quotient; | ||
293 | cpu_khz_ref = cpu_khz; | ||
294 | #endif | ||
295 | } | ||
296 | |||
297 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
298 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
299 | (val == CPUFREQ_RESUMECHANGE)) { | ||
300 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
301 | cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | ||
302 | #ifndef CONFIG_SMP | ||
303 | if (cpu_khz) | ||
304 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); | ||
305 | if (use_tsc) { | ||
306 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | ||
307 | fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); | ||
308 | set_cyc2ns_scale(cpu_khz); | ||
309 | } | ||
310 | } | ||
311 | #endif | ||
312 | } | ||
313 | |||
314 | end: | ||
315 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
316 | write_sequnlock_irq(&xtime_lock); | ||
317 | |||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | static struct notifier_block time_cpufreq_notifier_block = { | ||
322 | .notifier_call = time_cpufreq_notifier | ||
323 | }; | ||
324 | |||
325 | |||
326 | static int __init cpufreq_tsc(void) | ||
327 | { | ||
328 | int ret; | ||
329 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); | ||
330 | ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
331 | CPUFREQ_TRANSITION_NOTIFIER); | ||
332 | if (!ret) | ||
333 | cpufreq_init = 1; | ||
334 | return ret; | ||
335 | } | ||
336 | core_initcall(cpufreq_tsc); | ||
337 | |||
338 | #else /* CONFIG_CPU_FREQ */ | ||
/* Without CONFIG_CPU_FREQ there is nothing to re-check: empty stub. */
static inline void cpufreq_delayed_get(void)
{
}
340 | #endif | ||
341 | |||
342 | int recalibrate_cpu_khz(void) | ||
343 | { | ||
344 | #ifndef CONFIG_SMP | ||
345 | unsigned int cpu_khz_old = cpu_khz; | ||
346 | |||
347 | if (cpu_has_tsc) { | ||
348 | local_irq_disable(); | ||
349 | init_cpu_khz(); | ||
350 | local_irq_enable(); | ||
351 | cpu_data[0].loops_per_jiffy = | ||
352 | cpufreq_scale(cpu_data[0].loops_per_jiffy, | ||
353 | cpu_khz_old, | ||
354 | cpu_khz); | ||
355 | return 0; | ||
356 | } else | ||
357 | return -ENODEV; | ||
358 | #else | ||
359 | return -ENODEV; | ||
360 | #endif | ||
361 | } | ||
362 | EXPORT_SYMBOL(recalibrate_cpu_khz); | ||
363 | |||
364 | static void mark_offset_tsc(void) | ||
365 | { | ||
366 | unsigned long lost,delay; | ||
367 | unsigned long delta = last_tsc_low; | ||
368 | int count; | ||
369 | int countmp; | ||
370 | static int count1 = 0; | ||
371 | unsigned long long this_offset, last_offset; | ||
372 | static int lost_count = 0; | ||
373 | |||
374 | write_seqlock(&monotonic_lock); | ||
375 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
376 | /* | ||
377 | * It is important that these two operations happen almost at | ||
378 | * the same time. We do the RDTSC stuff first, since it's | ||
379 | * faster. To avoid any inconsistencies, we need interrupts | ||
380 | * disabled locally. | ||
381 | */ | ||
382 | |||
383 | /* | ||
384 | * Interrupts are just disabled locally since the timer irq | ||
385 | * has the SA_INTERRUPT flag set. -arca | ||
386 | */ | ||
387 | |||
388 | /* read Pentium cycle counter */ | ||
389 | |||
390 | rdtsc(last_tsc_low, last_tsc_high); | ||
391 | |||
392 | spin_lock(&i8253_lock); | ||
393 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
394 | |||
395 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
396 | count |= inb(PIT_CH0) << 8; | ||
397 | |||
398 | /* | ||
399 | * VIA686a test code... reset the latch if count > max + 1 | ||
400 | * from timer_pit.c - cjb | ||
401 | */ | ||
402 | if (count > LATCH) { | ||
403 | outb_p(0x34, PIT_MODE); | ||
404 | outb_p(LATCH & 0xff, PIT_CH0); | ||
405 | outb(LATCH >> 8, PIT_CH0); | ||
406 | count = LATCH - 1; | ||
407 | } | ||
408 | |||
409 | spin_unlock(&i8253_lock); | ||
410 | |||
411 | if (pit_latch_buggy) { | ||
412 | /* get center value of last 3 time lutch */ | ||
413 | if ((count2 >= count && count >= count1) | ||
414 | || (count1 >= count && count >= count2)) { | ||
415 | count2 = count1; count1 = count; | ||
416 | } else if ((count1 >= count2 && count2 >= count) | ||
417 | || (count >= count2 && count2 >= count1)) { | ||
418 | countmp = count;count = count2; | ||
419 | count2 = count1;count1 = countmp; | ||
420 | } else { | ||
421 | count2 = count1; count1 = count; count = count1; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | /* lost tick compensation */ | ||
426 | delta = last_tsc_low - delta; | ||
427 | { | ||
428 | register unsigned long eax, edx; | ||
429 | eax = delta; | ||
430 | __asm__("mull %2" | ||
431 | :"=a" (eax), "=d" (edx) | ||
432 | :"rm" (fast_gettimeoffset_quotient), | ||
433 | "0" (eax)); | ||
434 | delta = edx; | ||
435 | } | ||
436 | delta += delay_at_last_interrupt; | ||
437 | lost = delta/(1000000/HZ); | ||
438 | delay = delta%(1000000/HZ); | ||
439 | if (lost >= 2 && detect_lost_ticks) { | ||
440 | jiffies_64 += lost-1; | ||
441 | |||
442 | /* sanity check to ensure we're not always losing ticks */ | ||
443 | if (lost_count++ > 100) { | ||
444 | printk(KERN_WARNING "Losing too many ticks!\n"); | ||
445 | printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); | ||
446 | printk(KERN_WARNING "Possible reasons for this are:\n"); | ||
447 | printk(KERN_WARNING " You're running with Speedstep,\n"); | ||
448 | printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); | ||
449 | printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); | ||
450 | printk(KERN_WARNING "Falling back to a sane timesource now.\n"); | ||
451 | |||
452 | clock_fallback(); | ||
453 | } | ||
454 | /* ... but give the TSC a fair chance */ | ||
455 | if (lost_count > 25) | ||
456 | cpufreq_delayed_get(); | ||
457 | } else | ||
458 | lost_count = 0; | ||
459 | /* update the monotonic base value */ | ||
460 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
461 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
462 | write_sequnlock(&monotonic_lock); | ||
463 | |||
464 | /* calculate delay_at_last_interrupt */ | ||
465 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
466 | delay_at_last_interrupt = (count + LATCH/2) / LATCH; | ||
467 | |||
468 | /* catch corner case where tick rollover occured | ||
469 | * between tsc and pit reads (as noted when | ||
470 | * usec delta is > 90% # of usecs/tick) | ||
471 | */ | ||
472 | if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) | ||
473 | jiffies_64++; | ||
474 | } | ||
475 | |||
476 | static int __init init_tsc(char* override) | ||
477 | { | ||
478 | |||
479 | /* check clock override */ | ||
480 | if (override[0] && strncmp(override,"tsc",3)) { | ||
481 | #ifdef CONFIG_HPET_TIMER | ||
482 | if (is_hpet_enabled()) { | ||
483 | printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n"); | ||
484 | } else | ||
485 | #endif | ||
486 | { | ||
487 | return -ENODEV; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * If we have APM enabled or the CPU clock speed is variable | ||
493 | * (CPU stops clock on HLT or slows clock to save power) | ||
494 | * then the TSC timestamps may diverge by up to 1 jiffy from | ||
495 | * 'real time' but nothing will break. | ||
496 | * The most frequent case is that the CPU is "woken" from a halt | ||
497 | * state by the timer interrupt itself, so we get 0 error. In the | ||
498 | * rare cases where a driver would "wake" the CPU and request a | ||
499 | * timestamp, the maximum error is < 1 jiffy. But timestamps are | ||
500 | * still perfectly ordered. | ||
501 | * Note that the TSC counter will be reset if APM suspends | ||
502 | * to disk; this won't break the kernel, though, 'cuz we're | ||
503 | * smart. See arch/i386/kernel/apm.c. | ||
504 | */ | ||
505 | /* | ||
506 | * Firstly we have to do a CPU check for chips with | ||
507 | * a potentially buggy TSC. At this point we haven't run | ||
508 | * the ident/bugs checks so we must run this hook as it | ||
509 | * may turn off the TSC flag. | ||
510 | * | ||
511 | * NOTE: this doesn't yet handle SMP 486 machines where only | ||
512 | * some CPU's have a TSC. Thats never worked and nobody has | ||
513 | * moaned if you have the only one in the world - you fix it! | ||
514 | */ | ||
515 | |||
516 | count2 = LATCH; /* initialize counter for mark_offset_tsc() */ | ||
517 | |||
518 | if (cpu_has_tsc) { | ||
519 | unsigned long tsc_quotient; | ||
520 | #ifdef CONFIG_HPET_TIMER | ||
521 | if (is_hpet_enabled() && hpet_use_timer) { | ||
522 | unsigned long result, remain; | ||
523 | printk("Using TSC for gettimeofday\n"); | ||
524 | tsc_quotient = calibrate_tsc_hpet(NULL); | ||
525 | timer_tsc.mark_offset = &mark_offset_tsc_hpet; | ||
526 | /* | ||
527 | * Math to calculate hpet to usec multiplier | ||
528 | * Look for the comments at get_offset_tsc_hpet() | ||
529 | */ | ||
530 | ASM_DIV64_REG(result, remain, hpet_tick, | ||
531 | 0, KERNEL_TICK_USEC); | ||
532 | if (remain > (hpet_tick >> 1)) | ||
533 | result++; /* rounding the result */ | ||
534 | |||
535 | hpet_usec_quotient = result; | ||
536 | } else | ||
537 | #endif | ||
538 | { | ||
539 | tsc_quotient = calibrate_tsc(); | ||
540 | } | ||
541 | |||
542 | if (tsc_quotient) { | ||
543 | fast_gettimeoffset_quotient = tsc_quotient; | ||
544 | use_tsc = 1; | ||
545 | /* | ||
546 | * We could be more selective here I suspect | ||
547 | * and just enable this for the next intel chips ? | ||
548 | */ | ||
549 | /* report CPU clock rate in Hz. | ||
550 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
551 | * clock/second. Our precision is about 100 ppm. | ||
552 | */ | ||
553 | { unsigned long eax=0, edx=1000; | ||
554 | __asm__("divl %2" | ||
555 | :"=a" (cpu_khz), "=d" (edx) | ||
556 | :"r" (tsc_quotient), | ||
557 | "0" (eax), "1" (edx)); | ||
558 | printk("Detected %u.%03u MHz processor.\n", | ||
559 | cpu_khz / 1000, cpu_khz % 1000); | ||
560 | } | ||
561 | set_cyc2ns_scale(cpu_khz); | ||
562 | return 0; | ||
563 | } | ||
564 | } | ||
565 | return -ENODEV; | ||
566 | } | ||
567 | |||
568 | static int tsc_resume(void) | ||
569 | { | ||
570 | write_seqlock(&monotonic_lock); | ||
571 | /* Assume this is the last mark offset time */ | ||
572 | rdtsc(last_tsc_low, last_tsc_high); | ||
573 | #ifdef CONFIG_HPET_TIMER | ||
574 | if (is_hpet_enabled() && hpet_use_timer) | ||
575 | hpet_last = hpet_readl(HPET_COUNTER); | ||
576 | #endif | ||
577 | write_sequnlock(&monotonic_lock); | ||
578 | return 0; | ||
579 | } | ||
580 | |||
581 | #ifndef CONFIG_X86_TSC | ||
582 | /* disable flag for tsc. Takes effect by clearing the TSC cpu flag | ||
583 | * in cpu/common.c */ | ||
584 | static int __init tsc_setup(char *str) | ||
585 | { | ||
586 | tsc_disable = 1; | ||
587 | return 1; | ||
588 | } | ||
589 | #else | ||
590 | static int __init tsc_setup(char *str) | ||
591 | { | ||
592 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | ||
593 | "cannot disable TSC.\n"); | ||
594 | return 1; | ||
595 | } | ||
596 | #endif | ||
597 | __setup("notsc", tsc_setup); | ||
598 | |||
599 | |||
600 | |||
601 | /************************************************************/ | ||
602 | |||
603 | /* tsc timer_opts struct */ | ||
604 | static struct timer_opts timer_tsc = { | ||
605 | .name = "tsc", | ||
606 | .mark_offset = mark_offset_tsc, | ||
607 | .get_offset = get_offset_tsc, | ||
608 | .monotonic_clock = monotonic_clock_tsc, | ||
609 | .delay = delay_tsc, | ||
610 | .read_timer = read_timer_tsc, | ||
611 | .resume = tsc_resume, | ||
612 | }; | ||
613 | |||
614 | struct init_timer_opts __initdata timer_tsc_init = { | ||
615 | .init = init_tsc, | ||
616 | .opts = &timer_tsc, | ||
617 | }; | ||
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index dcc14477af1f..78464097470a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/utsname.h> | 28 | #include <linux/utsname.h> |
29 | #include <linux/kprobes.h> | 29 | #include <linux/kprobes.h> |
30 | #include <linux/kexec.h> | 30 | #include <linux/kexec.h> |
31 | #include <linux/unwind.h> | ||
31 | 32 | ||
32 | #ifdef CONFIG_EISA | 33 | #ifdef CONFIG_EISA |
33 | #include <linux/ioport.h> | 34 | #include <linux/ioport.h> |
@@ -47,7 +48,7 @@ | |||
47 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
48 | #include <asm/i387.h> | 49 | #include <asm/i387.h> |
49 | #include <asm/nmi.h> | 50 | #include <asm/nmi.h> |
50 | 51 | #include <asm/unwind.h> | |
51 | #include <asm/smp.h> | 52 | #include <asm/smp.h> |
52 | #include <asm/arch_hooks.h> | 53 | #include <asm/arch_hooks.h> |
53 | #include <asm/kdebug.h> | 54 | #include <asm/kdebug.h> |
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void); | |||
92 | asmlinkage void machine_check(void); | 93 | asmlinkage void machine_check(void); |
93 | 94 | ||
94 | static int kstack_depth_to_print = 24; | 95 | static int kstack_depth_to_print = 24; |
96 | static int call_trace = 1; | ||
95 | ATOMIC_NOTIFIER_HEAD(i386die_chain); | 97 | ATOMIC_NOTIFIER_HEAD(i386die_chain); |
96 | 98 | ||
97 | int register_die_notifier(struct notifier_block *nb) | 99 | int register_die_notifier(struct notifier_block *nb) |
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, | |||
170 | return ebp; | 172 | return ebp; |
171 | } | 173 | } |
172 | 174 | ||
173 | static void show_trace_log_lvl(struct task_struct *task, | 175 | static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) |
176 | { | ||
177 | int n = 0; | ||
178 | int printed = 0; /* nr of entries already printed on current line */ | ||
179 | |||
180 | while (unwind(info) == 0 && UNW_PC(info)) { | ||
181 | ++n; | ||
182 | printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); | ||
183 | if (arch_unw_user_mode(info)) | ||
184 | break; | ||
185 | } | ||
186 | if (printed) | ||
187 | printk("\n"); | ||
188 | return n; | ||
189 | } | ||
190 | |||
191 | static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
174 | unsigned long *stack, char *log_lvl) | 192 | unsigned long *stack, char *log_lvl) |
175 | { | 193 | { |
176 | unsigned long ebp; | 194 | unsigned long ebp; |
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task, | |||
178 | if (!task) | 196 | if (!task) |
179 | task = current; | 197 | task = current; |
180 | 198 | ||
199 | if (call_trace >= 0) { | ||
200 | int unw_ret = 0; | ||
201 | struct unwind_frame_info info; | ||
202 | |||
203 | if (regs) { | ||
204 | if (unwind_init_frame_info(&info, task, regs) == 0) | ||
205 | unw_ret = show_trace_unwind(&info, log_lvl); | ||
206 | } else if (task == current) | ||
207 | unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); | ||
208 | else { | ||
209 | if (unwind_init_blocked(&info, task) == 0) | ||
210 | unw_ret = show_trace_unwind(&info, log_lvl); | ||
211 | } | ||
212 | if (unw_ret > 0) { | ||
213 | if (call_trace > 0) | ||
214 | return; | ||
215 | printk("%sLegacy call trace:\n", log_lvl); | ||
216 | } | ||
217 | } | ||
218 | |||
181 | if (task == current) { | 219 | if (task == current) { |
182 | /* Grab ebp right from our regs */ | 220 | /* Grab ebp right from our regs */ |
183 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); | 221 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); |
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task, | |||
198 | } | 236 | } |
199 | } | 237 | } |
200 | 238 | ||
201 | void show_trace(struct task_struct *task, unsigned long * stack) | 239 | void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) |
202 | { | 240 | { |
203 | show_trace_log_lvl(task, stack, ""); | 241 | show_trace_log_lvl(task, regs, stack, ""); |
204 | } | 242 | } |
205 | 243 | ||
206 | static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, | 244 | static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
207 | char *log_lvl) | 245 | unsigned long *esp, char *log_lvl) |
208 | { | 246 | { |
209 | unsigned long *stack; | 247 | unsigned long *stack; |
210 | int i; | 248 | int i; |
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, | |||
225 | printk("%08lx ", *stack++); | 263 | printk("%08lx ", *stack++); |
226 | } | 264 | } |
227 | printk("\n%sCall Trace:\n", log_lvl); | 265 | printk("\n%sCall Trace:\n", log_lvl); |
228 | show_trace_log_lvl(task, esp, log_lvl); | 266 | show_trace_log_lvl(task, regs, esp, log_lvl); |
229 | } | 267 | } |
230 | 268 | ||
231 | void show_stack(struct task_struct *task, unsigned long *esp) | 269 | void show_stack(struct task_struct *task, unsigned long *esp) |
232 | { | 270 | { |
233 | printk(" "); | 271 | printk(" "); |
234 | show_stack_log_lvl(task, esp, ""); | 272 | show_stack_log_lvl(task, NULL, esp, ""); |
235 | } | 273 | } |
236 | 274 | ||
237 | /* | 275 | /* |
@@ -241,7 +279,7 @@ void dump_stack(void) | |||
241 | { | 279 | { |
242 | unsigned long stack; | 280 | unsigned long stack; |
243 | 281 | ||
244 | show_trace(current, &stack); | 282 | show_trace(current, NULL, &stack); |
245 | } | 283 | } |
246 | 284 | ||
247 | EXPORT_SYMBOL(dump_stack); | 285 | EXPORT_SYMBOL(dump_stack); |
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs) | |||
285 | u8 __user *eip; | 323 | u8 __user *eip; |
286 | 324 | ||
287 | printk("\n" KERN_EMERG "Stack: "); | 325 | printk("\n" KERN_EMERG "Stack: "); |
288 | show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); | 326 | show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); |
289 | 327 | ||
290 | printk(KERN_EMERG "Code: "); | 328 | printk(KERN_EMERG "Code: "); |
291 | 329 | ||
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s) | |||
1215 | return 1; | 1253 | return 1; |
1216 | } | 1254 | } |
1217 | __setup("kstack=", kstack_setup); | 1255 | __setup("kstack=", kstack_setup); |
1256 | |||
1257 | static int __init call_trace_setup(char *s) | ||
1258 | { | ||
1259 | if (strcmp(s, "old") == 0) | ||
1260 | call_trace = -1; | ||
1261 | else if (strcmp(s, "both") == 0) | ||
1262 | call_trace = 0; | ||
1263 | else if (strcmp(s, "new") == 0) | ||
1264 | call_trace = 1; | ||
1265 | return 1; | ||
1266 | } | ||
1267 | __setup("call_trace=", call_trace_setup); | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c new file mode 100644 index 000000000000..7e0d8dab2075 --- /dev/null +++ b/arch/i386/kernel/tsc.c | |||
@@ -0,0 +1,478 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/timer/timer_tsc.c | ||
3 | * which was originally moved from arch/i386/kernel/time.c. | ||
4 | * See comments there for proper credits. | ||
5 | */ | ||
6 | |||
7 | #include <linux/clocksource.h> | ||
8 | #include <linux/workqueue.h> | ||
9 | #include <linux/cpufreq.h> | ||
10 | #include <linux/jiffies.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/dmi.h> | ||
13 | |||
14 | #include <asm/delay.h> | ||
15 | #include <asm/tsc.h> | ||
16 | #include <asm/delay.h> | ||
17 | #include <asm/io.h> | ||
18 | |||
19 | #include "mach_timer.h" | ||
20 | |||
21 | /* | ||
22 | * On some systems the TSC frequency does not | ||
23 | * change with the cpu frequency. So we need | ||
24 | * an extra value to store the TSC freq | ||
25 | */ | ||
26 | unsigned int tsc_khz; | ||
27 | |||
28 | int tsc_disable __cpuinitdata = 0; | ||
29 | |||
30 | #ifdef CONFIG_X86_TSC | ||
31 | static int __init tsc_setup(char *str) | ||
32 | { | ||
33 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | ||
34 | "cannot disable TSC.\n"); | ||
35 | return 1; | ||
36 | } | ||
37 | #else | ||
38 | /* | ||
39 | * disable flag for tsc. Takes effect by clearing the TSC cpu flag | ||
40 | * in cpu/common.c | ||
41 | */ | ||
42 | static int __init tsc_setup(char *str) | ||
43 | { | ||
44 | tsc_disable = 1; | ||
45 | |||
46 | return 1; | ||
47 | } | ||
48 | #endif | ||
49 | |||
50 | __setup("notsc", tsc_setup); | ||
51 | |||
52 | /* | ||
53 | * code to mark and check if the TSC is unstable | ||
54 | * due to cpufreq or due to unsynced TSCs | ||
55 | */ | ||
56 | static int tsc_unstable; | ||
57 | |||
58 | static inline int check_tsc_unstable(void) | ||
59 | { | ||
60 | return tsc_unstable; | ||
61 | } | ||
62 | |||
63 | void mark_tsc_unstable(void) | ||
64 | { | ||
65 | tsc_unstable = 1; | ||
66 | } | ||
67 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
68 | |||
69 | /* Accelerators for sched_clock() | ||
70 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
71 | * basic equation: | ||
72 | * ns = cycles / (freq / ns_per_sec) | ||
73 | * ns = cycles * (ns_per_sec / freq) | ||
74 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
75 | * ns = cycles * (10^6 / cpu_khz) | ||
76 | * | ||
77 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
78 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
79 | * ns = cycles * cyc2ns_scale / SC | ||
80 | * | ||
81 | * And since SC is a constant power of two, we can convert the div | ||
82 | * into a shift. | ||
83 | * | ||
84 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
85 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
86 | * (mathieu.desnoyers@polymtl.ca) | ||
87 | * | ||
88 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
89 | */ | ||
90 | static unsigned long cyc2ns_scale __read_mostly; | ||
91 | |||
92 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
93 | |||
94 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) | ||
95 | { | ||
96 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
97 | } | ||
98 | |||
99 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
100 | { | ||
101 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Scheduler clock - returns current time in nanosec units. | ||
106 | */ | ||
107 | unsigned long long sched_clock(void) | ||
108 | { | ||
109 | unsigned long long this_offset; | ||
110 | |||
111 | /* | ||
112 | * in the NUMA case we dont use the TSC as they are not | ||
113 | * synchronized across all CPUs. | ||
114 | */ | ||
115 | #ifndef CONFIG_NUMA | ||
116 | if (!cpu_khz || check_tsc_unstable()) | ||
117 | #endif | ||
118 | /* no locking but a rare wrong value is not a big deal */ | ||
119 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | ||
120 | |||
121 | /* read the Time Stamp Counter: */ | ||
122 | rdtscll(this_offset); | ||
123 | |||
124 | /* return the value in ns */ | ||
125 | return cycles_2_ns(this_offset); | ||
126 | } | ||
127 | |||
128 | static unsigned long calculate_cpu_khz(void) | ||
129 | { | ||
130 | unsigned long long start, end; | ||
131 | unsigned long count; | ||
132 | u64 delta64; | ||
133 | int i; | ||
134 | unsigned long flags; | ||
135 | |||
136 | local_irq_save(flags); | ||
137 | |||
138 | /* run 3 times to ensure the cache is warm */ | ||
139 | for (i = 0; i < 3; i++) { | ||
140 | mach_prepare_counter(); | ||
141 | rdtscll(start); | ||
142 | mach_countup(&count); | ||
143 | rdtscll(end); | ||
144 | } | ||
145 | /* | ||
146 | * Error: ECTCNEVERSET | ||
147 | * The CTC wasn't reliable: we got a hit on the very first read, | ||
148 | * or the CPU was so fast/slow that the quotient wouldn't fit in | ||
149 | * 32 bits.. | ||
150 | */ | ||
151 | if (count <= 1) | ||
152 | goto err; | ||
153 | |||
154 | delta64 = end - start; | ||
155 | |||
156 | /* cpu freq too fast: */ | ||
157 | if (delta64 > (1ULL<<32)) | ||
158 | goto err; | ||
159 | |||
160 | /* cpu freq too slow: */ | ||
161 | if (delta64 <= CALIBRATE_TIME_MSEC) | ||
162 | goto err; | ||
163 | |||
164 | delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ | ||
165 | do_div(delta64,CALIBRATE_TIME_MSEC); | ||
166 | |||
167 | local_irq_restore(flags); | ||
168 | return (unsigned long)delta64; | ||
169 | err: | ||
170 | local_irq_restore(flags); | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | int recalibrate_cpu_khz(void) | ||
175 | { | ||
176 | #ifndef CONFIG_SMP | ||
177 | unsigned long cpu_khz_old = cpu_khz; | ||
178 | |||
179 | if (cpu_has_tsc) { | ||
180 | cpu_khz = calculate_cpu_khz(); | ||
181 | tsc_khz = cpu_khz; | ||
182 | cpu_data[0].loops_per_jiffy = | ||
183 | cpufreq_scale(cpu_data[0].loops_per_jiffy, | ||
184 | cpu_khz_old, cpu_khz); | ||
185 | return 0; | ||
186 | } else | ||
187 | return -ENODEV; | ||
188 | #else | ||
189 | return -ENODEV; | ||
190 | #endif | ||
191 | } | ||
192 | |||
193 | EXPORT_SYMBOL(recalibrate_cpu_khz); | ||
194 | |||
195 | void tsc_init(void) | ||
196 | { | ||
197 | if (!cpu_has_tsc || tsc_disable) | ||
198 | return; | ||
199 | |||
200 | cpu_khz = calculate_cpu_khz(); | ||
201 | tsc_khz = cpu_khz; | ||
202 | |||
203 | if (!cpu_khz) | ||
204 | return; | ||
205 | |||
206 | printk("Detected %lu.%03lu MHz processor.\n", | ||
207 | (unsigned long)cpu_khz / 1000, | ||
208 | (unsigned long)cpu_khz % 1000); | ||
209 | |||
210 | set_cyc2ns_scale(cpu_khz); | ||
211 | use_tsc_delay(); | ||
212 | } | ||
213 | |||
214 | #ifdef CONFIG_CPU_FREQ | ||
215 | |||
216 | static unsigned int cpufreq_delayed_issched = 0; | ||
217 | static unsigned int cpufreq_init = 0; | ||
218 | static struct work_struct cpufreq_delayed_get_work; | ||
219 | |||
220 | static void handle_cpufreq_delayed_get(void *v) | ||
221 | { | ||
222 | unsigned int cpu; | ||
223 | |||
224 | for_each_online_cpu(cpu) | ||
225 | cpufreq_get(cpu); | ||
226 | |||
227 | cpufreq_delayed_issched = 0; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries | ||
232 | * to verify the CPU frequency the timing core thinks the CPU is running | ||
233 | * at is still correct. | ||
234 | */ | ||
235 | static inline void cpufreq_delayed_get(void) | ||
236 | { | ||
237 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
238 | cpufreq_delayed_issched = 1; | ||
239 | printk(KERN_DEBUG "Checking if CPU frequency changed.\n"); | ||
240 | schedule_work(&cpufreq_delayed_get_work); | ||
241 | } | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * if the CPU frequency is scaled, TSC-based delays will need a different | ||
246 | * loops_per_jiffy value to function properly. | ||
247 | */ | ||
248 | static unsigned int ref_freq = 0; | ||
249 | static unsigned long loops_per_jiffy_ref = 0; | ||
250 | static unsigned long cpu_khz_ref = 0; | ||
251 | |||
252 | static int | ||
253 | time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | ||
254 | { | ||
255 | struct cpufreq_freqs *freq = data; | ||
256 | |||
257 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
258 | write_seqlock_irq(&xtime_lock); | ||
259 | |||
260 | if (!ref_freq) { | ||
261 | if (!freq->old){ | ||
262 | ref_freq = freq->new; | ||
263 | goto end; | ||
264 | } | ||
265 | ref_freq = freq->old; | ||
266 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; | ||
267 | cpu_khz_ref = cpu_khz; | ||
268 | } | ||
269 | |||
270 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
271 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
272 | (val == CPUFREQ_RESUMECHANGE)) { | ||
273 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
274 | cpu_data[freq->cpu].loops_per_jiffy = | ||
275 | cpufreq_scale(loops_per_jiffy_ref, | ||
276 | ref_freq, freq->new); | ||
277 | |||
278 | if (cpu_khz) { | ||
279 | |||
280 | if (num_online_cpus() == 1) | ||
281 | cpu_khz = cpufreq_scale(cpu_khz_ref, | ||
282 | ref_freq, freq->new); | ||
283 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | ||
284 | tsc_khz = cpu_khz; | ||
285 | set_cyc2ns_scale(cpu_khz); | ||
286 | /* | ||
287 | * TSC based sched_clock turns | ||
288 | * to junk w/ cpufreq | ||
289 | */ | ||
290 | mark_tsc_unstable(); | ||
291 | } | ||
292 | } | ||
293 | } | ||
294 | end: | ||
295 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
296 | write_sequnlock_irq(&xtime_lock); | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static struct notifier_block time_cpufreq_notifier_block = { | ||
302 | .notifier_call = time_cpufreq_notifier | ||
303 | }; | ||
304 | |||
305 | static int __init cpufreq_tsc(void) | ||
306 | { | ||
307 | int ret; | ||
308 | |||
309 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); | ||
310 | ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
311 | CPUFREQ_TRANSITION_NOTIFIER); | ||
312 | if (!ret) | ||
313 | cpufreq_init = 1; | ||
314 | |||
315 | return ret; | ||
316 | } | ||
317 | |||
318 | core_initcall(cpufreq_tsc); | ||
319 | |||
320 | #endif | ||
321 | |||
322 | /* clock source code */ | ||
323 | |||
324 | static unsigned long current_tsc_khz = 0; | ||
325 | static int tsc_update_callback(void); | ||
326 | |||
327 | static cycle_t read_tsc(void) | ||
328 | { | ||
329 | cycle_t ret; | ||
330 | |||
331 | rdtscll(ret); | ||
332 | |||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static struct clocksource clocksource_tsc = { | ||
337 | .name = "tsc", | ||
338 | .rating = 300, | ||
339 | .read = read_tsc, | ||
340 | .mask = CLOCKSOURCE_MASK(64), | ||
341 | .mult = 0, /* to be set */ | ||
342 | .shift = 22, | ||
343 | .update_callback = tsc_update_callback, | ||
344 | .is_continuous = 1, | ||
345 | }; | ||
346 | |||
347 | static int tsc_update_callback(void) | ||
348 | { | ||
349 | int change = 0; | ||
350 | |||
351 | /* check to see if we should switch to the safe clocksource: */ | ||
352 | if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { | ||
353 | clocksource_tsc.rating = 50; | ||
354 | clocksource_reselect(); | ||
355 | change = 1; | ||
356 | } | ||
357 | |||
358 | /* only update if tsc_khz has changed: */ | ||
359 | if (current_tsc_khz != tsc_khz) { | ||
360 | current_tsc_khz = tsc_khz; | ||
361 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | ||
362 | clocksource_tsc.shift); | ||
363 | change = 1; | ||
364 | } | ||
365 | |||
366 | return change; | ||
367 | } | ||
368 | |||
369 | static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) | ||
370 | { | ||
371 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | ||
372 | d->ident); | ||
373 | mark_tsc_unstable(); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | /* List of systems that have known TSC problems */ | ||
378 | static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | ||
379 | { | ||
380 | .callback = dmi_mark_tsc_unstable, | ||
381 | .ident = "IBM Thinkpad 380XD", | ||
382 | .matches = { | ||
383 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
384 | DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), | ||
385 | }, | ||
386 | }, | ||
387 | {} | ||
388 | }; | ||
389 | |||
390 | #define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */ | ||
391 | static struct timer_list verify_tsc_freq_timer; | ||
392 | |||
393 | /* XXX - Probably should add locking */ | ||
394 | static void verify_tsc_freq(unsigned long unused) | ||
395 | { | ||
396 | static u64 last_tsc; | ||
397 | static unsigned long last_jiffies; | ||
398 | |||
399 | u64 now_tsc, interval_tsc; | ||
400 | unsigned long now_jiffies, interval_jiffies; | ||
401 | |||
402 | |||
403 | if (check_tsc_unstable()) | ||
404 | return; | ||
405 | |||
406 | rdtscll(now_tsc); | ||
407 | now_jiffies = jiffies; | ||
408 | |||
409 | if (!last_jiffies) { | ||
410 | goto out; | ||
411 | } | ||
412 | |||
413 | interval_jiffies = now_jiffies - last_jiffies; | ||
414 | interval_tsc = now_tsc - last_tsc; | ||
415 | interval_tsc *= HZ; | ||
416 | do_div(interval_tsc, cpu_khz*1000); | ||
417 | |||
418 | if (interval_tsc < (interval_jiffies * 3 / 4)) { | ||
419 | printk("TSC appears to be running slowly. " | ||
420 | "Marking it as unstable\n"); | ||
421 | mark_tsc_unstable(); | ||
422 | return; | ||
423 | } | ||
424 | |||
425 | out: | ||
426 | last_tsc = now_tsc; | ||
427 | last_jiffies = now_jiffies; | ||
428 | /* set us up to go off on the next interval: */ | ||
429 | mod_timer(&verify_tsc_freq_timer, | ||
430 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL)); | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
435 | * over all CPUs. | ||
436 | */ | ||
437 | static __init int unsynchronized_tsc(void) | ||
438 | { | ||
439 | /* | ||
440 | * Intel systems are normally all synchronized. | ||
441 | * Exceptions must mark TSC as unstable: | ||
442 | */ | ||
443 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
444 | return 0; | ||
445 | |||
446 | /* assume multi socket systems are not synchronized: */ | ||
447 | return num_possible_cpus() > 1; | ||
448 | } | ||
449 | |||
450 | static int __init init_tsc_clocksource(void) | ||
451 | { | ||
452 | |||
453 | if (cpu_has_tsc && tsc_khz && !tsc_disable) { | ||
454 | /* check blacklist */ | ||
455 | dmi_check_system(bad_tsc_dmi_table); | ||
456 | |||
457 | if (unsynchronized_tsc()) /* mark unstable if unsynced */ | ||
458 | mark_tsc_unstable(); | ||
459 | current_tsc_khz = tsc_khz; | ||
460 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | ||
461 | clocksource_tsc.shift); | ||
462 | /* lower the rating if we already know its unstable: */ | ||
463 | if (check_tsc_unstable()) | ||
464 | clocksource_tsc.rating = 50; | ||
465 | |||
466 | init_timer(&verify_tsc_freq_timer); | ||
467 | verify_tsc_freq_timer.function = verify_tsc_freq; | ||
468 | verify_tsc_freq_timer.expires = | ||
469 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL); | ||
470 | add_timer(&verify_tsc_freq_timer); | ||
471 | |||
472 | return clocksource_register(&clocksource_tsc); | ||
473 | } | ||
474 | |||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | module_init(init_tsc_clocksource); | ||
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7512f39c9f25..2d4f1386e2b1 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -71,6 +71,15 @@ SECTIONS | |||
71 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } | 71 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } |
72 | _edata = .; /* End of data section */ | 72 | _edata = .; /* End of data section */ |
73 | 73 | ||
74 | #ifdef CONFIG_STACK_UNWIND | ||
75 | . = ALIGN(4); | ||
76 | .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { | ||
77 | __start_unwind = .; | ||
78 | *(.eh_frame) | ||
79 | __end_unwind = .; | ||
80 | } | ||
81 | #endif | ||
82 | |||
74 | . = ALIGN(THREAD_SIZE); /* init_task */ | 83 | . = ALIGN(THREAD_SIZE); /* init_task */ |
75 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | 84 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { |
76 | *(.data.init_task) | 85 | *(.data.init_task) |
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c index c49a6acbee56..3c0714c4b669 100644 --- a/arch/i386/lib/delay.c +++ b/arch/i386/lib/delay.c | |||
@@ -10,43 +10,92 @@ | |||
10 | * we have to worry about. | 10 | * we have to worry about. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/module.h> | ||
13 | #include <linux/config.h> | 14 | #include <linux/config.h> |
14 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
15 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
16 | #include <linux/module.h> | 17 | |
17 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
18 | #include <asm/delay.h> | 19 | #include <asm/delay.h> |
19 | #include <asm/timer.h> | 20 | #include <asm/timer.h> |
20 | 21 | ||
21 | #ifdef CONFIG_SMP | 22 | #ifdef CONFIG_SMP |
22 | #include <asm/smp.h> | 23 | # include <asm/smp.h> |
23 | #endif | 24 | #endif |
24 | 25 | ||
25 | extern struct timer_opts* timer; | 26 | /* simple loop based delay: */ |
27 | static void delay_loop(unsigned long loops) | ||
28 | { | ||
29 | int d0; | ||
30 | |||
31 | __asm__ __volatile__( | ||
32 | "\tjmp 1f\n" | ||
33 | ".align 16\n" | ||
34 | "1:\tjmp 2f\n" | ||
35 | ".align 16\n" | ||
36 | "2:\tdecl %0\n\tjns 2b" | ||
37 | :"=&a" (d0) | ||
38 | :"0" (loops)); | ||
39 | } | ||
40 | |||
41 | /* TSC based delay: */ | ||
42 | static void delay_tsc(unsigned long loops) | ||
43 | { | ||
44 | unsigned long bclock, now; | ||
45 | |||
46 | rdtscl(bclock); | ||
47 | do { | ||
48 | rep_nop(); | ||
49 | rdtscl(now); | ||
50 | } while ((now-bclock) < loops); | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Since we calibrate only once at boot, this | ||
55 | * function should be set once at boot and not changed | ||
56 | */ | ||
57 | static void (*delay_fn)(unsigned long) = delay_loop; | ||
58 | |||
59 | void use_tsc_delay(void) | ||
60 | { | ||
61 | delay_fn = delay_tsc; | ||
62 | } | ||
63 | |||
64 | int read_current_timer(unsigned long *timer_val) | ||
65 | { | ||
66 | if (delay_fn == delay_tsc) { | ||
67 | rdtscl(*timer_val); | ||
68 | return 0; | ||
69 | } | ||
70 | return -1; | ||
71 | } | ||
26 | 72 | ||
27 | void __delay(unsigned long loops) | 73 | void __delay(unsigned long loops) |
28 | { | 74 | { |
29 | cur_timer->delay(loops); | 75 | delay_fn(loops); |
30 | } | 76 | } |
31 | 77 | ||
32 | inline void __const_udelay(unsigned long xloops) | 78 | inline void __const_udelay(unsigned long xloops) |
33 | { | 79 | { |
34 | int d0; | 80 | int d0; |
81 | |||
35 | xloops *= 4; | 82 | xloops *= 4; |
36 | __asm__("mull %0" | 83 | __asm__("mull %0" |
37 | :"=d" (xloops), "=&a" (d0) | 84 | :"=d" (xloops), "=&a" (d0) |
38 | :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); | 85 | :"1" (xloops), "0" |
39 | __delay(++xloops); | 86 | (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); |
87 | |||
88 | __delay(++xloops); | ||
40 | } | 89 | } |
41 | 90 | ||
42 | void __udelay(unsigned long usecs) | 91 | void __udelay(unsigned long usecs) |
43 | { | 92 | { |
44 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | 93 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ |
45 | } | 94 | } |
46 | 95 | ||
47 | void __ndelay(unsigned long nsecs) | 96 | void __ndelay(unsigned long nsecs) |
48 | { | 97 | { |
49 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | 98 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ |
50 | } | 99 | } |
51 | 100 | ||
52 | EXPORT_SYMBOL(__delay); | 101 | EXPORT_SYMBOL(__delay); |
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index bd6fe96cc16d..6ee7faaf2c1b 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c | |||
@@ -30,6 +30,40 @@ | |||
30 | 30 | ||
31 | extern void die(const char *,struct pt_regs *,long); | 31 | extern void die(const char *,struct pt_regs *,long); |
32 | 32 | ||
33 | #ifdef CONFIG_KPROBES | ||
34 | ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); | ||
35 | int register_page_fault_notifier(struct notifier_block *nb) | ||
36 | { | ||
37 | vmalloc_sync_all(); | ||
38 | return atomic_notifier_chain_register(&notify_page_fault_chain, nb); | ||
39 | } | ||
40 | |||
41 | int unregister_page_fault_notifier(struct notifier_block *nb) | ||
42 | { | ||
43 | return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); | ||
44 | } | ||
45 | |||
46 | static inline int notify_page_fault(enum die_val val, const char *str, | ||
47 | struct pt_regs *regs, long err, int trap, int sig) | ||
48 | { | ||
49 | struct die_args args = { | ||
50 | .regs = regs, | ||
51 | .str = str, | ||
52 | .err = err, | ||
53 | .trapnr = trap, | ||
54 | .signr = sig | ||
55 | }; | ||
56 | return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); | ||
57 | } | ||
58 | #else | ||
59 | static inline int notify_page_fault(enum die_val val, const char *str, | ||
60 | struct pt_regs *regs, long err, int trap, int sig) | ||
61 | { | ||
62 | return NOTIFY_DONE; | ||
63 | } | ||
64 | #endif | ||
65 | |||
66 | |||
33 | /* | 67 | /* |
34 | * Unlock any spinlocks which will prevent us from getting the | 68 | * Unlock any spinlocks which will prevent us from getting the |
35 | * message out | 69 | * message out |
@@ -324,7 +358,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |||
324 | if (unlikely(address >= TASK_SIZE)) { | 358 | if (unlikely(address >= TASK_SIZE)) { |
325 | if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) | 359 | if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) |
326 | return; | 360 | return; |
327 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 361 | if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
328 | SIGSEGV) == NOTIFY_STOP) | 362 | SIGSEGV) == NOTIFY_STOP) |
329 | return; | 363 | return; |
330 | /* | 364 | /* |
@@ -334,7 +368,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |||
334 | goto bad_area_nosemaphore; | 368 | goto bad_area_nosemaphore; |
335 | } | 369 | } |
336 | 370 | ||
337 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 371 | if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
338 | SIGSEGV) == NOTIFY_STOP) | 372 | SIGSEGV) == NOTIFY_STOP) |
339 | return; | 373 | return; |
340 | 374 | ||
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index ec0fd3cfa774..fa8a37bcb391 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c | |||
@@ -281,9 +281,9 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root) | |||
281 | 281 | ||
282 | for (i = 0; i < model->num_counters; ++i) { | 282 | for (i = 0; i < model->num_counters; ++i) { |
283 | struct dentry * dir; | 283 | struct dentry * dir; |
284 | char buf[2]; | 284 | char buf[4]; |
285 | 285 | ||
286 | snprintf(buf, 2, "%d", i); | 286 | snprintf(buf, sizeof(buf), "%d", i); |
287 | dir = oprofilefs_mkdir(sb, root, buf); | 287 | dir = oprofilefs_mkdir(sb, root, buf); |
288 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); | 288 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); |
289 | oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); | 289 | oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); |
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c index 3ad9a72a5036..693bdea4a52b 100644 --- a/arch/i386/oprofile/op_model_athlon.c +++ b/arch/i386/oprofile/op_model_athlon.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/oprofile.h> | 13 | #include <linux/oprofile.h> |
14 | #include <asm/ptrace.h> | 14 | #include <asm/ptrace.h> |
15 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
16 | #include <asm/nmi.h> | ||
16 | 17 | ||
17 | #include "op_x86_model.h" | 18 | #include "op_x86_model.h" |
18 | #include "op_counter.h" | 19 | #include "op_counter.h" |
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c index ac8a066035c2..7c61d357b82b 100644 --- a/arch/i386/oprofile/op_model_p4.c +++ b/arch/i386/oprofile/op_model_p4.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/ptrace.h> | 14 | #include <asm/ptrace.h> |
15 | #include <asm/fixmap.h> | 15 | #include <asm/fixmap.h> |
16 | #include <asm/apic.h> | 16 | #include <asm/apic.h> |
17 | #include <asm/nmi.h> | ||
17 | 18 | ||
18 | #include "op_x86_model.h" | 19 | #include "op_x86_model.h" |
19 | #include "op_counter.h" | 20 | #include "op_counter.h" |
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c index d719015fc044..5c3ab4b027ad 100644 --- a/arch/i386/oprofile/op_model_ppro.c +++ b/arch/i386/oprofile/op_model_ppro.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/ptrace.h> | 14 | #include <asm/ptrace.h> |
15 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
16 | #include <asm/apic.h> | 16 | #include <asm/apic.h> |
17 | #include <asm/nmi.h> | ||
17 | 18 | ||
18 | #include "op_x86_model.h" | 19 | #include "op_x86_model.h" |
19 | #include "op_counter.h" | 20 | #include "op_counter.h" |
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c index 1eec0868f4b3..ed1512a175ab 100644 --- a/arch/i386/pci/pcbios.c +++ b/arch/i386/pci/pcbios.c | |||
@@ -371,8 +371,7 @@ void __devinit pcibios_sort(void) | |||
371 | list_for_each(ln, &pci_devices) { | 371 | list_for_each(ln, &pci_devices) { |
372 | d = pci_dev_g(ln); | 372 | d = pci_dev_g(ln); |
373 | if (d->bus->number == bus && d->devfn == devfn) { | 373 | if (d->bus->number == bus && d->devfn == devfn) { |
374 | list_del(&d->global_list); | 374 | list_move_tail(&d->global_list, &sorted_devices); |
375 | list_add_tail(&d->global_list, &sorted_devices); | ||
376 | if (d == dev) | 375 | if (d == dev) |
377 | found = 1; | 376 | found = 1; |
378 | break; | 377 | break; |
@@ -390,8 +389,7 @@ void __devinit pcibios_sort(void) | |||
390 | if (!found) { | 389 | if (!found) { |
391 | printk(KERN_WARNING "PCI: Device %s not found by BIOS\n", | 390 | printk(KERN_WARNING "PCI: Device %s not found by BIOS\n", |
392 | pci_name(dev)); | 391 | pci_name(dev)); |
393 | list_del(&dev->global_list); | 392 | list_move_tail(&dev->global_list, &sorted_devices); |
394 | list_add_tail(&dev->global_list, &sorted_devices); | ||
395 | } | 393 | } |
396 | } | 394 | } |
397 | list_splice(&sorted_devices, &pci_devices); | 395 | list_splice(&sorted_devices, &pci_devices); |